d085fb5e990db7ee5b7125f517739f5fcf88b6266a3c1d0adc5822b5cf7db6ae
Browse files- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/__init__.py +81 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/assign_score_withk.py +123 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/ball_query.py +55 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/bbox.py +72 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/border_align.py +109 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/box_iou_rotated.py +45 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/carafe.py +287 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/cc_attention.py +83 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/contour_expand.py +49 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/corner_pool.py +161 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/correlation.py +196 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_conv.py +405 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_roi_pool.py +204 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deprecated_wrappers.py +43 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/focal_loss.py +212 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/furthest_point_sample.py +83 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/fused_bias_leakyrelu.py +268 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/gather_points.py +57 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/group_points.py +224 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/info.py +36 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/iou3d.py +85 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/knn.py +77 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/masked_conv.py +111 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/merge_cells.py +149 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/modulated_deform_conv.py +282 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/multi_scale_deform_attn.py +358 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/nms.py +417 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/pixel_group.py +75 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/point_sample.py +336 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_in_boxes.py +133 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_sampler.py +177 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/psa_mask.py +92 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align.py +223 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align_rotated.py +177 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_pool.py +86 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roiaware_pool3d.py +114 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roipoint_pool3d.py +77 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/saconv.py +145 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/scatter_points.py +135 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/sync_bn.py +279 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_interpolate.py +68 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_nn.py +51 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/tin_shift.py +68 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/upfirdn2d.py +330 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/voxelize.py +132 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/__init__.py +13 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/_functions.py +79 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/collate.py +84 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/data_container.py +89 -0
- microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/data_parallel.py +89 -0
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/__init__.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Copyright (c) OpenMMLab. All rights reserved.
# Public interface of the mmcv.ops package: re-exports every custom
# CUDA/C++ operator wrapper so callers can `from ...ops import <name>`.
from .assign_score_withk import assign_score_withk
from .ball_query import ball_query
from .bbox import bbox_overlaps
from .border_align import BorderAlign, border_align
from .box_iou_rotated import box_iou_rotated
from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive
from .cc_attention import CrissCrossAttention
from .contour_expand import contour_expand
from .corner_pool import CornerPool
from .correlation import Correlation
from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d
from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack,
                              ModulatedDeformRoIPoolPack, deform_roi_pool)
# Deprecated wrappers are re-exported under their historical names for
# backward compatibility.
from .deprecated_wrappers import Conv2d_deprecated as Conv2d
from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d
from .deprecated_wrappers import Linear_deprecated as Linear
from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d
from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss,
                         sigmoid_focal_loss, softmax_focal_loss)
from .furthest_point_sample import (furthest_point_sample,
                                    furthest_point_sample_with_dist)
from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu
from .gather_points import gather_points
from .group_points import GroupAll, QueryAndGroup, grouping_operation
from .info import (get_compiler_version, get_compiling_cuda_version,
                   get_onnxruntime_op_path)
from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev
from .knn import knn
from .masked_conv import MaskedConv2d, masked_conv2d
from .modulated_deform_conv import (ModulatedDeformConv2d,
                                    ModulatedDeformConv2dPack,
                                    modulated_deform_conv2d)
from .multi_scale_deform_attn import MultiScaleDeformableAttention
from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms
from .pixel_group import pixel_group
from .point_sample import (SimpleRoIAlign, point_sample,
                           rel_roi_point_to_rel_img_point)
from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu,
                              points_in_boxes_part)
from .points_sampler import PointsSampler
from .psa_mask import PSAMask
from .roi_align import RoIAlign, roi_align
from .roi_align_rotated import RoIAlignRotated, roi_align_rotated
from .roi_pool import RoIPool, roi_pool
from .roiaware_pool3d import RoIAwarePool3d
from .roipoint_pool3d import RoIPointPool3d
from .saconv import SAConv2d
from .scatter_points import DynamicScatter, dynamic_scatter
from .sync_bn import SyncBatchNorm
from .three_interpolate import three_interpolate
from .three_nn import three_nn
from .tin_shift import TINShift, tin_shift
from .upfirdn2d import upfirdn2d
from .voxelize import Voxelization, voxelization

# Explicit public API; keep in sync with the imports above.
__all__ = [
    'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe',
    'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack',
    'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack',
    'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss',
    'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss',
    'get_compiler_version', 'get_compiling_cuda_version',
    'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d',
    'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack',
    'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match',
    'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d',
    'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask',
    'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign',
    'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk',
    'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query',
    'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu',
    'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup',
    'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn',
    'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign',
    'border_align', 'gather_points', 'furthest_point_sample',
    'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation',
    'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization',
    'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d',
    'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all'
]
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/assign_score_withk.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch.autograd import Function
|
| 2 |
+
|
| 3 |
+
from ..utils import ext_loader
|
| 4 |
+
|
| 5 |
+
ext_module = ext_loader.load_ext(
|
| 6 |
+
'_ext', ['assign_score_withk_forward', 'assign_score_withk_backward'])
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class AssignScoreWithK(Function):
    r"""Perform weighted sum to generate output features according to scores.

    Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
    scene_seg/lib/paconv_lib/src/gpu>`_.

    This is a memory-efficient CUDA implementation of assign_scores operation,
    which first transform all point features with weight bank, then assemble
    neighbor features with ``knn_idx`` and perform weighted sum of ``scores``.

    See the `paper <https://arxiv.org/pdf/2103.14635.pdf>`_ appendix Sec. D for
    more detailed descriptions.

    Note:
        This implementation assumes using ``neighbor`` kernel input, which is
        (point_features - center_features, point_features).
        See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/
        pointnet2/paconv.py#L128 for more details.
    """

    @staticmethod
    def forward(ctx,
                scores,
                point_features,
                center_features,
                knn_idx,
                aggregate='sum'):
        """
        Args:
            scores (torch.Tensor): (B, npoint, K, M), predicted scores to
                aggregate weight matrices in the weight bank.
                ``npoint`` is the number of sampled centers.
                ``K`` is the number of queried neighbors.
                ``M`` is the number of weight matrices in the weight bank.
            point_features (torch.Tensor): (B, N, M, out_dim)
                Pre-computed point features to be aggregated.
            center_features (torch.Tensor): (B, N, M, out_dim)
                Pre-computed center features to be aggregated.
            knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN.
                We assume the first idx in each row is the idx of the center.
            aggregate (str, optional): Aggregation method.
                Can be 'sum', 'avg' or 'max'. Defaults: 'sum'.

        Returns:
            torch.Tensor: (B, out_dim, npoint, K), the aggregated features.
        """
        # Map the aggregation mode name onto the integer flag expected by
        # the C++/CUDA kernel. An unknown mode raises KeyError below.
        agg = {'sum': 0, 'avg': 1, 'max': 2}

        B, N, M, out_dim = point_features.size()
        _, npoint, K, _ = scores.size()

        # Output buffer is filled in-place by the extension op.
        output = point_features.new_zeros((B, out_dim, npoint, K))
        ext_module.assign_score_withk_forward(
            point_features.contiguous(),
            center_features.contiguous(),
            scores.contiguous(),
            knn_idx.contiguous(),
            output,
            B=B,
            N0=N,
            N1=npoint,
            M=M,
            K=K,
            O=out_dim,
            aggregate=agg[aggregate])

        # NOTE(review): ``output`` is saved here but discarded in backward
        # (unpacked into ``_``); kept to preserve the saved-tensor layout.
        ctx.save_for_backward(output, point_features, center_features, scores,
                              knn_idx)
        # Remember the aggregation flag for the backward kernel.
        ctx.agg = agg[aggregate]

        return output

    @staticmethod
    def backward(ctx, grad_out):
        """
        Args:
            grad_out (torch.Tensor): (B, out_dim, npoint, K)

        Returns:
            grad_scores (torch.Tensor): (B, npoint, K, M)
            grad_point_features (torch.Tensor): (B, N, M, out_dim)
            grad_center_features (torch.Tensor): (B, N, M, out_dim)
        """
        _, point_features, center_features, scores, knn_idx = ctx.saved_tensors

        agg = ctx.agg

        B, N, M, out_dim = point_features.size()
        _, npoint, K, _ = scores.size()

        # Gradient buffers are filled in-place by the extension op.
        grad_point_features = point_features.new_zeros(point_features.shape)
        grad_center_features = center_features.new_zeros(center_features.shape)
        grad_scores = scores.new_zeros(scores.shape)

        ext_module.assign_score_withk_backward(
            grad_out.contiguous(),
            point_features.contiguous(),
            center_features.contiguous(),
            scores.contiguous(),
            knn_idx.contiguous(),
            grad_point_features,
            grad_center_features,
            grad_scores,
            B=B,
            N0=N,
            N1=npoint,
            M=M,
            K=K,
            O=out_dim,
            aggregate=agg)

        # ``knn_idx`` and ``aggregate`` receive no gradient.
        return grad_scores, grad_point_features, \
            grad_center_features, None, None


assign_score_withk = AssignScoreWithK.apply
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/ball_query.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
from torch.autograd import Function
|
| 4 |
+
|
| 5 |
+
from ..utils import ext_loader
|
| 6 |
+
|
| 7 |
+
ext_module = ext_loader.load_ext('_ext', ['ball_query_forward'])
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class BallQuery(Function):
    """Find nearby points in spherical space."""

    @staticmethod
    def forward(ctx, min_radius: float, max_radius: float, sample_num: int,
                xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor:
        """Collect indices of up to ``sample_num`` neighbors per center.

        Args:
            min_radius (float): minimum radius of the balls.
            max_radius (float): maximum radius of the balls.
            sample_num (int): maximum number of features in the balls.
            xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            center_xyz (Tensor): (B, npoint, 3) centers of the ball query.

        Returns:
            Tensor: (B, npoint, nsample) tensor with the indices of
                the features that form the query balls.
        """
        assert center_xyz.is_contiguous()
        assert xyz.is_contiguous()
        assert min_radius < max_radius

        num_batch, num_points, _ = xyz.size()
        num_centers = center_xyz.size(1)
        # Index buffer filled in-place by the extension kernel.
        indices = xyz.new_zeros(num_batch, num_centers, sample_num,
                                dtype=torch.int)

        ext_module.ball_query_forward(
            center_xyz,
            xyz,
            indices,
            b=num_batch,
            n=num_points,
            m=num_centers,
            min_radius=min_radius,
            max_radius=max_radius,
            nsample=sample_num)
        # ``mark_non_differentiable`` is unavailable on parrots builds.
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(indices)
        return indices

    @staticmethod
    def backward(ctx, a=None):
        # Integer index output: no gradient flows to any of the five inputs.
        return None, None, None, None


ball_query = BallQuery.apply
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/bbox.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from ..utils import ext_loader
|
| 3 |
+
|
| 4 |
+
ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps'])
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0):
    """Calculate overlap between two set of bboxes.

    If ``aligned`` is ``False``, then calculate the ious between each bbox
    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
    bboxes1 and bboxes2.

    Args:
        bboxes1 (Tensor): shape (m, 4) in <x1, y1, x2, y2> format or empty.
        bboxes2 (Tensor): shape (n, 4) in <x1, y1, x2, y2> format or empty.
            If aligned is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union) or "iof" (intersection
            over foreground).
        aligned (bool): If ``True``, only compute the iou of each aligned
            pair, returning shape (m, 1) for empty input. Defaults to False.
        offset (int): Offset added to box extents by the kernel; must be
            0 or 1. Defaults to 0.

    Returns:
        ious(Tensor): shape (m, n) if aligned == False else shape (m, 1)

    Example:
        >>> bboxes1 = torch.FloatTensor([
        >>>     [0, 0, 10, 10],
        >>>     [10, 10, 20, 20],
        >>>     [32, 32, 38, 42],
        >>> ])
        >>> bboxes2 = torch.FloatTensor([
        >>>     [0, 0, 10, 20],
        >>>     [0, 10, 10, 19],
        >>>     [10, 10, 20, 20],
        >>> ])
        >>> bbox_overlaps(bboxes1, bboxes2)
        tensor([[0.5000, 0.0000, 0.0000],
                [0.0000, 0.0000, 1.0000],
                [0.0000, 0.0000, 0.0000]])

    Example:
        >>> empty = torch.FloatTensor([])
        >>> nonempty = torch.FloatTensor([
        >>>     [0, 0, 10, 9],
        >>> ])
        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
    """

    mode_dict = {'iou': 0, 'iof': 1}
    assert mode in mode_dict.keys()
    mode_flag = mode_dict[mode]
    # Either the boxes are empty or the length of boxes' last dimension is 4
    assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0)
    assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0)
    assert offset == 1 or offset == 0

    rows = bboxes1.size(0)
    cols = bboxes2.size(0)
    if aligned:
        assert rows == cols

    if rows * cols == 0:
        # Short-circuit for empty input. Use ``new_zeros`` instead of the
        # deprecated ``Tensor.new``, which returned uninitialized storage
        # (harmless for 0-element results, but deprecated in PyTorch).
        if aligned:
            return bboxes1.new_zeros((rows, 1))
        return bboxes1.new_zeros((rows, cols))

    if aligned:
        ious = bboxes1.new_zeros(rows)
    else:
        ious = bboxes1.new_zeros((rows, cols))
    # The extension kernel fills ``ious`` in place.
    ext_module.bbox_overlaps(
        bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset)
    return ious
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/border_align.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
# modified from
|
| 3 |
+
# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
from torch.autograd import Function
|
| 8 |
+
from torch.autograd.function import once_differentiable
|
| 9 |
+
|
| 10 |
+
from ..utils import ext_loader
|
| 11 |
+
|
| 12 |
+
ext_module = ext_loader.load_ext(
|
| 13 |
+
'_ext', ['border_align_forward', 'border_align_backward'])
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class BorderAlignFunction(Function):
    """Autograd wrapper around the border_align forward/backward kernels."""

    @staticmethod
    def symbolic(g, input, boxes, pool_size):
        # ONNX export path: emit a custom MMCV op instead of tracing.
        return g.op(
            'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size)

    @staticmethod
    def forward(ctx, input, boxes, pool_size):
        # Stash configuration and the input shape for backward.
        ctx.pool_size = pool_size
        ctx.input_shape = input.size()

        assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]'
        assert boxes.size(2) == 4, \
            'the last dimension of boxes must be (x1, y1, x2, y2)'
        assert input.size(1) % 4 == 0, \
            'the channel for input feature must be divisible by factor 4'

        # [B, C//4, H*W, 4]
        output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4)
        output = input.new_zeros(output_shape)
        # `argmax_idx` only used for backward
        argmax_idx = input.new_zeros(output_shape).to(torch.int)

        ext_module.border_align_forward(
            input, boxes, output, argmax_idx, pool_size=ctx.pool_size)

        ctx.save_for_backward(boxes, argmax_idx)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        boxes, argmax_idx = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)
        # complex head architecture may cause grad_output uncontiguous
        grad_output = grad_output.contiguous()
        ext_module.border_align_backward(
            grad_output,
            boxes,
            argmax_idx,
            grad_input,
            pool_size=ctx.pool_size)
        # ``boxes`` and ``pool_size`` receive no gradient.
        return grad_input, None, None


border_align = BorderAlignFunction.apply
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class BorderAlign(nn.Module):
    r"""Border align pooling layer.

    Applies border_align over the input feature based on predicted bboxes.
    The details were described in the paper
    `BorderDet: Border Feature for Dense Object Detection
    <https://arxiv.org/abs/2007.11056>`_.

    For each border line (e.g. top, left, bottom or right) of each box,
    border_align does the following:
    1. uniformly samples ``pool_size + 1`` positions on this line, \
        involving the start and end points.
    2. computes the feature at each sampled position via bilinear \
        interpolation.
    3. max-pools over all ``pool_size + 1`` positions to produce the \
        pooled feature.

    Args:
        pool_size (int): number of positions sampled over the boxes' borders
            (e.g. top, bottom, left, right).
    """

    def __init__(self, pool_size):
        super().__init__()
        self.pool_size = pool_size

    def forward(self, input, boxes):
        """
        Args:
            input: Features with shape [N,4C,H,W]. Channel ranges [0,C),
                [C,2C), [2C,3C), [3C,4C) hold the top, left, bottom and
                right features respectively.
            boxes: Boxes with shape [N,H*W,4] in (x1,y1,x2,y2) format.

        Returns:
            Tensor: Pooled features with shape [N,C,H*W,4]; the last
            dimension is ordered (top,left,bottom,right).
        """
        return border_align(input, boxes, self.pool_size)

    def __repr__(self):
        return f'{self.__class__.__name__}(pool_size={self.pool_size})'
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/box_iou_rotated.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from ..utils import ext_loader
|
| 3 |
+
|
| 4 |
+
ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated'])
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False):
    """Return intersection-over-union (Jaccard index) of rotated boxes.

    Both sets of boxes are expected to be in
    (x_center, y_center, width, height, angle) format.

    If ``aligned`` is ``False``, then calculate the ious between each bbox
    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
    bboxes1 and bboxes2.

    Arguments:
        bboxes1 (Tensor): rotated bboxes 1. \
            It has shape (N, 5), indicating (x, y, w, h, theta) for each row.
            Note that theta is in radian.
        bboxes2 (Tensor): rotated bboxes 2. \
            It has shape (M, 5), indicating (x, y, w, h, theta) for each row.
            Note that theta is in radian.
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).

    Returns:
        ious(Tensor): shape (N, M) if aligned == False else shape (N,)
    """
    assert mode in ['iou', 'iof']
    mode_flag = {'iou': 0, 'iof': 1}[mode]
    num_rows = bboxes1.size(0)
    num_cols = bboxes2.size(0)
    # The kernel writes into a flat buffer in the non-aligned case and a
    # per-pair vector in the aligned case.
    out_shape = (num_rows,) if aligned else (num_rows * num_cols,)
    ious = bboxes1.new_zeros(out_shape)
    ext_module.box_iou_rotated(
        bboxes1.contiguous(),
        bboxes2.contiguous(),
        ious,
        mode_flag=mode_flag,
        aligned=aligned)
    return ious if aligned else ious.view(num_rows, num_cols)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/carafe.py
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
from torch.autograd import Function
|
| 6 |
+
from torch.nn.modules.module import Module
|
| 7 |
+
|
| 8 |
+
from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init
|
| 9 |
+
from ..utils import ext_loader
|
| 10 |
+
|
| 11 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 12 |
+
'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward',
|
| 13 |
+
'carafe_backward'
|
| 14 |
+
])
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class CARAFENaiveFunction(Function):
    """Autograd function for the naive CARAFE reassembly op.

    Wraps the ``carafe_naive_forward``/``carafe_naive_backward`` kernels
    from the compiled ``_ext`` extension. The functional entry point is
    the module-level ``carafe_naive`` alias.
    """

    @staticmethod
    def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
        # ONNX export: emit the custom MMCVCARAFENaive op. Suffixes encode
        # attribute types (``_i`` = int, ``_f`` = float).
        return g.op(
            'mmcv::MMCVCARAFENaive',
            features,
            masks,
            kernel_size_i=kernel_size,
            group_size_i=group_size,
            scale_factor_f=scale_factor)

    @staticmethod
    def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
        """Reassemble ``features`` with ``masks``.

        Args:
            features (Tensor): input feature map of shape (n, c, h, w).
            masks (Tensor): reassembly kernels of shape
                (n, kernel_size^2 * group_size, h * scale_factor,
                w * scale_factor).
            kernel_size (int): odd reassembly kernel size.
            group_size (int): number of channel groups; must divide c.
            scale_factor (int): upsampling ratio (>= 1).

        Returns:
            Tensor: upsampled feature map of shape
            (n, c, h * scale_factor, w * scale_factor).
        """
        # Validate the contract between features and masks before calling
        # into the extension kernel (which does not re-check shapes).
        assert scale_factor >= 1
        assert masks.size(1) == kernel_size * kernel_size * group_size
        assert masks.size(-1) == features.size(-1) * scale_factor
        assert masks.size(-2) == features.size(-2) * scale_factor
        assert features.size(1) % group_size == 0
        # kernel_size must be odd and positive.
        assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1
        # Stash hyper-parameters on ctx for backward.
        ctx.kernel_size = kernel_size
        ctx.group_size = group_size
        ctx.scale_factor = scale_factor
        ctx.feature_size = features.size()
        ctx.mask_size = masks.size()

        n, c, h, w = features.size()
        output = features.new_zeros((n, c, h * scale_factor, w * scale_factor))
        ext_module.carafe_naive_forward(
            features,
            masks,
            output,
            kernel_size=kernel_size,
            group_size=group_size,
            scale_factor=scale_factor)

        # Only keep tensors alive for backward when gradients are needed.
        if features.requires_grad or masks.requires_grad:
            ctx.save_for_backward(features, masks)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # The backward kernel is CUDA-only.
        assert grad_output.is_cuda

        features, masks = ctx.saved_tensors
        kernel_size = ctx.kernel_size
        group_size = ctx.group_size
        scale_factor = ctx.scale_factor

        grad_input = torch.zeros_like(features)
        grad_masks = torch.zeros_like(masks)
        ext_module.carafe_naive_backward(
            grad_output.contiguous(),
            features,
            masks,
            grad_input,
            grad_masks,
            kernel_size=kernel_size,
            group_size=group_size,
            scale_factor=scale_factor)

        # Gradients only for the two tensor inputs; the three int
        # hyper-parameters get None.
        return grad_input, grad_masks, None, None, None
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# Functional interface: carafe_naive(features, masks, kernel_size,
# group_size, scale_factor). See CARAFENaiveFunction.forward for the
# argument constraints.
carafe_naive = CARAFENaiveFunction.apply
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class CARAFENaive(Module):
    """Module wrapper around the naive CARAFE reassembly op.

    Args:
        kernel_size (int): reassembly kernel size.
        group_size (int): reassembly group size.
        scale_factor (int): upsampling ratio.
    """

    def __init__(self, kernel_size, group_size, scale_factor):
        super(CARAFENaive, self).__init__()

        # All three configuration values must be plain Python ints.
        for value in (kernel_size, group_size, scale_factor):
            assert isinstance(value, int)
        self.kernel_size = kernel_size
        self.group_size = group_size
        self.scale_factor = scale_factor

    def forward(self, features, masks):
        # Delegate to the functional interface of CARAFENaiveFunction.
        return carafe_naive(features, masks, self.kernel_size,
                            self.group_size, self.scale_factor)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
class CARAFEFunction(Function):
    """Autograd function for the optimized CARAFE reassembly op.

    Wraps the ``carafe_forward``/``carafe_backward`` extension kernels.
    Unlike :class:`CARAFENaiveFunction`, it allocates extra intermediate
    buffers (``rfeatures``, ``routput``, ``rmasks``) that are passed to the
    kernels; ``rfeatures`` is also reused during backward.
    """

    @staticmethod
    def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
        # ONNX export: emit the custom MMCVCARAFE op (``_i`` = int attr,
        # ``_f`` = float attr).
        return g.op(
            'mmcv::MMCVCARAFE',
            features,
            masks,
            kernel_size_i=kernel_size,
            group_size_i=group_size,
            scale_factor_f=scale_factor)

    @staticmethod
    def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
        """Reassemble ``features`` with ``masks``.

        Args:
            features (Tensor): input feature map of shape (n, c, h, w).
            masks (Tensor): reassembly kernels of shape
                (n, kernel_size^2 * group_size, h * scale_factor,
                w * scale_factor).
            kernel_size (int): odd reassembly kernel size.
            group_size (int): number of channel groups; must divide c.
            scale_factor (int): upsampling ratio (>= 1).

        Returns:
            Tensor: upsampled feature map of shape
            (n, c, h * scale_factor, w * scale_factor).
        """
        # Same shape contract as the naive variant, checked up front
        # because the extension kernel does not re-validate.
        assert scale_factor >= 1
        assert masks.size(1) == kernel_size * kernel_size * group_size
        assert masks.size(-1) == features.size(-1) * scale_factor
        assert masks.size(-2) == features.size(-2) * scale_factor
        assert features.size(1) % group_size == 0
        assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1
        ctx.kernel_size = kernel_size
        ctx.group_size = group_size
        ctx.scale_factor = scale_factor
        ctx.feature_size = features.size()
        ctx.mask_size = masks.size()

        n, c, h, w = features.size()
        output = features.new_zeros((n, c, h * scale_factor, w * scale_factor))
        # Intermediate buffers filled by the extension kernel; excluded
        # from autograd tracking.
        routput = features.new_zeros(output.size(), requires_grad=False)
        rfeatures = features.new_zeros(features.size(), requires_grad=False)
        rmasks = masks.new_zeros(masks.size(), requires_grad=False)
        ext_module.carafe_forward(
            features,
            masks,
            rfeatures,
            routput,
            rmasks,
            output,
            kernel_size=kernel_size,
            group_size=group_size,
            scale_factor=scale_factor)

        # rfeatures is needed again by carafe_backward, so save it too.
        if features.requires_grad or masks.requires_grad:
            ctx.save_for_backward(features, masks, rfeatures)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # The backward kernel is CUDA-only.
        assert grad_output.is_cuda

        features, masks, rfeatures = ctx.saved_tensors
        kernel_size = ctx.kernel_size
        group_size = ctx.group_size
        scale_factor = ctx.scale_factor

        # Scratch buffers (r-prefixed) plus the actual gradient outputs.
        rgrad_output = torch.zeros_like(grad_output, requires_grad=False)
        rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False)
        rgrad_input = torch.zeros_like(features, requires_grad=False)
        rgrad_masks = torch.zeros_like(masks, requires_grad=False)
        grad_input = torch.zeros_like(features, requires_grad=False)
        grad_masks = torch.zeros_like(masks, requires_grad=False)
        ext_module.carafe_backward(
            grad_output.contiguous(),
            rfeatures,
            masks,
            rgrad_output,
            rgrad_input_hs,
            rgrad_input,
            rgrad_masks,
            grad_input,
            grad_masks,
            kernel_size=kernel_size,
            group_size=group_size,
            scale_factor=scale_factor)
        # Gradients for the two tensor inputs only; ints get None.
        return grad_input, grad_masks, None, None, None
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# Functional interface: carafe(features, masks, kernel_size, group_size,
# scale_factor). See CARAFEFunction.forward for the argument constraints.
carafe = CARAFEFunction.apply
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
class CARAFE(Module):
    """CARAFE: Content-Aware ReAssembly of FEatures.

    Please refer to https://arxiv.org/abs/1905.02188 for more details.

    Args:
        kernel_size (int): reassemble kernel size
        group_size (int): reassemble group size
        scale_factor (int): upsample ratio

    Returns:
        upsampled feature map
    """

    def __init__(self, kernel_size, group_size, scale_factor):
        super(CARAFE, self).__init__()

        # Reject non-integer configuration values early.
        for value in (kernel_size, group_size, scale_factor):
            assert isinstance(value, int)
        self.kernel_size = kernel_size
        self.group_size = group_size
        self.scale_factor = scale_factor

    def forward(self, features, masks):
        # Delegate to the functional interface of CARAFEFunction.
        return carafe(features, masks, self.kernel_size,
                      self.group_size, self.scale_factor)
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
@UPSAMPLE_LAYERS.register_module(name='carafe')
class CARAFEPack(nn.Module):
    """A unified package of CARAFE upsampler that contains: 1) channel
    compressor 2) content encoder 3) CARAFE op.

    Official implementation of ICCV 2019 paper
    CARAFE: Content-Aware ReAssembly of FEatures
    Please refer to https://arxiv.org/abs/1905.02188 for more details.

    Args:
        channels (int): input feature channels
        scale_factor (int): upsample ratio
        up_kernel (int): kernel size of CARAFE op
        up_group (int): group size of CARAFE op
        encoder_kernel (int): kernel size of content encoder
        encoder_dilation (int): dilation of content encoder
        compressed_channels (int): output channels of channels compressor

    Returns:
        upsampled feature map
    """

    def __init__(self,
                 channels,
                 scale_factor,
                 up_kernel=5,
                 up_group=1,
                 encoder_kernel=3,
                 encoder_dilation=1,
                 compressed_channels=64):
        super(CARAFEPack, self).__init__()
        self.channels = channels
        self.scale_factor = scale_factor
        self.up_kernel = up_kernel
        self.up_group = up_group
        self.encoder_kernel = encoder_kernel
        self.encoder_dilation = encoder_dilation
        self.compressed_channels = compressed_channels
        # 1x1 conv that reduces the channel count before kernel prediction.
        self.channel_compressor = nn.Conv2d(channels, self.compressed_channels,
                                            1)
        # Predicts one up_kernel x up_kernel kernel per group per output
        # position; the scale_factor^2 factor is later undone by
        # pixel_shuffle in kernel_normalizer.
        self.content_encoder = nn.Conv2d(
            self.compressed_channels,
            self.up_kernel * self.up_kernel * self.up_group *
            self.scale_factor * self.scale_factor,
            self.encoder_kernel,
            # "same"-style padding for the (odd) encoder kernel.
            padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2),
            dilation=self.encoder_dilation,
            groups=1)
        self.init_weights()

    def init_weights(self):
        # Xavier-init all convs, then re-init the content encoder with a
        # small normal so the predicted kernels start near uniform.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                xavier_init(m, distribution='uniform')
        normal_init(self.content_encoder, std=0.001)

    def kernel_normalizer(self, mask):
        """Rearrange predicted kernels to output resolution and softmax-
        normalize each reassembly kernel so its weights sum to 1."""
        mask = F.pixel_shuffle(mask, self.scale_factor)
        n, mask_c, h, w = mask.size()
        # use float division explicitly,
        # to void inconsistency while exporting to onnx
        mask_channel = int(mask_c / float(self.up_kernel**2))
        # Expose the up_kernel^2 axis (dim 2) so softmax runs per kernel.
        mask = mask.view(n, mask_channel, -1, h, w)

        mask = F.softmax(mask, dim=2, dtype=mask.dtype)
        mask = mask.view(n, mask_c, h, w).contiguous()

        return mask

    def feature_reassemble(self, x, mask):
        # Apply the CARAFE op with the normalized kernels.
        x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor)
        return x

    def forward(self, x):
        # 1) compress channels, 2) predict + normalize reassembly kernels,
        # 3) reassemble the ORIGINAL (uncompressed) features with them.
        compressed_x = self.channel_compressor(x)
        mask = self.content_encoder(compressed_x)
        mask = self.kernel_normalizer(mask)

        x = self.feature_reassemble(x, mask)
        return x
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/cc_attention.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
|
| 6 |
+
from annotator.mmpkg.mmcv.cnn import PLUGIN_LAYERS, Scale
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def NEG_INF_DIAG(n, device):
    """Returns a diagonal matrix of size [n, n].

    The diagonal are all "-inf". This is for avoiding calculating the
    overlapped element in the Criss-Cross twice.
    """
    diagonal = torch.full((n, ), float('-inf'), device=device)
    return torch.diag(diagonal, 0)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@PLUGIN_LAYERS.register_module()
class CrissCrossAttention(nn.Module):
    """Criss-Cross Attention Module.

    .. note::
        Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch
        to a pure PyTorch and equivalent implementation. For more
        details, please refer to https://github.com/open-mmlab/mmcv/pull/1201.

        Speed comparison for one forward pass

        - Input size: [2,512,97,97]
        - Device: 1 NVIDIA GeForce RTX 2080 Ti

        +-----------------------+---------------+------------+---------------+
        |                       |PyTorch version|CUDA version|Relative speed |
        +=======================+===============+============+===============+
        |with torch.no_grad()   |0.00554402 s   |0.0299619 s |5.4x           |
        +-----------------------+---------------+------------+---------------+
        |no with torch.no_grad()|0.00562803 s   |0.0301349 s |5.4x           |
        +-----------------------+---------------+------------+---------------+

    Args:
        in_channels (int): Channels of the input feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        # Query/key are projected to in_channels // 8; value keeps the
        # full channel count.
        self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
        self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
        self.value_conv = nn.Conv2d(in_channels, in_channels, 1)
        # gamma starts at 0, so the module initially acts as an identity
        # residual (forward returns gamma * out + x).
        self.gamma = Scale(0.)
        self.in_channels = in_channels

    def forward(self, x):
        """forward function of Criss-Cross Attention.

        Args:
            x (Tensor): Input feature. \
                shape (batch_size, in_channels, height, width)
        Returns:
            Tensor: Output of the layer, with shape of \
                (batch_size, in_channels, height, width)
        """
        B, C, H, W = x.size()
        query = self.query_conv(x)
        key = self.key_conv(x)
        value = self.value_conv(x)
        # Column-wise affinities: energy_H[b, w, h, i] compares position
        # (h, w) with (i, w). The -inf diagonal removes the self element,
        # which would otherwise be counted again by the row term below.
        energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG(
            H, query.device)
        energy_H = energy_H.transpose(1, 2)
        # Row-wise affinities: energy_W[b, h, w, j] compares (h, w) with
        # (h, j); here the self element is kept.
        energy_W = torch.einsum('bchw,bchj->bhwj', query, key)
        # Joint softmax over the full criss-cross neighbourhood (H + W).
        attn = F.softmax(
            torch.cat([energy_H, energy_W], dim=-1), dim=-1)  # [B,H,W,(H+W)]
        # Aggregate values along the column (first H weights) and the row
        # (remaining W weights).
        out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H])
        out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:])

        out = self.gamma(out) + x
        out = out.contiguous()

        return out

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(in_channels={self.in_channels})'
        return s
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/contour_expand.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
from ..utils import ext_loader
|
| 6 |
+
|
| 7 |
+
ext_module = ext_loader.load_ext('_ext', ['contour_expand'])
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
                   kernel_num):
    """Expand kernel contours so that foreground pixels are assigned into
    instances.

    Arguments:
        kernel_mask (np.array or Tensor): The instance kernel mask with
            size hxw.
        internal_kernel_label (np.array or Tensor): The instance internal
            kernel label with size hxw.
        min_kernel_area (int): The minimum kernel area.
        kernel_num (int): The instance kernel number.

    Returns:
        label (list): The instance index map with size hxw.
    """
    assert isinstance(kernel_mask, (torch.Tensor, np.ndarray))
    assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray))
    assert isinstance(min_kernel_area, int)
    assert isinstance(kernel_num, int)

    # Normalize numpy inputs to tensors before calling the extension op.
    if isinstance(kernel_mask, np.ndarray):
        kernel_mask = torch.from_numpy(kernel_mask)
    if isinstance(internal_kernel_label, np.ndarray):
        internal_kernel_label = torch.from_numpy(internal_kernel_label)

    if torch.__version__ == 'parrots':
        # parrots branch: the op is called with keyword arguments and
        # empty inputs are short-circuited to an empty result; the
        # returned tensor is converted to a list here.
        if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0:
            label = []
        else:
            label = ext_module.contour_expand(
                kernel_mask,
                internal_kernel_label,
                min_kernel_area=min_kernel_area,
                kernel_num=kernel_num)
            label = label.tolist()
    else:
        # Standard PyTorch build: positional call, result returned as-is.
        label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
                                          min_kernel_area, kernel_num)
    return label
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/corner_pool.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
from torch import nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
|
| 6 |
+
from ..utils import ext_loader
|
| 7 |
+
|
| 8 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 9 |
+
'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward',
|
| 10 |
+
'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward',
|
| 11 |
+
'right_pool_forward', 'right_pool_backward'
|
| 12 |
+
])
|
| 13 |
+
|
| 14 |
+
_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3}
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class TopPoolFunction(Function):
    """Autograd wrapper around the ``top_pool`` extension kernels."""

    @staticmethod
    def symbolic(g, input):
        # ONNX export: map onto the custom MMCVCornerPool op in 'top' mode.
        return g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top']))

    @staticmethod
    def forward(ctx, input):
        pooled = ext_module.top_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return ext_module.top_pool_backward(input, grad_output)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class BottomPoolFunction(Function):
    """Autograd wrapper around the ``bottom_pool`` extension kernels."""

    @staticmethod
    def symbolic(g, input):
        # ONNX export: map onto the custom MMCVCornerPool op in 'bottom'
        # mode.
        return g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom']))

    @staticmethod
    def forward(ctx, input):
        pooled = ext_module.bottom_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return ext_module.bottom_pool_backward(input, grad_output)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class LeftPoolFunction(Function):
    """Autograd wrapper around the ``left_pool`` extension kernels."""

    @staticmethod
    def symbolic(g, input):
        # ONNX export: map onto the custom MMCVCornerPool op in 'left' mode.
        return g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left']))

    @staticmethod
    def forward(ctx, input):
        pooled = ext_module.left_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return ext_module.left_pool_backward(input, grad_output)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class RightPoolFunction(Function):
    """Autograd wrapper around the ``right_pool`` extension kernels."""

    @staticmethod
    def symbolic(g, input):
        # ONNX export: map onto the custom MMCVCornerPool op in 'right'
        # mode.
        return g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right']))

    @staticmethod
    def forward(ctx, input):
        pooled = ext_module.right_pool_forward(input)
        ctx.save_for_backward(input)
        return pooled

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return ext_module.right_pool_backward(input, grad_output)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class CornerPool(nn.Module):
    """Corner Pooling.

    Corner Pooling is a new type of pooling layer that helps a
    convolutional network better localize corners of bounding boxes.

    Please refer to https://arxiv.org/abs/1808.01244 for more details.
    Code is modified from https://github.com/princeton-vl/CornerNet-Lite.

    Args:
        mode(str): Pooling orientation for the pooling layer

            - 'bottom': Bottom Pooling
            - 'left': Left Pooling
            - 'right': Right Pooling
            - 'top': Top Pooling

    Returns:
        Feature map after pooling.
    """

    # Extension-backed autograd functions, used as a fallback when
    # torch.cummax is unavailable (torch < 1.5 or the parrots build).
    pool_functions = {
        'bottom': BottomPoolFunction,
        'left': LeftPoolFunction,
        'right': RightPoolFunction,
        'top': TopPoolFunction,
    }

    # For each mode: the spatial dim the running max scans along (2 = H,
    # 3 = W) and whether the scan direction requires flipping the tensor.
    cummax_dim_flip = {
        'bottom': (2, False),
        'left': (3, True),
        'right': (3, False),
        'top': (2, True),
    }

    def __init__(self, mode):
        super(CornerPool, self).__init__()
        assert mode in self.pool_functions
        self.mode = mode
        self.corner_pool = self.pool_functions[mode]

    @staticmethod
    def _torch_version_tuple():
        """Parse ``torch.__version__`` into a tuple of ints.

        Lexicographic string comparison (the previous behaviour) is wrong
        for two-digit components: '1.10.0' < '1.5.0' as strings, which
        silently misrouted torch >= 1.10 to the CUDA fallback. A local
        version suffix such as '+cu117' is stripped before parsing, and
        parsing stops at the first non-numeric component (e.g. '1.8.0rc1'
        -> (1, 8, 0)).
        """
        parts = []
        for piece in torch.__version__.split('+')[0].split('.'):
            digits = ''
            for ch in piece:
                if not ch.isdigit():
                    break
                digits += ch
            if not digits:
                break
            parts.append(int(digits))
        return tuple(parts)

    def forward(self, x):
        # Prefer the pure-PyTorch cummax implementation when available;
        # version check uses numeric tuples, not string comparison.
        if torch.__version__ != 'parrots' and \
                self._torch_version_tuple() >= (1, 5):
            if torch.onnx.is_in_onnx_export():
                assert self._torch_version_tuple() >= (1, 7), \
                    'When `cummax` serves as an intermediate component whose '\
                    'outputs is used as inputs for another modules, it\'s '\
                    'expected that pytorch version must be >= 1.7.0, '\
                    'otherwise Error appears like: `RuntimeError: tuple '\
                    'appears in op that does not forward tuples, unsupported '\
                    'kind: prim::PythonOp`.'

            dim, flip = self.cummax_dim_flip[self.mode]
            if flip:
                x = x.flip(dim)
            pool_tensor, _ = torch.cummax(x, dim=dim)
            if flip:
                pool_tensor = pool_tensor.flip(dim)
            return pool_tensor
        else:
            return self.corner_pool.apply(x)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/correlation.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
from torch import Tensor, nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
from torch.autograd.function import once_differentiable
|
| 6 |
+
from torch.nn.modules.utils import _pair
|
| 7 |
+
|
| 8 |
+
from ..utils import ext_loader
|
| 9 |
+
|
| 10 |
+
ext_module = ext_loader.load_ext(
|
| 11 |
+
'_ext', ['correlation_forward', 'correlation_backward'])
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CorrelationFunction(Function):
    """Autograd function for the correlation (cost-volume) op.

    Wraps the ``correlation_forward``/``correlation_backward`` extension
    kernels. Use through :class:`Correlation` or
    ``CorrelationFunction.apply``.
    """

    @staticmethod
    def forward(ctx,
                input1,
                input2,
                kernel_size=1,
                max_displacement=1,
                stride=1,
                padding=1,
                dilation=1,
                dilation_patch=1):

        ctx.save_for_backward(input1, input2)

        # Normalize all int hyper-parameters to (H, W) pairs and stash
        # them on ctx for backward / output-size computation.
        kH, kW = ctx.kernel_size = _pair(kernel_size)
        # The displacement window spans [-max_displacement,
        # +max_displacement] in each direction.
        patch_size = max_displacement * 2 + 1
        ctx.patch_size = patch_size
        dH, dW = ctx.stride = _pair(stride)
        padH, padW = ctx.padding = _pair(padding)
        dilationH, dilationW = ctx.dilation = _pair(dilation)
        dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair(
            dilation_patch)

        # (batch, patch_size, patch_size, oH, oW)
        output_size = CorrelationFunction._output_size(ctx, input1)

        output = input1.new_zeros(output_size)

        ext_module.correlation_forward(
            input1,
            input2,
            output,
            kH=kH,
            kW=kW,
            patchH=patch_size,
            patchW=patch_size,
            padH=padH,
            padW=padW,
            dilationH=dilationH,
            dilationW=dilationW,
            dilation_patchH=dilation_patchH,
            dilation_patchW=dilation_patchW,
            dH=dH,
            dW=dW)

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input1, input2 = ctx.saved_tensors

        kH, kW = ctx.kernel_size
        patch_size = ctx.patch_size
        padH, padW = ctx.padding
        dilationH, dilationW = ctx.dilation
        dilation_patchH, dilation_patchW = ctx.dilation_patch
        dH, dW = ctx.stride
        grad_input1 = torch.zeros_like(input1)
        grad_input2 = torch.zeros_like(input2)

        ext_module.correlation_backward(
            grad_output,
            input1,
            input2,
            grad_input1,
            grad_input2,
            kH=kH,
            kW=kW,
            patchH=patch_size,
            patchW=patch_size,
            padH=padH,
            padW=padW,
            dilationH=dilationH,
            dilationW=dilationW,
            dilation_patchH=dilation_patchH,
            dilation_patchW=dilation_patchW,
            dH=dH,
            dW=dW)
        # Gradients for the two tensor inputs; the six int
        # hyper-parameters get None.
        return grad_input1, grad_input2, None, None, None, None, None, None

    @staticmethod
    def _output_size(ctx, input1):
        """Compute the output shape from the hyper-parameters on ctx."""
        iH, iW = input1.size(2), input1.size(3)
        batch_size = input1.size(0)
        kH, kW = ctx.kernel_size
        patch_size = ctx.patch_size
        dH, dW = ctx.stride
        padH, padW = ctx.padding
        dilationH, dilationW = ctx.dilation
        dilatedKH = (kH - 1) * dilationH + 1
        dilatedKW = (kW - 1) * dilationW + 1

        # Standard conv output-size arithmetic with dilated kernels.
        oH = int((iH + 2 * padH - dilatedKH) / dH + 1)
        oW = int((iW + 2 * padW - dilatedKW) / dW + 1)

        output_size = (batch_size, patch_size, patch_size, oH, oW)
        return output_size
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class Correlation(nn.Module):
    r"""Correlation operator

    This correlation operator works for optical flow correlation computation.

    There are two batched tensors with shape :math:`(N, C, H, W)`,
    and the correlation output's shape is :math:`(N, max\_displacement \times
    2 + 1, max\_displacement * 2 + 1, H_{out}, W_{out})`

    where

    .. math::
        H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding -
            dilation \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    .. math::
        W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation
            \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding
    window convolution between input1 and shifted input2,

    .. math::
        Corr(N_i, dx, dy) =
        \sum_{c=0}^{C-1}
        input1(N_i, c) \star
        \mathcal{S}(input2(N_i, c), dy, dx)

    where :math:`\star` is the valid 2d sliding window convolution operator,
    and :math:`\mathcal{S}` means shifting the input features (auto-complete
    zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in
    [-max\_displacement \times dilation\_patch, max\_displacement \times
    dilation\_patch]`.

    Args:
        kernel_size (int): The size of sliding window i.e. local neighborhood
            representing the center points and involved in correlation
            computation. Defaults to 1.
        max_displacement (int): The radius for computing correlation volume,
            but the actual working space can be dilated by dilation_patch.
            Defaults to 1.
        stride (int): The stride of the sliding blocks in the input spatial
            dimensions. Defaults to 1.
        padding (int): Zero padding added to all four sides of the input1.
            Defaults to 0.
        dilation (int): The spacing of local neighborhood that will involved
            in correlation. Defaults to 1.
        dilation_patch (int): The spacing between position need to compute
            correlation. Defaults to 1.
    """

    def __init__(self,
                 kernel_size: int = 1,
                 max_displacement: int = 1,
                 stride: int = 1,
                 padding: int = 0,
                 dilation: int = 1,
                 dilation_patch: int = 1) -> None:
        super().__init__()
        # Plain attribute storage; all work happens in the autograd
        # function at forward time.
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.dilation_patch = dilation_patch

    def forward(self, input1: Tensor, input2: Tensor) -> Tensor:
        # Delegate to the extension-backed autograd function.
        return CorrelationFunction.apply(input1, input2, self.kernel_size,
                                         self.max_displacement, self.stride,
                                         self.padding, self.dilation,
                                         self.dilation_patch)

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}'
                f'(kernel_size={self.kernel_size}, '
                f'max_displacement={self.max_displacement}, '
                f'stride={self.stride}, '
                f'padding={self.padding}, '
                f'dilation={self.dilation}, '
                f'dilation_patch={self.dilation_patch})')
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_conv.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from typing import Tuple, Union
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
from torch import Tensor
|
| 8 |
+
from torch.autograd import Function
|
| 9 |
+
from torch.autograd.function import once_differentiable
|
| 10 |
+
from torch.nn.modules.utils import _pair, _single
|
| 11 |
+
|
| 12 |
+
from annotator.mmpkg.mmcv.utils import deprecated_api_warning
|
| 13 |
+
from ..cnn import CONV_LAYERS
|
| 14 |
+
from ..utils import ext_loader, print_log
|
| 15 |
+
|
| 16 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 17 |
+
'deform_conv_forward', 'deform_conv_backward_input',
|
| 18 |
+
'deform_conv_backward_parameters'
|
| 19 |
+
])
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class DeformConv2dFunction(Function):
    """Autograd function for deformable 2D convolution.

    Thin Python wrapper around the compiled ``_ext`` kernels
    (``deform_conv_forward``, ``deform_conv_backward_input``,
    ``deform_conv_backward_parameters``).  Also provides an ONNX
    ``symbolic`` that emits the custom ``mmcv::MMCVDeformConv2d`` op.
    """

    @staticmethod
    def symbolic(g,
                 input,
                 offset,
                 weight,
                 stride,
                 padding,
                 dilation,
                 groups,
                 deform_groups,
                 bias=False,
                 im2col_step=32):
        """Build the ONNX graph node used when exporting this op."""
        return g.op(
            'mmcv::MMCVDeformConv2d',
            input,
            offset,
            weight,
            stride_i=stride,
            padding_i=padding,
            dilation_i=dilation,
            groups_i=groups,
            deform_groups_i=deform_groups,
            bias_i=bias,
            im2col_step_i=im2col_step)

    @staticmethod
    def forward(ctx,
                input,
                offset,
                weight,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deform_groups=1,
                bias=False,
                im2col_step=32):
        """Run the deformable convolution forward kernel.

        Args:
            input: 4D feature map of shape (N, C, H, W).
            offset: Sampling offsets predicted for each kernel location.
            weight: Convolution weight tensor.
            bias: Must be ``False``; a learnable bias is not supported.
            im2col_step: Number of samples processed per kernel call; the
                batch size must be divisible by the effective step.
        """
        if input is not None and input.dim() != 4:
            raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor \
instead.')
        assert bias is False, 'Only support bias is False.'
        # Hyper-parameters are stashed on ctx so backward() can rebuild the
        # exact same kernel configuration.
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deform_groups = deform_groups
        ctx.im2col_step = im2col_step

        # When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
        # amp won't cast the type of model (float32), but "offset" is cast
        # to float16 by nn.Conv2d automatically, leading to the type
        # mismatch with input (when it is float32) or weight.
        # The flag for whether to use fp16 or amp is the type of "offset",
        # we cast weight and input to temporarily support fp16 and amp
        # whatever the pytorch version is.
        input = input.type_as(offset)
        weight = weight.type_as(input)
        ctx.save_for_backward(input, offset, weight)

        output = input.new_empty(
            DeformConv2dFunction._output_size(ctx, input, weight))

        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones

        # Clamp the step to the batch size; the kernel asserts divisibility.
        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) %
                cur_im2col_step) == 0, 'im2col step must divide batchsize'
        ext_module.deform_conv_forward(
            input,
            weight,
            offset,
            output,
            ctx.bufs_[0],
            ctx.bufs_[1],
            kW=weight.size(3),
            kH=weight.size(2),
            dW=ctx.stride[1],
            dH=ctx.stride[0],
            padW=ctx.padding[1],
            padH=ctx.padding[0],
            dilationW=ctx.dilation[1],
            dilationH=ctx.dilation[0],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            im2col_step=cur_im2col_step)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute gradients w.r.t. input, offset and weight.

        Returns one gradient (or ``None``) per argument of :meth:`forward`;
        the trailing ``None`` entries correspond to the non-tensor
        hyper-parameters.
        """
        input, offset, weight = ctx.saved_tensors

        grad_input = grad_offset = grad_weight = None

        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) % cur_im2col_step
                ) == 0, 'batch size must be divisible by im2col_step'

        grad_output = grad_output.contiguous()
        # Input and offset gradients are produced by a single fused kernel,
        # so either requiring grad triggers both computations.
        if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
            grad_input = torch.zeros_like(input)
            grad_offset = torch.zeros_like(offset)
            ext_module.deform_conv_backward_input(
                input,
                offset,
                grad_output,
                grad_input,
                grad_offset,
                weight,
                ctx.bufs_[0],
                kW=weight.size(3),
                kH=weight.size(2),
                dW=ctx.stride[1],
                dH=ctx.stride[0],
                padW=ctx.padding[1],
                padH=ctx.padding[0],
                dilationW=ctx.dilation[1],
                dilationH=ctx.dilation[0],
                group=ctx.groups,
                deformable_group=ctx.deform_groups,
                im2col_step=cur_im2col_step)

        if ctx.needs_input_grad[2]:
            grad_weight = torch.zeros_like(weight)
            ext_module.deform_conv_backward_parameters(
                input,
                offset,
                grad_output,
                grad_weight,
                ctx.bufs_[0],
                ctx.bufs_[1],
                kW=weight.size(3),
                kH=weight.size(2),
                dW=ctx.stride[1],
                dH=ctx.stride[0],
                padW=ctx.padding[1],
                padH=ctx.padding[0],
                dilationW=ctx.dilation[1],
                dilationH=ctx.dilation[0],
                group=ctx.groups,
                deformable_group=ctx.deform_groups,
                scale=1,
                im2col_step=cur_im2col_step)

        return grad_input, grad_offset, grad_weight, \
            None, None, None, None, None, None, None

    @staticmethod
    def _output_size(ctx, input, weight):
        """Return the (N, C_out, H_out, W_out) shape of the convolution
        output, using the standard conv output-size formula per spatial dim.

        Raises:
            ValueError: If any computed spatial dimension is non-positive.
        """
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = ctx.padding[d]
            kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = ctx.stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(
                'convolution input is too small (output would be ' +
                'x'.join(map(str, output_size)) + ')')
        return output_size
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# Functional interface; see ``DeformConv2dFunction.forward`` for arguments.
deform_conv2d = DeformConv2dFunction.apply
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
class DeformConv2d(nn.Module):
    r"""Deformable 2D convolution.

    Applies a deformable 2D convolution over an input signal composed of
    several input planes. DeformConv2d was described in the paper
    `Deformable Convolutional Networks
    <https://arxiv.org/pdf/1703.06211.pdf>`_

    Note:
        The argument ``im2col_step`` was added in version 1.3.17, which means
        number of samples processed by the ``im2col_cuda_kernel`` per call.
        It enables users to define ``batch_size`` and ``im2col_step`` more
        flexibly and solved `issue mmcv#1440
        <https://github.com/open-mmlab/mmcv/issues/1440>`_.

    Args:
        in_channels (int): Number of channels in the input image.
        out_channels (int): Number of channels produced by the convolution.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or tuple): Stride of the convolution. Default: 1.
        padding (int or tuple): Zero-padding added to both sides of the input.
            Default: 0.
        dilation (int or tuple): Spacing between kernel elements. Default: 1.
        groups (int): Number of blocked connections from input channels to
            output channels. Default: 1.
        deform_groups (int): Number of deformable group partitions.
        bias (bool): If True, adds a learnable bias to the output.
            Only ``False`` is supported. Default: False.
        im2col_step (int): Number of samples processed by im2col_cuda_kernel
            per call. It will work when ``batch_size`` > ``im2col_step``, but
            ``batch_size`` must be divisible by ``im2col_step``. Default: 32.
            `New in version 1.3.17.`
    """

    @deprecated_api_warning({'deformable_groups': 'deform_groups'},
                            cls_name='DeformConv2d')
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, ...]],
                 stride: Union[int, Tuple[int, ...]] = 1,
                 padding: Union[int, Tuple[int, ...]] = 0,
                 dilation: Union[int, Tuple[int, ...]] = 1,
                 groups: int = 1,
                 deform_groups: int = 1,
                 bias: bool = False,
                 im2col_step: int = 32) -> None:
        super().__init__()

        assert not bias, \
            f'bias={bias} is not supported in DeformConv2d.'
        # Fixed: the original assertion messages read "cannot be divisible
        # by groups", which states the opposite of the checked condition.
        assert in_channels % groups == 0, \
            f'in_channels {in_channels} is not divisible by groups {groups}'
        assert out_channels % groups == 0, \
            f'out_channels {out_channels} is not divisible by groups {groups}'

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deform_groups = deform_groups
        self.im2col_step = im2col_step
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)

        # only weight, no bias
        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // self.groups,
                         *self.kernel_size))

        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Initialize ``self.weight`` with Kaiming-uniform initialization.

        Uses the standard kaiming method described in `Delving deep into
        rectifiers: Surpassing human-level performance on ImageNet
        classification` - He, K. et al. (2015), with a uniform distribution.
        """
        nn.init.kaiming_uniform_(self.weight, nonlinearity='relu')

    def forward(self, x: Tensor, offset: Tensor) -> Tensor:
        """Deformable Convolutional forward function.

        Args:
            x (Tensor): Input feature, shape (B, C_in, H_in, W_in)
            offset (Tensor): Offset for deformable convolution, shape
                (B, deform_groups*kernel_size[0]*kernel_size[1]*2,
                H_out, W_out), H_out, W_out are equal to the output's.

                An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
                The spatial arrangement is like:

                .. code:: text

                    (x0, y0) (x1, y1) (x2, y2)
                    (x3, y3) (x4, y4) (x5, y5)
                    (x6, y6) (x7, y7) (x8, y8)

        Returns:
            Tensor: Output of the layer.
        """
        # To fix an assert error in deform_conv_cuda.cpp:128
        # when the input image is smaller than the kernel, zero-pad the
        # input (and offset) up to the kernel size, then crop the output
        # back afterwards.
        input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) <
                                                          self.kernel_size[1])
        if input_pad:
            pad_h = max(self.kernel_size[0] - x.size(2), 0)
            pad_w = max(self.kernel_size[1] - x.size(3), 0)
            x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
            offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0)
            offset = offset.contiguous()
        out = deform_conv2d(x, offset, self.weight, self.stride, self.padding,
                            self.dilation, self.groups, self.deform_groups,
                            False, self.im2col_step)
        if input_pad:
            out = out[:, :, :out.size(2) - pad_h, :out.size(3) -
                      pad_w].contiguous()
        return out

    def __repr__(self) -> str:
        s = self.__class__.__name__
        s += f'(in_channels={self.in_channels},\n'
        s += f'out_channels={self.out_channels},\n'
        s += f'kernel_size={self.kernel_size},\n'
        s += f'stride={self.stride},\n'
        s += f'padding={self.padding},\n'
        s += f'dilation={self.dilation},\n'
        s += f'groups={self.groups},\n'
        s += f'deform_groups={self.deform_groups},\n'
        # bias is not supported in DeformConv2d.
        s += 'bias=False)'
        return s
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
@CONV_LAYERS.register_module('DCN')
class DeformConv2dPack(DeformConv2d):
    """A Deformable Conv Encapsulation that acts as normal Conv layers.

    The offsets are predicted from the input itself by an internal
    ``conv_offset`` layer, so callers use it exactly like ``nn.Conv2d``.

    The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
    The spatial arrangement is like:

    .. code:: text

        (x0, y0) (x1, y1) (x2, y2)
        (x3, y3) (x4, y4) (x5, y5)
        (x6, y6) (x7, y7) (x8, y8)

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int or tuple[int]): Same as nn.Conv2d.
        padding (int or tuple[int]): Same as nn.Conv2d.
        dilation (int or tuple[int]): Same as nn.Conv2d.
        groups (int): Same as nn.Conv2d.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    # Checkpoint format version; used by _load_from_state_dict to upgrade
    # old checkpoints that used the `*_offset` key naming.
    _version = 2

    def __init__(self, *args, **kwargs):
        super(DeformConv2dPack, self).__init__(*args, **kwargs)
        # Predicts 2 offsets (y, x) per kernel location per deformable
        # group, with the same spatial configuration as the main conv.
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            dilation=_pair(self.dilation),
            bias=True)
        self.init_offset()

    def init_offset(self):
        # Zero-initialize so the layer initially behaves like a regular
        # (non-deformed) convolution.
        self.conv_offset.weight.data.zero_()
        self.conv_offset.bias.data.zero_()

    def forward(self, x):
        """Predict offsets from ``x`` and apply deformable convolution."""
        offset = self.conv_offset(x)
        return deform_conv2d(x, offset, self.weight, self.stride, self.padding,
                             self.dilation, self.groups, self.deform_groups,
                             False, self.im2col_step)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Load weights, renaming legacy (version < 2) offset-layer keys."""
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, DeformConvPack loads previous benchmark models.
            if (prefix + 'conv_offset.weight' not in state_dict
                    and prefix[:-1] + '_offset.weight' in state_dict):
                state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
                    prefix[:-1] + '_offset.weight')
            if (prefix + 'conv_offset.bias' not in state_dict
                    and prefix[:-1] + '_offset.bias' in state_dict):
                state_dict[prefix +
                           'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
                                                                '_offset.bias')

        if version is not None and version > 1:
            print_log(
                f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to '
                'version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_roi_pool.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from torch import nn
|
| 3 |
+
from torch.autograd import Function
|
| 4 |
+
from torch.autograd.function import once_differentiable
|
| 5 |
+
from torch.nn.modules.utils import _pair
|
| 6 |
+
|
| 7 |
+
from ..utils import ext_loader
|
| 8 |
+
|
| 9 |
+
ext_module = ext_loader.load_ext(
|
| 10 |
+
'_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward'])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class DeformRoIPoolFunction(Function):
    """Autograd function for deformable RoI pooling.

    Wraps the compiled ``_ext`` kernels ``deform_roi_pool_forward`` /
    ``deform_roi_pool_backward`` and provides an ONNX ``symbolic``.
    """

    @staticmethod
    def symbolic(g, input, rois, offset, output_size, spatial_scale,
                 sampling_ratio, gamma):
        """Build the ONNX graph node (mmcv::MMCVDeformRoIPool) for export."""
        return g.op(
            'mmcv::MMCVDeformRoIPool',
            input,
            rois,
            offset,
            pooled_height_i=output_size[0],
            pooled_width_i=output_size[1],
            spatial_scale_f=spatial_scale,
            sampling_ratio_f=sampling_ratio,
            gamma_f=gamma)

    @staticmethod
    def forward(ctx,
                input,
                rois,
                offset,
                output_size,
                spatial_scale=1.0,
                sampling_ratio=0,
                gamma=0.1):
        """Pool RoI features with optional per-bin offsets.

        Args:
            input: Input feature map.
            rois: RoIs of shape (n, 5) as (batch_idx, x1, y1, x2, y2).
            offset: Per-bin offsets; ``None`` means plain (non-deformable)
                RoI align, signalled to the kernel by an empty tensor.
            output_size: Pooled output size (h, w).
            gamma: Offset scaling factor passed to the kernel.
        """
        if offset is None:
            # An empty tensor tells the kernel to skip offset sampling.
            offset = input.new_zeros(0)
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = float(spatial_scale)
        ctx.sampling_ratio = int(sampling_ratio)
        ctx.gamma = float(gamma)

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)

        ext_module.deform_roi_pool_forward(
            input,
            rois,
            offset,
            output,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            gamma=ctx.gamma)

        ctx.save_for_backward(input, rois, offset)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute gradients w.r.t. input and (when present) offset.

        Returns one entry per argument of :meth:`forward`; rois and the
        scalar hyper-parameters get ``None``.
        """
        input, rois, offset = ctx.saved_tensors
        grad_input = grad_output.new_zeros(input.shape)
        grad_offset = grad_output.new_zeros(offset.shape)

        ext_module.deform_roi_pool_backward(
            grad_output,
            input,
            rois,
            offset,
            grad_input,
            grad_offset,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            gamma=ctx.gamma)
        # Forward received offset=None (empty tensor); report no gradient.
        if grad_offset.numel() == 0:
            grad_offset = None
        return grad_input, None, grad_offset, None, None, None, None
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# Functional interface; see ``DeformRoIPoolFunction.forward`` for arguments.
deform_roi_pool = DeformRoIPoolFunction.apply
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class DeformRoIPool(nn.Module):
    """Deformable RoI pooling layer.

    Module wrapper around :func:`deform_roi_pool`; offsets are supplied by
    the caller (or omitted for plain RoI align).

    Args:
        output_size (int or tuple): Pooled output size (h, w).
        spatial_scale (float): Scale mapping RoI coords to feature coords.
            Default: 1.0.
        sampling_ratio (int): Sampling points per bin; 0 means adaptive.
            Default: 0.
        gamma (float): Offset scaling factor. Default: 0.1.
    """

    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super().__init__()
        # Normalize hyper-parameters once so forward() can pass them
        # straight through to the kernel.
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.gamma = float(gamma)

    def forward(self, input, rois, offset=None):
        """Pool ``rois`` from ``input``; ``offset=None`` disables deformation."""
        pool_args = (input, rois, offset, self.output_size,
                     self.spatial_scale, self.sampling_ratio, self.gamma)
        return deform_roi_pool(*pool_args)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
class DeformRoIPoolPack(DeformRoIPool):
    """Deformable RoI pooling that predicts its own offsets.

    A first (offset-free) pooling pass produces features from which a small
    MLP (``offset_fc``) predicts per-bin offsets; a second pass applies them.

    Args:
        output_size (int or tuple): Pooled output size (h, w).
        output_channels (int): Expected channel count of the input feature
            map (checked in :meth:`forward`).
        deform_fc_channels (int): Hidden width of the offset MLP.
            Default: 1024.
        spatial_scale (float): See :class:`DeformRoIPool`. Default: 1.0.
        sampling_ratio (int): See :class:`DeformRoIPool`. Default: 0.
        gamma (float): See :class:`DeformRoIPool`. Default: 0.1.
    """

    def __init__(self,
                 output_size,
                 output_channels,
                 deform_fc_channels=1024,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale,
                                                sampling_ratio, gamma)

        self.output_channels = output_channels
        self.deform_fc_channels = deform_fc_channels

        # MLP mapping pooled features -> 2 offsets (y, x) per output bin.
        self.offset_fc = nn.Sequential(
            nn.Linear(
                self.output_size[0] * self.output_size[1] *
                self.output_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels,
                      self.output_size[0] * self.output_size[1] * 2))
        # Zero-init the last layer so training starts from zero offsets
        # (i.e. plain RoI pooling).
        self.offset_fc[-1].weight.data.zero_()
        self.offset_fc[-1].bias.data.zero_()

    def forward(self, input, rois):
        """Two-pass forward: pool, predict offsets, pool again with them."""
        assert input.size(1) == self.output_channels
        x = deform_roi_pool(input, rois, None, self.output_size,
                            self.spatial_scale, self.sampling_ratio,
                            self.gamma)
        rois_num = rois.size(0)
        offset = self.offset_fc(x.view(rois_num, -1))
        offset = offset.view(rois_num, 2, self.output_size[0],
                             self.output_size[1])
        return deform_roi_pool(input, rois, offset, self.output_size,
                               self.spatial_scale, self.sampling_ratio,
                               self.gamma)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
class ModulatedDeformRoIPoolPack(DeformRoIPool):
    """Modulated deformable RoI pooling with self-predicted offsets.

    Like :class:`DeformRoIPoolPack`, but additionally predicts a per-bin
    modulation mask in [0, 1] (``mask_fc`` ends in a Sigmoid) that scales
    the deformably pooled output.

    Args:
        output_size (int or tuple): Pooled output size (h, w).
        output_channels (int): Expected channel count of the input feature
            map (checked in :meth:`forward`).
        deform_fc_channels (int): Hidden width of the offset/mask MLPs.
            Default: 1024.
        spatial_scale (float): See :class:`DeformRoIPool`. Default: 1.0.
        sampling_ratio (int): See :class:`DeformRoIPool`. Default: 0.
        gamma (float): See :class:`DeformRoIPool`. Default: 0.1.
    """

    def __init__(self,
                 output_size,
                 output_channels,
                 deform_fc_channels=1024,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super(ModulatedDeformRoIPoolPack,
              self).__init__(output_size, spatial_scale, sampling_ratio, gamma)

        self.output_channels = output_channels
        self.deform_fc_channels = deform_fc_channels

        # MLP mapping pooled features -> 2 offsets (y, x) per output bin.
        self.offset_fc = nn.Sequential(
            nn.Linear(
                self.output_size[0] * self.output_size[1] *
                self.output_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels,
                      self.output_size[0] * self.output_size[1] * 2))
        # Zero-init so training starts from zero offsets.
        self.offset_fc[-1].weight.data.zero_()
        self.offset_fc[-1].bias.data.zero_()

        # MLP mapping pooled features -> one modulation scalar per bin.
        self.mask_fc = nn.Sequential(
            nn.Linear(
                self.output_size[0] * self.output_size[1] *
                self.output_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels,
                      self.output_size[0] * self.output_size[1] * 1),
            nn.Sigmoid())
        # Zero-init the final Linear (index 2; the Sigmoid follows it), so
        # the initial mask is sigmoid(0) = 0.5 everywhere.
        self.mask_fc[2].weight.data.zero_()
        self.mask_fc[2].bias.data.zero_()

    def forward(self, input, rois):
        """Pool, predict offsets and mask, pool deformably, then modulate."""
        assert input.size(1) == self.output_channels
        x = deform_roi_pool(input, rois, None, self.output_size,
                            self.spatial_scale, self.sampling_ratio,
                            self.gamma)
        rois_num = rois.size(0)
        offset = self.offset_fc(x.view(rois_num, -1))
        offset = offset.view(rois_num, 2, self.output_size[0],
                             self.output_size[1])
        mask = self.mask_fc(x.view(rois_num, -1))
        mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1])
        d = deform_roi_pool(input, rois, offset, self.output_size,
                            self.spatial_scale, self.sampling_ratio,
                            self.gamma)
        return d * mask
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deprecated_wrappers.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
# This file is for backward compatibility.
|
| 3 |
+
# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks.
|
| 4 |
+
import warnings
|
| 5 |
+
|
| 6 |
+
from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Conv2d_deprecated(Conv2d):
    """Backward-compatible alias for the ``Conv2d`` wrapper.

    Behaves exactly like ``mmcv.cnn.Conv2d`` but warns that importing it
    from ``mmcv.ops`` is deprecated.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import them from "mmcv.cnn" instead')
        warnings.warn(message)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ConvTranspose2d_deprecated(ConvTranspose2d):
    """Backward-compatible alias for the ``ConvTranspose2d`` wrapper.

    Behaves exactly like ``mmcv.cnn.ConvTranspose2d`` but warns that
    importing it from ``mmcv.ops`` is deprecated.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing ConvTranspose2d wrapper from "mmcv.ops" will be '
            'deprecated in the future. Please import them from "mmcv.cnn" '
            'instead')
        warnings.warn(message)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class MaxPool2d_deprecated(MaxPool2d):
    """Backward-compatible alias for the ``MaxPool2d`` wrapper.

    Behaves exactly like ``mmcv.cnn.MaxPool2d`` but warns that importing it
    from ``mmcv.ops`` is deprecated.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import them from "mmcv.cnn" instead')
        warnings.warn(message)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class Linear_deprecated(Linear):
    """Backward-compatible alias for the ``Linear`` wrapper.

    Behaves exactly like ``mmcv.cnn.Linear`` but warns that importing it
    from ``mmcv.ops`` is deprecated.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        message = (
            'Importing Linear wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import them from "mmcv.cnn" instead')
        warnings.warn(message)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/focal_loss.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
from torch.autograd.function import once_differentiable
|
| 6 |
+
|
| 7 |
+
from ..utils import ext_loader
|
| 8 |
+
|
| 9 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 10 |
+
'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward',
|
| 11 |
+
'softmax_focal_loss_forward', 'softmax_focal_loss_backward'
|
| 12 |
+
])
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class SigmoidFocalLossFunction(Function):
    """Autograd Function wrapping the CUDA sigmoid focal loss kernels."""

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        # ONNX export: emit a custom 'mmcv::MMCVSigmoidFocalLoss' node with
        # the hyper-parameters attached as node attributes.
        return g.op(
            'mmcv::MMCVSigmoidFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):
        """Compute the sigmoid focal loss via the `_ext` CUDA kernel.

        Args:
            input (Tensor): (N, num_classes) predictions.
            target (LongTensor): (N,) class indices.
            gamma (float): Focusing parameter. Defaults to 2.0.
            alpha (float): Balancing parameter. Defaults to 0.25.
            weight (Tensor, optional): (num_classes,) per-class weights.
                Defaults to None (no weighting).
            reduction (str): 'none', 'mean' or 'sum'. Defaults to 'mean'.

        Returns:
            Tensor: (N, num_classes) element-wise loss for 'none',
            a scalar otherwise.
        """
        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            # The kernel expects a tensor argument; an empty tensor means
            # "no class weighting".
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        # The kernel writes the per-element loss into this buffer in place.
        output = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_forward(
            input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha)
        # Reduction is applied on the Python side, after the kernel call.
        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input, target, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, target, weight = ctx.saved_tensors

        # Buffer filled in place by the backward kernel.
        grad_input = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_backward(
            input,
            target,
            weight,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        # Chain rule with the incoming gradient; undo the 1/N of the 'mean'
        # reduction applied in forward.
        grad_input *= grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input /= input.size(0)
        # Gradients only w.r.t. `input`; the remaining five forward arguments
        # (target, gamma, alpha, weight, reduction) get None.
        return grad_input, None, None, None, None, None
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# Functional interface to SigmoidFocalLossFunction (CUDA-backed).
sigmoid_focal_loss = SigmoidFocalLossFunction.apply
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class SigmoidFocalLoss(nn.Module):
    """``nn.Module`` wrapper around :func:`sigmoid_focal_loss`.

    Args:
        gamma (float): Focusing parameter forwarded to the op.
        alpha (float): Balancing parameter forwarded to the op.
        weight (Tensor, optional): Per-class weights, stored as a buffer so
            they move with the module (``.to()``/``.cuda()``). Defaults to
            None.
        reduction (str): 'none', 'mean' or 'sum'. Defaults to 'mean'.
    """

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        """Compute the sigmoid focal loss of ``input`` against ``target``."""
        return sigmoid_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        return (f'{self.__class__.__name__}'
                f'(gamma={self.gamma}, '
                f'alpha={self.alpha}, '
                f'reduction={self.reduction})')
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class SoftmaxFocalLossFunction(Function):
    """Autograd Function wrapping the CUDA softmax focal loss kernels."""

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        # ONNX export: emit a custom 'mmcv::MMCVSoftmaxFocalLoss' node with
        # the hyper-parameters attached as node attributes.
        return g.op(
            'mmcv::MMCVSoftmaxFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):
        """Compute the softmax focal loss via the `_ext` CUDA kernel.

        The softmax itself is computed here in Python (numerically
        stabilised by subtracting the per-row max); the kernel consumes the
        resulting probabilities.

        Args:
            input (Tensor): (N, num_classes) predictions.
            target (LongTensor): (N,) class indices.
            gamma (float): Focusing parameter. Defaults to 2.0.
            alpha (float): Balancing parameter. Defaults to 0.25.
            weight (Tensor, optional): (num_classes,) per-class weights.
                Defaults to None (no weighting).
            reduction (str): 'none', 'mean' or 'sum'. Defaults to 'mean'.

        Returns:
            Tensor: (N,) per-sample loss for 'none', a scalar otherwise.
        """
        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            # Empty tensor signals "no class weighting" to the kernel.
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        # Stable softmax: subtract row max, exponentiate, normalise by the
        # row sum. The intermediate ops are in place on `input_softmax`.
        channel_stats, _ = torch.max(input, dim=1)
        input_softmax = input - channel_stats.unsqueeze(1).expand_as(input)
        input_softmax.exp_()

        channel_stats = input_softmax.sum(dim=1)
        input_softmax /= channel_stats.unsqueeze(1).expand_as(input)

        # One loss value per sample, written in place by the kernel.
        output = input.new_zeros(input.size(0))
        ext_module.softmax_focal_loss_forward(
            input_softmax,
            target,
            weight,
            output,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        # The backward kernel works from the softmax probabilities, not the
        # raw input, so save those instead.
        ctx.save_for_backward(input_softmax, target, weight)
        return output

    @staticmethod
    # NOTE(review): unlike SigmoidFocalLossFunction.backward, this is not
    # decorated with @once_differentiable — confirm whether that asymmetry
    # is intentional.
    def backward(ctx, grad_output):
        input_softmax, target, weight = ctx.saved_tensors
        # `buff` is a (N,) scratch buffer required by the backward kernel.
        buff = input_softmax.new_zeros(input_softmax.size(0))
        grad_input = input_softmax.new_zeros(input_softmax.size())

        ext_module.softmax_focal_loss_backward(
            input_softmax,
            target,
            weight,
            buff,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        # Chain rule; undo the 1/N of the 'mean' reduction from forward.
        grad_input *= grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input /= input_softmax.size(0)
        # Gradient only w.r.t. `input`; None for the other five arguments.
        return grad_input, None, None, None, None, None
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
# Functional interface to SoftmaxFocalLossFunction (CUDA-backed).
softmax_focal_loss = SoftmaxFocalLossFunction.apply
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
class SoftmaxFocalLoss(nn.Module):
    """``nn.Module`` wrapper around :func:`softmax_focal_loss`.

    Args:
        gamma (float): Focusing parameter forwarded to the op.
        alpha (float): Balancing parameter forwarded to the op.
        weight (Tensor, optional): Per-class weights, stored as a buffer so
            they move with the module (``.to()``/``.cuda()``). Defaults to
            None.
        reduction (str): 'none', 'mean' or 'sum'. Defaults to 'mean'.
    """

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        """Compute the softmax focal loss of ``input`` against ``target``."""
        return softmax_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        return (f'{self.__class__.__name__}'
                f'(gamma={self.gamma}, '
                f'alpha={self.alpha}, '
                f'reduction={self.reduction})')
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/furthest_point_sample.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.autograd import Function
|
| 3 |
+
|
| 4 |
+
from ..utils import ext_loader
|
| 5 |
+
|
| 6 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 7 |
+
'furthest_point_sampling_forward',
|
| 8 |
+
'furthest_point_sampling_with_dist_forward'
|
| 9 |
+
])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class FurthestPointSampling(Function):
    """Uses iterative furthest point sampling to select a set of features whose
    corresponding points have the furthest distance."""

    @staticmethod
    def forward(ctx, points_xyz: torch.Tensor,
                num_points: int) -> torch.Tensor:
        """
        Args:
            points_xyz (Tensor): (B, N, 3) where N > num_points.
            num_points (int): Number of points in the sampled set.

        Returns:
            Tensor: (B, num_points) indices of the sampled points.
        """
        assert points_xyz.is_contiguous()

        B, N = points_xyz.size()[:2]
        # CUDA-only: output index buffer plus a per-point scratch buffer,
        # initialised to a large sentinel (1e10), both written in place by
        # the kernel.
        output = torch.cuda.IntTensor(B, num_points)
        temp = torch.cuda.FloatTensor(B, N).fill_(1e10)

        ext_module.furthest_point_sampling_forward(
            points_xyz,
            temp,
            output,
            b=B,
            n=N,
            m=num_points,
        )
        if torch.__version__ != 'parrots':
            # Integer index output carries no gradient.
            ctx.mark_non_differentiable(output)
        return output

    @staticmethod
    def backward(xyz, a=None):
        # Sampling indices are not differentiable w.r.t. either input.
        return None, None
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class FurthestPointSamplingWithDist(Function):
    """Uses iterative furthest point sampling to select a set of features whose
    corresponding points have the furthest distance."""

    @staticmethod
    def forward(ctx, points_dist: torch.Tensor,
                num_points: int) -> torch.Tensor:
        """
        Args:
            points_dist (Tensor): (B, N, N) Distance between each point pair.
            num_points (int): Number of points in the sampled set.

        Returns:
            Tensor: (B, num_points) indices of the sampled points.
        """
        assert points_dist.is_contiguous()

        B, N, _ = points_dist.size()
        # Output index buffer and per-point scratch buffer (sentinel 1e10),
        # written in place by the kernel.
        output = points_dist.new_zeros([B, num_points], dtype=torch.int32)
        temp = points_dist.new_zeros([B, N]).fill_(1e10)

        ext_module.furthest_point_sampling_with_dist_forward(
            points_dist, temp, output, b=B, n=N, m=num_points)
        if torch.__version__ != 'parrots':
            # Integer index output carries no gradient.
            ctx.mark_non_differentiable(output)
        return output

    @staticmethod
    def backward(xyz, a=None):
        # Sampling indices are not differentiable w.r.t. either input.
        return None, None
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# Functional aliases for the autograd Functions above.
furthest_point_sample = FurthestPointSampling.apply
furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/fused_bias_leakyrelu.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
|
| 4 |
+
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
|
| 5 |
+
# Augmentation (ADA)
|
| 6 |
+
# =======================================================================
|
| 7 |
+
|
| 8 |
+
# 1. Definitions
|
| 9 |
+
|
| 10 |
+
# "Licensor" means any person or entity that distributes its Work.
|
| 11 |
+
|
| 12 |
+
# "Software" means the original work of authorship made available under
|
| 13 |
+
# this License.
|
| 14 |
+
|
| 15 |
+
# "Work" means the Software and any additions to or derivative works of
|
| 16 |
+
# the Software that are made available under this License.
|
| 17 |
+
|
| 18 |
+
# The terms "reproduce," "reproduction," "derivative works," and
|
| 19 |
+
# "distribution" have the meaning as provided under U.S. copyright law;
|
| 20 |
+
# provided, however, that for the purposes of this License, derivative
|
| 21 |
+
# works shall not include works that remain separable from, or merely
|
| 22 |
+
# link (or bind by name) to the interfaces of, the Work.
|
| 23 |
+
|
| 24 |
+
# Works, including the Software, are "made available" under this License
|
| 25 |
+
# by including in or with the Work either (a) a copyright notice
|
| 26 |
+
# referencing the applicability of this License to the Work, or (b) a
|
| 27 |
+
# copy of this License.
|
| 28 |
+
|
| 29 |
+
# 2. License Grants
|
| 30 |
+
|
| 31 |
+
# 2.1 Copyright Grant. Subject to the terms and conditions of this
|
| 32 |
+
# License, each Licensor grants to you a perpetual, worldwide,
|
| 33 |
+
# non-exclusive, royalty-free, copyright license to reproduce,
|
| 34 |
+
# prepare derivative works of, publicly display, publicly perform,
|
| 35 |
+
# sublicense and distribute its Work and any resulting derivative
|
| 36 |
+
# works in any form.
|
| 37 |
+
|
| 38 |
+
# 3. Limitations
|
| 39 |
+
|
| 40 |
+
# 3.1 Redistribution. You may reproduce or distribute the Work only
|
| 41 |
+
# if (a) you do so under this License, (b) you include a complete
|
| 42 |
+
# copy of this License with your distribution, and (c) you retain
|
| 43 |
+
# without modification any copyright, patent, trademark, or
|
| 44 |
+
# attribution notices that are present in the Work.
|
| 45 |
+
|
| 46 |
+
# 3.2 Derivative Works. You may specify that additional or different
|
| 47 |
+
# terms apply to the use, reproduction, and distribution of your
|
| 48 |
+
# derivative works of the Work ("Your Terms") only if (a) Your Terms
|
| 49 |
+
# provide that the use limitation in Section 3.3 applies to your
|
| 50 |
+
# derivative works, and (b) you identify the specific derivative
|
| 51 |
+
# works that are subject to Your Terms. Notwithstanding Your Terms,
|
| 52 |
+
# this License (including the redistribution requirements in Section
|
| 53 |
+
# 3.1) will continue to apply to the Work itself.
|
| 54 |
+
|
| 55 |
+
# 3.3 Use Limitation. The Work and any derivative works thereof only
|
| 56 |
+
# may be used or intended for use non-commercially. Notwithstanding
|
| 57 |
+
# the foregoing, NVIDIA and its affiliates may use the Work and any
|
| 58 |
+
# derivative works commercially. As used herein, "non-commercially"
|
| 59 |
+
# means for research or evaluation purposes only.
|
| 60 |
+
|
| 61 |
+
# 3.4 Patent Claims. If you bring or threaten to bring a patent claim
|
| 62 |
+
# against any Licensor (including any claim, cross-claim or
|
| 63 |
+
# counterclaim in a lawsuit) to enforce any patents that you allege
|
| 64 |
+
# are infringed by any Work, then your rights under this License from
|
| 65 |
+
# such Licensor (including the grant in Section 2.1) will terminate
|
| 66 |
+
# immediately.
|
| 67 |
+
|
| 68 |
+
# 3.5 Trademarks. This License does not grant any rights to use any
|
| 69 |
+
# Licensor’s or its affiliates’ names, logos, or trademarks, except
|
| 70 |
+
# as necessary to reproduce the notices described in this License.
|
| 71 |
+
|
| 72 |
+
# 3.6 Termination. If you violate any term of this License, then your
|
| 73 |
+
# rights under this License (including the grant in Section 2.1) will
|
| 74 |
+
# terminate immediately.
|
| 75 |
+
|
| 76 |
+
# 4. Disclaimer of Warranty.
|
| 77 |
+
|
| 78 |
+
# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 79 |
+
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
|
| 80 |
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
|
| 81 |
+
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
|
| 82 |
+
# THIS LICENSE.
|
| 83 |
+
|
| 84 |
+
# 5. Limitation of Liability.
|
| 85 |
+
|
| 86 |
+
# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
|
| 87 |
+
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
|
| 88 |
+
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
|
| 89 |
+
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
|
| 90 |
+
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
|
| 91 |
+
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
|
| 92 |
+
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
|
| 93 |
+
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
|
| 94 |
+
# THE POSSIBILITY OF SUCH DAMAGES.
|
| 95 |
+
|
| 96 |
+
# =======================================================================
|
| 97 |
+
|
| 98 |
+
import torch
|
| 99 |
+
import torch.nn.functional as F
|
| 100 |
+
from torch import nn
|
| 101 |
+
from torch.autograd import Function
|
| 102 |
+
|
| 103 |
+
from ..utils import ext_loader
|
| 104 |
+
|
| 105 |
+
ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu'])
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class FusedBiasLeakyReLUFunctionBackward(Function):
    """Calculate second order deviation.

    This function is to compute the second order deviation for the fused leaky
    relu operation.
    """

    @staticmethod
    def forward(ctx, grad_output, out, negative_slope, scale):
        # `out` (the forward activation) is saved so double-backward can
        # recompute which elements were in the negative branch.
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        # Empty tensor: the gradient kernel takes no bias input.
        empty = grad_output.new_empty(0)

        # act=3 selects leaky ReLU in the extension; grad=1 runs the
        # gradient (rather than forward) path of the fused kernel.
        grad_input = ext_module.fused_bias_leakyrelu(
            grad_output,
            empty,
            out,
            act=3,
            grad=1,
            alpha=negative_slope,
            scale=scale)

        # Bias gradient: sum over every dimension except the channel
        # dimension (dim 1).
        dim = [0]

        if grad_input.ndim > 2:
            dim += list(range(2, grad_input.ndim))

        grad_bias = grad_input.sum(dim).detach()

        return grad_input, grad_bias

    @staticmethod
    def backward(ctx, gradgrad_input, gradgrad_bias):
        out, = ctx.saved_tensors

        # The second order deviation, in fact, contains two parts, while the
        # the first part is zero. Thus, we direct consider the second part
        # which is similar with the first order deviation in implementation.
        gradgrad_out = ext_module.fused_bias_leakyrelu(
            gradgrad_input,
            gradgrad_bias.to(out.dtype),
            out,
            act=3,
            grad=1,
            alpha=ctx.negative_slope,
            scale=ctx.scale)

        # Gradient only w.r.t. grad_output; None for out/negative_slope/scale.
        return gradgrad_out, None, None, None
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class FusedBiasLeakyReLUFunction(Function):
    """Autograd Function for the fused bias + leaky ReLU CUDA kernel."""

    @staticmethod
    def forward(ctx, input, bias, negative_slope, scale):
        # Empty tensor: no auxiliary reference tensor on the forward path.
        empty = input.new_empty(0)

        # act=3 selects leaky ReLU; grad=0 runs the forward path of the
        # fused kernel (bias add + activation + scaling in one pass).
        out = ext_module.fused_bias_leakyrelu(
            input,
            bias,
            empty,
            act=3,
            grad=0,
            alpha=negative_slope,
            scale=scale)
        # Only the activation output is needed for backward.
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        return out

    @staticmethod
    def backward(ctx, grad_output):
        out, = ctx.saved_tensors

        # Delegate to a dedicated Function so double-backward also works.
        grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply(
            grad_output, out, ctx.negative_slope, ctx.scale)

        # Gradients w.r.t. input and bias; None for negative_slope/scale.
        return grad_input, grad_bias, None, None
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
class FusedBiasLeakyReLU(nn.Module):
    r"""Fused bias leaky ReLU.

    This function is introduced in the StyleGAN2:
    http://arxiv.org/abs/1912.04958

    The bias term comes from the convolution operation. In addition, to keep
    the variance of the feature map or gradients unchanged, they also adopt a
    scale similarly with Kaiming initialization. However, since the
    :math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
    final scale is just :math:`\sqrt{2}`. Of course, you may change it with
    your own scale.

    TODO: Implement the CPU version.

    Args:
        num_channels (int): The channel number of the feature map.
        negative_slope (float, optional): Same as nn.LeakyRelu.
            Defaults to 0.2.
        scale (float, optional): A scalar to adjust the variance of the
            feature map. Defaults to 2**0.5.
    """

    def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5):
        super(FusedBiasLeakyReLU, self).__init__()

        # Learnable per-channel bias, applied inside the fused kernel.
        self.bias = nn.Parameter(torch.zeros(num_channels))
        self.negative_slope = negative_slope
        self.scale = scale

    def forward(self, input):
        return fused_bias_leakyrelu(input, self.bias, self.negative_slope,
                                    self.scale)
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
    r"""Fused bias leaky ReLU function.

    This function is introduced in the StyleGAN2:
    http://arxiv.org/abs/1912.04958

    The bias term comes from the convolution operation. In addition, to keep
    the variance of the feature map or gradients unchanged, they also adopt a
    scale similarly with Kaiming initialization. However, since the
    :math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
    final scale is just :math:`\sqrt{2}`. Of course, you may change it with
    your own scale.

    Args:
        input (torch.Tensor): Input feature map.
        bias (nn.Parameter): The bias from convolution operation.
        negative_slope (float, optional): Same as nn.LeakyRelu.
            Defaults to 0.2.
        scale (float, optional): A scalar to adjust the variance of the
            feature map. Defaults to 2**0.5.

    Returns:
        torch.Tensor: Feature map after non-linear activation.
    """
    # The fused kernel is CUDA-only; fall back to the pure PyTorch
    # reference implementation on CPU tensors.
    if not input.is_cuda:
        return bias_leakyrelu_ref(input, bias, negative_slope, scale)

    return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype),
                                            negative_slope, scale)
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
    """Pure-PyTorch reference for bias add + leaky ReLU + scaling (CPU path)."""
    out = x
    if bias is not None:
        assert bias.ndim == 1
        assert bias.shape[0] == out.shape[1]
        # Broadcast the 1-D bias across the channel dimension (dim 1).
        bias_shape = [1] * out.ndim
        bias_shape[1] = -1
        out = out + bias.reshape(bias_shape)

    out = F.leaky_relu(out, negative_slope)
    return out * scale if scale != 1 else out
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/gather_points.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.autograd import Function
|
| 3 |
+
|
| 4 |
+
from ..utils import ext_loader
|
| 5 |
+
|
| 6 |
+
ext_module = ext_loader.load_ext(
|
| 7 |
+
'_ext', ['gather_points_forward', 'gather_points_backward'])
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class GatherPoints(Function):
    """Gather points with given index."""

    @staticmethod
    def forward(ctx, features: torch.Tensor,
                indices: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, N) features to gather.
            indices (Tensor): (B, M) where M is the number of points.

        Returns:
            Tensor: (B, C, M) where M is the number of points.
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()

        B, npoint = indices.size()
        _, C, N = features.size()
        # CUDA-only: output buffer filled in place by the kernel.
        output = torch.cuda.FloatTensor(B, C, npoint)

        ext_module.gather_points_forward(
            features, indices, output, b=B, c=C, n=N, npoints=npoint)

        # Stash indices and shapes needed to scatter gradients in backward().
        ctx.for_backwards = (indices, C, N)
        if torch.__version__ != 'parrots':
            # Integer indices carry no gradient.
            ctx.mark_non_differentiable(indices)
        return output

    @staticmethod
    def backward(ctx, grad_out):
        idx, C, N = ctx.for_backwards
        B, npoint = idx.size()

        # Accumulation buffer for the scattered gradients.
        grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
        grad_out_data = grad_out.data.contiguous()
        ext_module.gather_points_backward(
            grad_out_data,
            idx,
            grad_features.data,
            b=B,
            c=C,
            n=N,
            npoints=npoint)
        # Gradient only w.r.t. `features`; None for the indices.
        return grad_features, None
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# Functional alias for GatherPoints.
gather_points = GatherPoints.apply
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/group_points.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from typing import Tuple
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from torch import nn as nn
|
| 6 |
+
from torch.autograd import Function
|
| 7 |
+
|
| 8 |
+
from ..utils import ext_loader
|
| 9 |
+
from .ball_query import ball_query
|
| 10 |
+
from .knn import knn
|
| 11 |
+
|
| 12 |
+
ext_module = ext_loader.load_ext(
|
| 13 |
+
'_ext', ['group_points_forward', 'group_points_backward'])
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class QueryAndGroup(nn.Module):
|
| 17 |
+
"""Groups points with a ball query of radius.
|
| 18 |
+
|
| 19 |
+
Args:
|
| 20 |
+
max_radius (float): The maximum radius of the balls.
|
| 21 |
+
If None is given, we will use kNN sampling instead of ball query.
|
| 22 |
+
sample_num (int): Maximum number of features to gather in the ball.
|
| 23 |
+
min_radius (float, optional): The minimum radius of the balls.
|
| 24 |
+
Default: 0.
|
| 25 |
+
use_xyz (bool, optional): Whether to use xyz.
|
| 26 |
+
Default: True.
|
| 27 |
+
return_grouped_xyz (bool, optional): Whether to return grouped xyz.
|
| 28 |
+
Default: False.
|
| 29 |
+
normalize_xyz (bool, optional): Whether to normalize xyz.
|
| 30 |
+
Default: False.
|
| 31 |
+
uniform_sample (bool, optional): Whether to sample uniformly.
|
| 32 |
+
Default: False
|
| 33 |
+
return_unique_cnt (bool, optional): Whether to return the count of
|
| 34 |
+
unique samples. Default: False.
|
| 35 |
+
return_grouped_idx (bool, optional): Whether to return grouped idx.
|
| 36 |
+
Default: False.
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
def __init__(self,
|
| 40 |
+
max_radius,
|
| 41 |
+
sample_num,
|
| 42 |
+
min_radius=0,
|
| 43 |
+
use_xyz=True,
|
| 44 |
+
return_grouped_xyz=False,
|
| 45 |
+
normalize_xyz=False,
|
| 46 |
+
uniform_sample=False,
|
| 47 |
+
return_unique_cnt=False,
|
| 48 |
+
return_grouped_idx=False):
|
| 49 |
+
super().__init__()
|
| 50 |
+
self.max_radius = max_radius
|
| 51 |
+
self.min_radius = min_radius
|
| 52 |
+
self.sample_num = sample_num
|
| 53 |
+
self.use_xyz = use_xyz
|
| 54 |
+
self.return_grouped_xyz = return_grouped_xyz
|
| 55 |
+
self.normalize_xyz = normalize_xyz
|
| 56 |
+
self.uniform_sample = uniform_sample
|
| 57 |
+
self.return_unique_cnt = return_unique_cnt
|
| 58 |
+
self.return_grouped_idx = return_grouped_idx
|
| 59 |
+
if self.return_unique_cnt:
|
| 60 |
+
assert self.uniform_sample, \
|
| 61 |
+
'uniform_sample should be True when ' \
|
| 62 |
+
'returning the count of unique samples'
|
| 63 |
+
if self.max_radius is None:
|
| 64 |
+
assert not self.normalize_xyz, \
|
| 65 |
+
'can not normalize grouped xyz when max_radius is None'
|
| 66 |
+
|
| 67 |
+
def forward(self, points_xyz, center_xyz, features=None):
    """Group point features around each center.

    Args:
        points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
        center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods.
        features (Tensor): (B, C, N) Descriptors of the features.

    Returns:
        Tensor: (B, 3 + C, npoint, sample_num) Grouped feature, or a tuple
        also containing grouped xyz / unique counts / indices depending on
        the ``return_*`` flags set at construction time.
    """
    # if self.max_radius is None, we will perform kNN instead of ball query
    # idx is of shape [B, npoint, sample_num]
    if self.max_radius is None:
        # knn returns (B, k, npoint); transpose to match ball_query layout.
        idx = knn(self.sample_num, points_xyz, center_xyz, False)
        idx = idx.transpose(1, 2).contiguous()
    else:
        idx = ball_query(self.min_radius, self.max_radius, self.sample_num,
                         points_xyz, center_xyz)

    if self.uniform_sample:
        # Re-draw duplicated neighbors uniformly from the unique ones so
        # each region's sample_num slots are filled from its unique points.
        # NOTE(review): unique_cnt is allocated on CPU even when idx is on
        # GPU — confirm callers expect a CPU tensor here.
        unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))
        for i_batch in range(idx.shape[0]):
            for i_region in range(idx.shape[1]):
                unique_ind = torch.unique(idx[i_batch, i_region, :])
                num_unique = unique_ind.shape[0]
                unique_cnt[i_batch, i_region] = num_unique
                # Pad up to sample_num by resampling from the unique set
                # (empty draw when the region already has sample_num unique).
                sample_ind = torch.randint(
                    0,
                    num_unique, (self.sample_num - num_unique, ),
                    dtype=torch.long)
                all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))
                idx[i_batch, i_region, :] = all_ind

    # grouping_operation expects channel-first coordinates (B, 3, N).
    xyz_trans = points_xyz.transpose(1, 2).contiguous()
    # (B, 3, npoint, sample_num)
    grouped_xyz = grouping_operation(xyz_trans, idx)
    grouped_xyz_diff = grouped_xyz - \
        center_xyz.transpose(1, 2).unsqueeze(-1)  # relative offsets
    if self.normalize_xyz:
        # Scale offsets into the unit ball of radius max_radius (in place).
        grouped_xyz_diff /= self.max_radius

    if features is not None:
        grouped_features = grouping_operation(features, idx)
        if self.use_xyz:
            # (B, C + 3, npoint, sample_num)
            new_features = torch.cat([grouped_xyz_diff, grouped_features],
                                     dim=1)
        else:
            new_features = grouped_features
    else:
        # Without descriptors the relative offsets are the only feature.
        assert (self.use_xyz
                ), 'Cannot have not features and not use xyz as a feature!'
        new_features = grouped_xyz_diff

    # Assemble outputs in a fixed order; a single tensor is returned bare.
    ret = [new_features]
    if self.return_grouped_xyz:
        ret.append(grouped_xyz)
    if self.return_unique_cnt:
        ret.append(unique_cnt)
    if self.return_grouped_idx:
        ret.append(idx)
    if len(ret) == 1:
        return ret[0]
    else:
        return tuple(ret)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
class GroupAll(nn.Module):
    """Group every point into one single group, keeping its features.

    Args:
        use_xyz (bool): Whether to concatenate the xyz coordinates to the
            grouped features.
    """

    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz

    def forward(self,
                xyz: torch.Tensor,
                new_xyz: torch.Tensor,
                features: torch.Tensor = None):
        """
        Args:
            xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            new_xyz (Tensor): new xyz coordinates of the features (not used
                by this cell).
            features (Tensor): (B, C, N) features to group.

        Returns:
            Tensor: (B, C + 3, 1, N) Grouped feature.
        """
        # (B, 3, 1, N): all N points fall into the single group.
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is None:
            # Only coordinates are available to serve as features.
            return grouped_xyz
        grouped_features = features.unsqueeze(2)
        if not self.use_xyz:
            return grouped_features
        # (B, 3 + C, 1, N)
        return torch.cat([grouped_xyz, grouped_features], dim=1)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
class GroupingOperation(Function):
    """Group feature with given index.

    Thin autograd wrapper around the CUDA ``group_points`` kernels.
    """

    @staticmethod
    def forward(ctx, features: torch.Tensor,
                indices: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, N) tensor of features to group.
            indices (Tensor): (B, npoint, nsample) the indices of
                features to group with.

        Returns:
            Tensor: (B, C, npoint, nsample) Grouped features.
        """
        features = features.contiguous()
        indices = indices.contiguous()

        B, nfeatures, nsample = indices.size()
        _, C, N = features.size()
        # Allocate on the same device/dtype as the input instead of the
        # deprecated torch.cuda.FloatTensor constructor, which hard-codes
        # float32 on the *current* CUDA device and breaks multi-GPU use.
        output = features.new_zeros((B, C, nfeatures, nsample))

        ext_module.group_points_forward(B, C, N, nfeatures, nsample, features,
                                        indices, output)

        # Save what backward needs: the gather indices and source length N.
        ctx.for_backwards = (indices, N)
        return output

    @staticmethod
    def backward(ctx,
                 grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients
                of the output from forward.

        Returns:
            Tensor: (B, C, N) gradient of the features (no gradient is
            propagated to the integer indices).
        """
        idx, N = ctx.for_backwards

        B, C, npoint, nsample = grad_out.size()
        # Zero-initialized scatter target on grad_out's device/dtype
        # (replaces torch.cuda.FloatTensor(...).zero_()).
        grad_features = grad_out.new_zeros((B, C, N))

        grad_out_data = grad_out.data.contiguous()
        ext_module.group_points_backward(B, C, N, npoint, nsample,
                                         grad_out_data, idx,
                                         grad_features.data)
        return grad_features, None


grouping_operation = GroupingOperation.apply
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/info.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import glob
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
# Version queries are backend-dependent: parrots exposes them through its
# own ``parrots.version`` module, while standard PyTorch builds read them
# from the compiled mmcv extension.
if torch.__version__ == 'parrots':
    import parrots

    def get_compiler_version():
        # Return the compiler string of the parrots build, e.g. 'GCC 7.5'.
        return 'GCC ' + parrots.version.compiler

    def get_compiling_cuda_version():
        # CUDA version the parrots backend was compiled against.
        return parrots.version.cuda
else:
    from ..utils import ext_loader
    ext_module = ext_loader.load_ext(
        '_ext', ['get_compiler_version', 'get_compiling_cuda_version'])

    def get_compiler_version():
        # Compiler version recorded at extension build time.
        return ext_module.get_compiler_version()

    def get_compiling_cuda_version():
        # CUDA toolkit version the extension was compiled with.
        return ext_module.get_compiling_cuda_version()
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_onnxruntime_op_path():
    """Return the path of the compiled ONNX Runtime custom-op library.

    Looks one directory above this module for a ``_ext_ort.*.so`` shared
    object and returns the first match, or an empty string when none exists.
    """
    package_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    wildcard = os.path.join(package_dir, '_ext_ort.*.so')

    matches = glob.glob(wildcard)
    return matches[0] if matches else ''
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/iou3d.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from ..utils import ext_loader
|
| 5 |
+
|
| 6 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 7 |
+
'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward',
|
| 8 |
+
'iou3d_nms_normal_forward'
|
| 9 |
+
])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def boxes_iou_bev(boxes_a, boxes_b):
    """Calculate boxes IoU in the Bird's Eye View.

    Args:
        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
        boxes_b (torch.Tensor): Input boxes b with shape (N, 5).

    Returns:
        ans_iou (torch.Tensor): IoU result with shape (M, N).
    """
    num_a = boxes_a.shape[0]
    num_b = boxes_b.shape[0]
    # Output buffer on the same device/dtype as the inputs; the extension
    # fills it in place.
    ans_iou = boxes_a.new_zeros(torch.Size((num_a, num_b)))

    ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(),
                                           boxes_b.contiguous(), ans_iou)

    return ans_iou
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
    """NMS function GPU implementation (for BEV boxes). The overlap of two
    boxes for IoU calculation is defined as the exact overlapping area of the
    two boxes. In this function, one can also set ``pre_max_size`` and
    ``post_max_size``.

    Args:
        boxes (torch.Tensor): Input boxes with the shape of [N, 5]
            ([x1, y1, x2, y2, ry]).
        scores (torch.Tensor): Scores of boxes with the shape of [N].
        thresh (float): Overlap threshold of NMS.
        pre_max_size (int, optional): Max size of boxes before NMS.
            Default: None.
        post_max_size (int, optional): Max size of boxes after NMS.
            Default: None.

    Returns:
        torch.Tensor: Indexes after NMS.
    """
    assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'
    # Process boxes in descending score order; `order` maps sorted positions
    # back to original indices.
    order = scores.sort(0, descending=True)[1]

    if pre_max_size is not None:
        order = order[:pre_max_size]
    boxes = boxes[order].contiguous()

    # NOTE(review): `keep` is allocated on CPU and filled by the extension,
    # then moved to the boxes' device below — confirm the op expects a CPU
    # output buffer.
    keep = torch.zeros(boxes.size(0), dtype=torch.long)
    num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh)
    # Translate positions in the sorted array back to original box indices.
    keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
    if post_max_size is not None:
        keep = keep[:post_max_size]
    return keep
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def nms_normal_bev(boxes, scores, thresh):
    """Normal NMS function GPU implementation (for BEV boxes). The overlap of
    two boxes for IoU calculation is defined as the exact overlapping area of
    the two boxes WITH their yaw angle set to 0.

    Args:
        boxes (torch.Tensor): Input boxes with shape (N, 5).
        scores (torch.Tensor): Scores of predicted boxes with shape (N).
        thresh (float): Overlap threshold of NMS.

    Returns:
        torch.Tensor: Remaining indices with scores in descending order.
    """
    assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'
    # Sort boxes by score (highest first); keep the permutation so the
    # result can be mapped back to original indices.
    order = scores.sort(0, descending=True)[1]
    boxes = boxes[order].contiguous()

    keep = torch.zeros(boxes.size(0), dtype=torch.long)
    num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh)
    kept = keep[:num_out].cuda(boxes.device)
    return order[kept].contiguous()
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/knn.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.autograd import Function
|
| 3 |
+
|
| 4 |
+
from ..utils import ext_loader
|
| 5 |
+
|
| 6 |
+
ext_module = ext_loader.load_ext('_ext', ['knn_forward'])
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class KNN(Function):
    r"""KNN (CUDA) based on heap data structure.
    Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
    scene_seg/lib/pointops/src/knnquery_heap>`_.

    Find k-nearest points.
    """

    @staticmethod
    def forward(ctx,
                k: int,
                xyz: torch.Tensor,
                center_xyz: torch.Tensor = None,
                transposed: bool = False) -> torch.Tensor:
        """
        Args:
            k (int): number of nearest neighbors.
            xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).
                xyz coordinates of the features.
            center_xyz (Tensor, optional): (B, npoint, 3) if transposed ==
                False, else (B, 3, npoint). centers of the knn query.
                Default: None.
            transposed (bool, optional): whether the input tensors are
                transposed. Should not explicitly use this keyword when
                calling knn (=KNN.apply), just add the fourth param.
                Default: False.

        Returns:
            Tensor: (B, k, npoint) tensor with the indices of
                the features that form k-nearest neighbours.
        """
        # The CUDA kernel bounds k; bitwise & works here because both
        # comparisons yield bools.
        assert (k > 0) & (k < 100), 'k should be in range(0, 100)'

        # Self-query: each point's neighbors are searched in xyz itself.
        if center_xyz is None:
            center_xyz = xyz

        if transposed:
            xyz = xyz.transpose(2, 1).contiguous()
            center_xyz = center_xyz.transpose(2, 1).contiguous()

        assert xyz.is_contiguous()  # [B, N, 3]
        assert center_xyz.is_contiguous()  # [B, npoint, 3]

        # The kernel launches on the current device, so make sure it matches
        # the tensors' device.
        center_xyz_device = center_xyz.get_device()
        assert center_xyz_device == xyz.get_device(), \
            'center_xyz and xyz should be put on the same device'
        if torch.cuda.current_device() != center_xyz_device:
            torch.cuda.set_device(center_xyz_device)

        B, npoint, _ = center_xyz.shape
        N = xyz.shape[1]

        # Output buffers filled in place by the extension: neighbor indices
        # and their squared distances.
        idx = center_xyz.new_zeros((B, npoint, k)).int()
        dist2 = center_xyz.new_zeros((B, npoint, k)).float()

        ext_module.knn_forward(
            xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k)
        # idx shape to [B, k, npoint]
        idx = idx.transpose(2, 1).contiguous()
        # Integer indices carry no gradient (parrots lacks this API).
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        # Non-differentiable op: no gradients for (k, xyz, center_xyz).
        return None, None, None


knn = KNN.apply
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/masked_conv.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import math
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
from torch.autograd import Function
|
| 7 |
+
from torch.autograd.function import once_differentiable
|
| 8 |
+
from torch.nn.modules.utils import _pair
|
| 9 |
+
|
| 10 |
+
from ..utils import ext_loader
|
| 11 |
+
|
| 12 |
+
ext_module = ext_loader.load_ext(
|
| 13 |
+
'_ext', ['masked_im2col_forward', 'masked_col2im_forward'])
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class MaskedConv2dFunction(Function):
    """Autograd Function computing a Conv2d only at spatial positions where
    ``mask`` is positive.

    Forward-only: ``backward`` propagates no gradients.
    """

    @staticmethod
    def symbolic(g, features, mask, weight, bias, padding, stride):
        # ONNX export hook mapping to the custom MMCV operator.
        return g.op(
            'mmcv::MMCVMaskedConv2d',
            features,
            mask,
            weight,
            bias,
            padding_i=padding,
            stride_i=stride)

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        """Compute the masked convolution.

        Args:
            features (Tensor): (1, C_in, H, W) input feature map.
            mask (Tensor): (1, H, W) mask; positions with value > 0 are
                computed, the rest of the output stays zero.
            weight (Tensor): (C_out, C_in, kH, kW) convolution weights.
            bias (Tensor): (C_out,) bias.
            padding (int or tuple): zero padding. Default: 0.
            stride (int or tuple): must be 1 (only supported value).

        Returns:
            Tensor: (1, C_out, out_h, out_w) output feature map.

        Raises:
            ValueError: if stride is not 1.
        """
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Stride could not only be 1 in masked_conv2d currently.')
        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        out_h = int(
            math.floor((features.size(2) + 2 * pad_h -
                        (kernel_h - 1) - 1) / stride_h + 1))
        # Fix: the width used kernel_h before, which is only correct for
        # square kernels.
        out_w = int(
            math.floor((features.size(3) + 2 * pad_w -
                        (kernel_w - 1) - 1) / stride_w + 1))
        mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        if mask_inds.numel() > 0:
            mask_h_idx = mask_inds[:, 0].contiguous()
            mask_w_idx = mask_inds[:, 1].contiguous()
            # im2col restricted to the masked positions: one column per
            # active pixel.
            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                          mask_inds.size(0))
            ext_module.masked_im2col_forward(
                features,
                mask_h_idx,
                mask_w_idx,
                data_col,
                kernel_h=kernel_h,
                kernel_w=kernel_w,
                pad_h=pad_h,
                pad_w=pad_w)

            # Fix: use the current torch.addmm signature; the old positional
            # (beta, input, alpha, mat1, mat2) form has been removed from
            # recent PyTorch. beta = alpha = 1 are the defaults, so the
            # result bias + weight @ data_col is unchanged.
            masked_output = torch.addmm(bias[:, None],
                                        weight.view(out_channel, -1), data_col)
            ext_module.masked_col2im_forward(
                masked_output,
                mask_h_idx,
                mask_w_idx,
                output,
                height=out_h,
                width=out_w,
                channels=out_channel)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        # Forward-only op: no gradients for any of the 5 inputs.
        return (None, ) * 5


masked_conv2d = MaskedConv2dFunction.apply
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class MaskedConv2d(nn.Conv2d):
    """A MaskedConv2d which inherits the official Conv2d.

    The masked forward doesn't implement the backward function and only
    supports the stride parameter to be 1 currently.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super(MaskedConv2d, self).__init__(in_channels, out_channels,
                                           kernel_size, stride, padding,
                                           dilation, groups, bias)

    def forward(self, input, mask=None):
        # Without a mask this behaves exactly like a standard Conv2d.
        if mask is None:
            return super(MaskedConv2d, self).forward(input)
        # Restrict computation to the masked positions.
        return masked_conv2d(input, mask, self.weight, self.bias,
                             self.padding)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/merge_cells.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from abc import abstractmethod
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
|
| 8 |
+
from ..cnn import ConvModule
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class BaseMergeCell(nn.Module):
    """The basic class for cells used in NAS-FPN and NAS-FCOS.

    BaseMergeCell takes 2 inputs. After applying convolution
    on them, they are resized to the target size. Then,
    they go through binary_op, which depends on the type of cell.
    If with_out_conv is True, the result of output will go through
    another convolution layer.

    Args:
        fused_channels (int): number of input channels in out_conv layer.
        out_channels (int): number of output channels in out_conv layer.
        with_out_conv (bool): Whether to use out_conv layer
        out_conv_cfg (dict): Config dict for convolution layer, which should
            contain "groups", "kernel_size", "padding", "bias" to build
            out_conv layer.
        out_norm_cfg (dict): Config dict for normalization layer in out_conv.
        out_conv_order (tuple): The order of conv/norm/activation layers in
            out_conv.
        with_input1_conv (bool): Whether to use convolution on input1.
        with_input2_conv (bool): Whether to use convolution on input2.
        input_conv_cfg (dict): Config dict for building input1_conv layer and
            input2_conv layer, which is expected to contain the type of
            convolution.
            Default: None, which means using conv2d.
        input_norm_cfg (dict): Config dict for normalization layer in
            input1_conv and input2_conv layer. Default: None.
        upsample_mode (str): Interpolation method used to resize the output
            of input1_conv and input2_conv to target size. Currently, we
            support ['nearest', 'bilinear']. Default: 'nearest'.
    """

    def __init__(self,
                 fused_channels=256,
                 out_channels=256,
                 with_out_conv=True,
                 out_conv_cfg=dict(
                     groups=1, kernel_size=3, padding=1, bias=True),
                 out_norm_cfg=None,
                 out_conv_order=('act', 'conv', 'norm'),
                 with_input1_conv=False,
                 with_input2_conv=False,
                 input_conv_cfg=None,
                 input_norm_cfg=None,
                 upsample_mode='nearest'):
        super(BaseMergeCell, self).__init__()
        assert upsample_mode in ['nearest', 'bilinear']
        self.with_out_conv = with_out_conv
        self.with_input1_conv = with_input1_conv
        self.with_input2_conv = with_input2_conv
        self.upsample_mode = upsample_mode

        if self.with_out_conv:
            # Post-fusion projection from fused_channels to out_channels.
            self.out_conv = ConvModule(
                fused_channels,
                out_channels,
                **out_conv_cfg,
                norm_cfg=out_norm_cfg,
                order=out_conv_order)

        # When disabled, the input convs degrade to identity modules.
        self.input1_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input1_conv else nn.Sequential()
        self.input2_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input2_conv else nn.Sequential()

    def _build_input_conv(self, channel, conv_cfg, norm_cfg):
        # Channel-preserving 3x3 conv applied to an input branch.
        return ConvModule(
            channel,
            channel,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            bias=True)

    @abstractmethod
    def _binary_op(self, x1, x2):
        # Fusion operator implemented by each concrete cell.
        pass

    def _resize(self, x, size):
        # Resize x's spatial dims to `size`: interpolate up, max-pool down.
        # NOTE(review): `x.shape[-2:] < size` compares tuples
        # lexicographically, which can misclassify mixed cases such as
        # (8, 2) vs (4, 6) — confirm inputs always scale both dims the
        # same way.
        if x.shape[-2:] == size:
            return x
        elif x.shape[-2:] < size:
            return F.interpolate(x, size=size, mode=self.upsample_mode)
        else:
            # Downscaling requires integer ratios for exact max pooling.
            assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
            kernel_size = x.shape[-1] // size[-1]
            x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
            return x

    def forward(self, x1, x2, out_size=None):
        assert x1.shape[:2] == x2.shape[:2]
        assert out_size is None or len(out_size) == 2
        if out_size is None:  # resize to larger one
            out_size = max(x1.size()[2:], x2.size()[2:])

        x1 = self.input1_conv(x1)
        x2 = self.input2_conv(x2)

        # Bring both branches to a common spatial size before fusing.
        x1 = self._resize(x1, out_size)
        x2 = self._resize(x2, out_size)

        x = self._binary_op(x1, x2)
        if self.with_out_conv:
            x = self.out_conv(x)
        return x
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
class SumCell(BaseMergeCell):
    """Merge cell fusing its two inputs by element-wise addition."""

    def __init__(self, in_channels, out_channels, **kwargs):
        super(SumCell, self).__init__(in_channels, out_channels, **kwargs)

    def _binary_op(self, x1, x2):
        # Element-wise sum; identical to ``x1 + x2``.
        return torch.add(x1, x2)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
class ConcatCell(BaseMergeCell):
    """Merge cell fusing its two inputs by channel-wise concatenation."""

    def __init__(self, in_channels, out_channels, **kwargs):
        # Concatenation doubles the channel count fed to out_conv.
        super(ConcatCell, self).__init__(in_channels * 2, out_channels,
                                         **kwargs)

    def _binary_op(self, x1, x2):
        return torch.cat((x1, x2), dim=1)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
class GlobalPoolingCell(BaseMergeCell):
    """Merge cell gating x1 with a globally pooled attention from x2."""

    def __init__(self, in_channels=None, out_channels=None, **kwargs):
        super().__init__(in_channels, out_channels, **kwargs)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

    def _binary_op(self, x1, x2):
        # (B, C, 1, 1) sigmoid attention derived from x2's global average.
        attention = self.global_pool(x2).sigmoid()
        return x2 + attention * x1
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/modulated_deform_conv.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import math
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
from torch.autograd import Function
|
| 7 |
+
from torch.autograd.function import once_differentiable
|
| 8 |
+
from torch.nn.modules.utils import _pair, _single
|
| 9 |
+
|
| 10 |
+
from annotator.mmpkg.mmcv.utils import deprecated_api_warning
|
| 11 |
+
from ..cnn import CONV_LAYERS
|
| 12 |
+
from ..utils import ext_loader, print_log
|
| 13 |
+
|
| 14 |
+
ext_module = ext_loader.load_ext(
|
| 15 |
+
'_ext',
|
| 16 |
+
['modulated_deform_conv_forward', 'modulated_deform_conv_backward'])
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class ModulatedDeformConv2dFunction(Function):
|
| 20 |
+
|
| 21 |
+
@staticmethod
|
| 22 |
+
def symbolic(g, input, offset, mask, weight, bias, stride, padding,
|
| 23 |
+
dilation, groups, deform_groups):
|
| 24 |
+
input_tensors = [input, offset, mask, weight]
|
| 25 |
+
if bias is not None:
|
| 26 |
+
input_tensors.append(bias)
|
| 27 |
+
return g.op(
|
| 28 |
+
'mmcv::MMCVModulatedDeformConv2d',
|
| 29 |
+
*input_tensors,
|
| 30 |
+
stride_i=stride,
|
| 31 |
+
padding_i=padding,
|
| 32 |
+
dilation_i=dilation,
|
| 33 |
+
groups_i=groups,
|
| 34 |
+
deform_groups_i=deform_groups)
|
| 35 |
+
|
| 36 |
+
@staticmethod
|
| 37 |
+
def forward(ctx,
|
| 38 |
+
input,
|
| 39 |
+
offset,
|
| 40 |
+
mask,
|
| 41 |
+
weight,
|
| 42 |
+
bias=None,
|
| 43 |
+
stride=1,
|
| 44 |
+
padding=0,
|
| 45 |
+
dilation=1,
|
| 46 |
+
groups=1,
|
| 47 |
+
deform_groups=1):
|
| 48 |
+
if input is not None and input.dim() != 4:
|
| 49 |
+
raise ValueError(
|
| 50 |
+
f'Expected 4D tensor as input, got {input.dim()}D tensor \
|
| 51 |
+
instead.')
|
| 52 |
+
ctx.stride = _pair(stride)
|
| 53 |
+
ctx.padding = _pair(padding)
|
| 54 |
+
ctx.dilation = _pair(dilation)
|
| 55 |
+
ctx.groups = groups
|
| 56 |
+
ctx.deform_groups = deform_groups
|
| 57 |
+
ctx.with_bias = bias is not None
|
| 58 |
+
if not ctx.with_bias:
|
| 59 |
+
bias = input.new_empty(0) # fake tensor
|
| 60 |
+
# When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
|
| 61 |
+
# amp won't cast the type of model (float32), but "offset" is cast
|
| 62 |
+
# to float16 by nn.Conv2d automatically, leading to the type
|
| 63 |
+
# mismatch with input (when it is float32) or weight.
|
| 64 |
+
# The flag for whether to use fp16 or amp is the type of "offset",
|
| 65 |
+
# we cast weight and input to temporarily support fp16 and amp
|
| 66 |
+
# whatever the pytorch version is.
|
| 67 |
+
input = input.type_as(offset)
|
| 68 |
+
weight = weight.type_as(input)
|
| 69 |
+
ctx.save_for_backward(input, offset, mask, weight, bias)
|
| 70 |
+
output = input.new_empty(
|
| 71 |
+
ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
|
| 72 |
+
ctx._bufs = [input.new_empty(0), input.new_empty(0)]
|
| 73 |
+
ext_module.modulated_deform_conv_forward(
|
| 74 |
+
input,
|
| 75 |
+
weight,
|
| 76 |
+
bias,
|
| 77 |
+
ctx._bufs[0],
|
| 78 |
+
offset,
|
| 79 |
+
mask,
|
| 80 |
+
output,
|
| 81 |
+
ctx._bufs[1],
|
| 82 |
+
kernel_h=weight.size(2),
|
| 83 |
+
kernel_w=weight.size(3),
|
| 84 |
+
stride_h=ctx.stride[0],
|
| 85 |
+
stride_w=ctx.stride[1],
|
| 86 |
+
pad_h=ctx.padding[0],
|
| 87 |
+
pad_w=ctx.padding[1],
|
| 88 |
+
dilation_h=ctx.dilation[0],
|
| 89 |
+
dilation_w=ctx.dilation[1],
|
| 90 |
+
group=ctx.groups,
|
| 91 |
+
deformable_group=ctx.deform_groups,
|
| 92 |
+
with_bias=ctx.with_bias)
|
| 93 |
+
return output
|
| 94 |
+
|
| 95 |
+
    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute gradients for modulated deformable convolution.

        The compiled extension writes gradients into pre-allocated,
        zero-initialized buffers in place; only the tensors returned here
        flow back to autograd.
        """
        input, offset, mask, weight, bias = ctx.saved_tensors
        # Buffers filled in place by the extension call below.
        grad_input = torch.zeros_like(input)
        grad_offset = torch.zeros_like(offset)
        grad_mask = torch.zeros_like(mask)
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
        # The extension assumes contiguous memory for the incoming gradient.
        grad_output = grad_output.contiguous()
        ext_module.modulated_deform_conv_backward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            ctx._bufs[1],
            grad_input,
            grad_weight,
            grad_bias,
            grad_offset,
            grad_mask,
            grad_output,
            kernel_h=weight.size(2),
            kernel_w=weight.size(3),
            stride_h=ctx.stride[0],
            stride_w=ctx.stride[1],
            pad_h=ctx.padding[0],
            pad_w=ctx.padding[1],
            dilation_h=ctx.dilation[0],
            dilation_w=ctx.dilation[1],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            with_bias=ctx.with_bias)
        if not ctx.with_bias:
            # forward() received bias=None (a fake empty tensor was used),
            # so no gradient should be reported for it.
            grad_bias = None

        # One slot per forward() argument: the five trailing Nones cover
        # stride, padding, dilation, groups and deform_groups.
        return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
                None, None, None, None, None)
|
| 135 |
+
|
| 136 |
+
@staticmethod
|
| 137 |
+
def _output_size(ctx, input, weight):
|
| 138 |
+
channels = weight.size(0)
|
| 139 |
+
output_size = (input.size(0), channels)
|
| 140 |
+
for d in range(input.dim() - 2):
|
| 141 |
+
in_size = input.size(d + 2)
|
| 142 |
+
pad = ctx.padding[d]
|
| 143 |
+
kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
|
| 144 |
+
stride_ = ctx.stride[d]
|
| 145 |
+
output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
|
| 146 |
+
if not all(map(lambda s: s > 0, output_size)):
|
| 147 |
+
raise ValueError(
|
| 148 |
+
'convolution input is too small (output would be ' +
|
| 149 |
+
'x'.join(map(str, output_size)) + ')')
|
| 150 |
+
return output_size
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# Functional entry point: apply modulated deformable convolution given
# explicit offset and mask tensors (analogous to F.conv2d for nn.Conv2d).
modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
class ModulatedDeformConv2d(nn.Module):
    """Modulated deformable 2D convolution layer (DCNv2-style).

    Unlike ``ModulatedDeformConv2dPack``, this module does not predict the
    ``offset`` and ``mask`` tensors itself; the caller must provide them to
    :meth:`forward`.
    """

    @deprecated_api_warning({'deformable_groups': 'deform_groups'},
                            cls_name='ModulatedDeformConv2d')
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 deform_groups=1,
                 bias=True):
        """Initialize the layer; arguments mirror ``nn.Conv2d`` plus
        ``deform_groups`` (number of deformable offset groups)."""
        super(ModulatedDeformConv2d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deform_groups = deform_groups
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)

        # Weight is allocated uninitialized here; init_weights() below
        # fills it before first use.
        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // groups,
                         *self.kernel_size))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.init_weights()

    def init_weights(self):
        """Uniform fan-in init for the weight; zero bias."""
        n = self.in_channels
        for k in self.kernel_size:
            n *= k
        stdv = 1. / math.sqrt(n)
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.zero_()

    def forward(self, x, offset, mask):
        """Run the convolution with caller-supplied offset and mask."""
        return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
                                       self.stride, self.padding,
                                       self.dilation, self.groups,
                                       self.deform_groups)
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
@CONV_LAYERS.register_module('DCNv2')
class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
    """A ModulatedDeformable Conv Encapsulation that acts as normal Conv
    layers.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int): Same as nn.Conv2d, while tuple is not supported.
        padding (int): Same as nn.Conv2d, while tuple is not supported.
        dilation (int): Same as nn.Conv2d, while tuple is not supported.
        groups (int): Same as nn.Conv2d.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    # Bumped when the state-dict key layout changed; used by
    # _load_from_state_dict to upgrade old checkpoints.
    _version = 2

    def __init__(self, *args, **kwargs):
        super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs)
        # Predicts 3 channels per kernel location and deform group:
        # 2 offset components (x, y) plus 1 modulation mask value.
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            bias=True)
        self.init_weights()

    def init_weights(self):
        super(ModulatedDeformConv2dPack, self).init_weights()
        # Zero-init so the layer initially behaves like a plain conv
        # (zero offsets, sigmoid(0)=0.5 uniform mask).
        if hasattr(self, 'conv_offset'):
            self.conv_offset.weight.data.zero_()
            self.conv_offset.bias.data.zero_()

    def forward(self, x):
        out = self.conv_offset(x)
        # Split predictions into x-offsets, y-offsets and the mask.
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        # Mask is squashed to (0, 1) before modulating the sampled values.
        mask = torch.sigmoid(mask)
        return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
                                       self.stride, self.padding,
                                       self.dilation, self.groups,
                                       self.deform_groups)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Rename legacy '*_offset.*' keys to 'conv_offset.*' on load."""
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, ModulatedDeformConvPack
            # loads previous benchmark models.
            if (prefix + 'conv_offset.weight' not in state_dict
                    and prefix[:-1] + '_offset.weight' in state_dict):
                state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
                    prefix[:-1] + '_offset.weight')
            if (prefix + 'conv_offset.bias' not in state_dict
                    and prefix[:-1] + '_offset.bias' in state_dict):
                state_dict[prefix +
                           'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
                                                                '_offset.bias')

        if version is not None and version > 1:
            print_log(
                f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to '
                'version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/multi_scale_deform_attn.py
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import math
|
| 3 |
+
import warnings
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from torch.autograd.function import Function, once_differentiable
|
| 9 |
+
|
| 10 |
+
from annotator.mmpkg.mmcv import deprecated_api_warning
|
| 11 |
+
from annotator.mmpkg.mmcv.cnn import constant_init, xavier_init
|
| 12 |
+
from annotator.mmpkg.mmcv.cnn.bricks.registry import ATTENTION
|
| 13 |
+
from annotator.mmpkg.mmcv.runner import BaseModule
|
| 14 |
+
from ..utils import ext_loader
|
| 15 |
+
|
| 16 |
+
ext_module = ext_loader.load_ext(
|
| 17 |
+
'_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class MultiScaleDeformableAttnFunction(Function):
    """Autograd wrapper around the compiled multi-scale deformable
    attention kernels (``ms_deform_attn_forward`` / ``_backward``)."""

    @staticmethod
    def forward(ctx, value, value_spatial_shapes, value_level_start_index,
                sampling_locations, attention_weights, im2col_step):
        """GPU version of multi-scale deformable attention.

        Args:
            value (Tensor): The value has shape
                (bs, num_keys, mum_heads, embed_dims//num_heads)
            value_spatial_shapes (Tensor): Spatial shape of
                each feature map, has shape (num_levels, 2),
                last dimension 2 represent (h, w)
            sampling_locations (Tensor): The location of sampling points,
                has shape
                (bs ,num_queries, num_heads, num_levels, num_points, 2),
                the last dimension 2 represent (x, y).
            attention_weights (Tensor): The weight of sampling points used
                when calculate the attention, has shape
                (bs ,num_queries, num_heads, num_levels, num_points),
            im2col_step (Tensor): The step used in image to column.

        Returns:
            Tensor: has shape (bs, num_queries, embed_dims)
        """

        ctx.im2col_step = im2col_step
        output = ext_module.ms_deform_attn_forward(
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
            im2col_step=ctx.im2col_step)
        # Saved for the backward extension call.
        ctx.save_for_backward(value, value_spatial_shapes,
                              value_level_start_index, sampling_locations,
                              attention_weights)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """GPU version of backward function.

        Args:
            grad_output (Tensor): Gradient
                of output tensor of forward.

        Returns:
            Tuple[Tensor]: Gradient
                of input tensors in forward.
        """
        value, value_spatial_shapes, value_level_start_index,\
            sampling_locations, attention_weights = ctx.saved_tensors
        # The extension fills these zeroed buffers in place.
        grad_value = torch.zeros_like(value)
        grad_sampling_loc = torch.zeros_like(sampling_locations)
        grad_attn_weight = torch.zeros_like(attention_weights)

        ext_module.ms_deform_attn_backward(
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
            grad_output.contiguous(),
            grad_value,
            grad_sampling_loc,
            grad_attn_weight,
            im2col_step=ctx.im2col_step)

        # Nones correspond to the non-differentiable inputs
        # (spatial shapes, level start index, im2col_step).
        return grad_value, None, None, \
            grad_sampling_loc, grad_attn_weight, None
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
                                        sampling_locations, attention_weights):
    """Pure-PyTorch multi-scale deformable attention (CPU fallback).

    Args:
        value (Tensor): shape
            (bs, num_keys, num_heads, embed_dims//num_heads).
        value_spatial_shapes (Tensor): per-level feature-map sizes,
            shape (num_levels, 2), each row is (h, w).
        sampling_locations (Tensor): normalized sampling points in [0, 1],
            shape (bs, num_queries, num_heads, num_levels, num_points, 2),
            last dim is (x, y).
        attention_weights (Tensor): weights of the sampling points,
            shape (bs, num_queries, num_heads, num_levels, num_points).

    Returns:
        Tensor: shape (bs, num_queries, embed_dims).
    """

    batch, _, heads, head_dim = value.shape
    _, queries, heads, levels, points, _ = sampling_locations.shape
    per_level = value.split([h * w for h, w in value_spatial_shapes], dim=1)
    # grid_sample expects coordinates in [-1, 1].
    grids = 2 * sampling_locations - 1
    sampled = []
    for lvl, (h, w) in enumerate(value_spatial_shapes):
        # (bs, h*w, heads, head_dim) -> (bs*heads, head_dim, h, w)
        feat_map = per_level[lvl].flatten(2).transpose(1, 2).reshape(
            batch * heads, head_dim, h, w)
        # (bs, queries, heads, points, 2) -> (bs*heads, queries, points, 2)
        grid_lvl = grids[:, :, :, lvl].transpose(1, 2).flatten(0, 1)
        # Bilinear sample: (bs*heads, head_dim, queries, points)
        sampled.append(
            F.grid_sample(
                feat_map,
                grid_lvl,
                mode='bilinear',
                padding_mode='zeros',
                align_corners=False))
    # (bs, queries, heads, levels, points) ->
    # (bs*heads, 1, queries, levels*points) to broadcast over head_dim.
    weights = attention_weights.transpose(1, 2).reshape(
        batch * heads, 1, queries, levels * points)
    fused = (torch.stack(sampled, dim=-2).flatten(-2) * weights).sum(-1)
    return fused.view(batch, heads * head_dim,
                      queries).transpose(1, 2).contiguous()
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@ATTENTION.register_module()
class MultiScaleDeformableAttention(BaseModule):
    """An attention module used in Deformable-Detr.

    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
    <https://arxiv.org/pdf/2010.04159.pdf>`_.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Default: 256.
        num_heads (int): Parallel attention heads. Default: 64.
        num_levels (int): The number of feature map used in
            Attention. Default: 4.
        num_points (int): The number of sampling points for
            each query in each head. Default: 4.
        im2col_step (int): The step used in image_to_column.
            Default: 64.
        dropout (float): A Dropout layer on `inp_identity`.
            Default: 0.1.
        batch_first (bool): Key, Query and Value are shape of
            (batch, n, embed_dim)
            or (n, batch, embed_dim). Default to False.
        norm_cfg (dict): Config dict for normalization layer.
            Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self,
                 embed_dims=256,
                 num_heads=8,
                 num_levels=4,
                 num_points=4,
                 im2col_step=64,
                 dropout=0.1,
                 batch_first=False,
                 norm_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if embed_dims % num_heads != 0:
            raise ValueError(f'embed_dims must be divisible by num_heads, '
                             f'but got {embed_dims} and {num_heads}')
        dim_per_head = embed_dims // num_heads
        self.norm_cfg = norm_cfg
        self.dropout = nn.Dropout(dropout)
        self.batch_first = batch_first

        # you'd better set dim_per_head to a power of 2
        # which is more efficient in the CUDA implementation
        def _is_power_of_2(n):
            if (not isinstance(n, int)) or (n < 0):
                raise ValueError(
                    'invalid input for _is_power_of_2: {} (type: {})'.format(
                        n, type(n)))
            return (n & (n - 1) == 0) and n != 0

        if not _is_power_of_2(dim_per_head):
            warnings.warn(
                "You'd better set embed_dims in "
                'MultiScaleDeformAttention to make '
                'the dimension of each attention head a power of 2 '
                'which is more efficient in our CUDA implementation.')

        self.im2col_step = im2col_step
        self.embed_dims = embed_dims
        self.num_levels = num_levels
        self.num_heads = num_heads
        self.num_points = num_points
        # Linear layers predicting per-query sampling offsets (x, y per
        # point) and un-normalized attention weights.
        self.sampling_offsets = nn.Linear(
            embed_dims, num_heads * num_levels * num_points * 2)
        self.attention_weights = nn.Linear(embed_dims,
                                           num_heads * num_levels * num_points)
        self.value_proj = nn.Linear(embed_dims, embed_dims)
        self.output_proj = nn.Linear(embed_dims, embed_dims)
        self.init_weights()

    def init_weights(self):
        """Default initialization for Parameters of Module."""
        constant_init(self.sampling_offsets, 0.)
        # Bias the initial sampling offsets to point in evenly spaced
        # directions around a circle (one direction per head), with the
        # magnitude growing per point index.
        thetas = torch.arange(
            self.num_heads,
            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
        grid_init = (grid_init /
                     grid_init.abs().max(-1, keepdim=True)[0]).view(
                         self.num_heads, 1, 1,
                         2).repeat(1, self.num_levels, self.num_points, 1)
        for i in range(self.num_points):
            grid_init[:, :, i, :] *= i + 1

        self.sampling_offsets.bias.data = grid_init.view(-1)
        constant_init(self.attention_weights, val=0., bias=0.)
        xavier_init(self.value_proj, distribution='uniform', bias=0.)
        xavier_init(self.output_proj, distribution='uniform', bias=0.)
        self._is_init = True

    @deprecated_api_warning({'residual': 'identity'},
                            cls_name='MultiScaleDeformableAttention')
    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                **kwargs):
        """Forward Function of MultiScaleDeformAttention.

        Args:
            query (Tensor): Query of Transformer with shape
                (num_query, bs, embed_dims).
            key (Tensor): The key tensor with shape
                `(num_key, bs, embed_dims)`.
            value (Tensor): The value tensor with shape
                `(num_key, bs, embed_dims)`.
            identity (Tensor): The tensor used for addition, with the
                same shape as `query`. Default None. If None,
                `query` will be used.
            query_pos (Tensor): The positional encoding for `query`.
                Default: None.
            key_pos (Tensor): The positional encoding for `key`. Default
                None.
            reference_points (Tensor): The normalized reference
                points with shape (bs, num_query, num_levels, 2),
                all elements is range in [0, 1], top-left (0,0),
                bottom-right (1, 1), including padding area.
                or (N, Length_{query}, num_levels, 4), add
                additional two dimensions is (w, h) to
                form reference boxes.
            key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_key].
            spatial_shapes (Tensor): Spatial shape of features in
                different levels. With shape (num_levels, 2),
                last dimension represents (h, w).
            level_start_index (Tensor): The start index of each level.
                A tensor has shape ``(num_levels, )`` and can be represented
                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].

        Returns:
            Tensor: forwarded results with shape [num_query, bs, embed_dims].
        """

        if value is None:
            value = query

        if identity is None:
            identity = query
        if query_pos is not None:
            query = query + query_pos
        if not self.batch_first:
            # change to (bs, num_query ,embed_dims)
            query = query.permute(1, 0, 2)
            value = value.permute(1, 0, 2)

        bs, num_query, _ = query.shape
        bs, num_value, _ = value.shape
        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value

        value = self.value_proj(value)
        if key_padding_mask is not None:
            # Padded positions contribute nothing to the sampled values.
            value = value.masked_fill(key_padding_mask[..., None], 0.0)
        value = value.view(bs, num_value, self.num_heads, -1)
        sampling_offsets = self.sampling_offsets(query).view(
            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)
        attention_weights = self.attention_weights(query).view(
            bs, num_query, self.num_heads, self.num_levels * self.num_points)
        # Softmax is taken jointly over all levels and points.
        attention_weights = attention_weights.softmax(-1)

        attention_weights = attention_weights.view(bs, num_query,
                                                   self.num_heads,
                                                   self.num_levels,
                                                   self.num_points)
        if reference_points.shape[-1] == 2:
            # Offsets are predicted in pixels; normalize per level by (w, h)
            # so locations stay in the normalized [0, 1] coordinate frame.
            offset_normalizer = torch.stack(
                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
            sampling_locations = reference_points[:, :, None, :, None, :] \
                + sampling_offsets \
                / offset_normalizer[None, None, None, :, None, :]
        elif reference_points.shape[-1] == 4:
            # Reference boxes: scale offsets by half of the box (w, h).
            sampling_locations = reference_points[:, :, None, :, None, :2] \
                + sampling_offsets / self.num_points \
                * reference_points[:, :, None, :, None, 2:] \
                * 0.5
        else:
            raise ValueError(
                f'Last dim of reference_points must be'
                f' 2 or 4, but get {reference_points.shape[-1]} instead.')
        if torch.cuda.is_available() and value.is_cuda:
            # Use the compiled CUDA kernel when available.
            output = MultiScaleDeformableAttnFunction.apply(
                value, spatial_shapes, level_start_index, sampling_locations,
                attention_weights, self.im2col_step)
        else:
            # Pure-PyTorch fallback (CPU).
            output = multi_scale_deformable_attn_pytorch(
                value, spatial_shapes, sampling_locations, attention_weights)

        output = self.output_proj(output)

        if not self.batch_first:
            # (num_query, bs ,embed_dims)
            output = output.permute(1, 0, 2)

        return self.dropout(output) + identity
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/nms.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
from annotator.mmpkg.mmcv.utils import deprecated_api_warning
|
| 7 |
+
from ..utils import ext_loader
|
| 8 |
+
|
| 9 |
+
ext_module = ext_loader.load_ext(
|
| 10 |
+
'_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated'])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# This function is modified from: https://github.com/pytorch/vision/
|
| 14 |
+
class NMSop(torch.autograd.Function):
    """Autograd-function wrapper for the compiled ``nms`` op, with an ONNX
    ``symbolic`` for export."""

    @staticmethod
    def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold,
                max_num):
        # Optionally drop low-scoring boxes before NMS; remember the
        # surviving indices so returned inds refer to the original input.
        is_filtering_by_score = score_threshold > 0
        if is_filtering_by_score:
            valid_mask = scores > score_threshold
            bboxes, scores = bboxes[valid_mask], scores[valid_mask]
            valid_inds = torch.nonzero(
                valid_mask, as_tuple=False).squeeze(dim=1)

        inds = ext_module.nms(
            bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)

        if max_num > 0:
            # Keep at most max_num boxes (ext op returns score-sorted inds).
            inds = inds[:max_num]
        if is_filtering_by_score:
            # Map back to indices into the unfiltered boxes.
            inds = valid_inds[inds]
        return inds

    @staticmethod
    def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold,
                 max_num):
        """Export either the mmcv custom op or standard ONNX
        NonMaxSuppression, depending on backend."""
        from ..onnx import is_custom_op_loaded
        has_custom_op = is_custom_op_loaded()
        # TensorRT nms plugin is aligned with original nms in ONNXRuntime
        is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'
        if has_custom_op and (not is_trt_backend):
            return g.op(
                'mmcv::NonMaxSuppression',
                bboxes,
                scores,
                iou_threshold_f=float(iou_threshold),
                offset_i=int(offset))
        else:
            from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
            from ..onnx.onnx_utils.symbolic_helper import _size_helper

            # ONNX NonMaxSuppression expects batched, per-class inputs.
            boxes = unsqueeze(g, bboxes, 0)
            scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)

            if max_num > 0:
                max_num = g.op(
                    'Constant',
                    value_t=torch.tensor(max_num, dtype=torch.long))
            else:
                # No cap requested: allow up to the number of input boxes.
                dim = g.op('Constant', value_t=torch.tensor(0))
                max_num = _size_helper(g, bboxes, dim)
            max_output_per_class = max_num
            iou_threshold = g.op(
                'Constant',
                value_t=torch.tensor([iou_threshold], dtype=torch.float))
            score_threshold = g.op(
                'Constant',
                value_t=torch.tensor([score_threshold], dtype=torch.float))
            nms_out = g.op('NonMaxSuppression', boxes, scores,
                           max_output_per_class, iou_threshold,
                           score_threshold)
            # Select the box-index column (column 2) of the
            # (num_selected, 3) NonMaxSuppression output.
            return squeeze(
                g,
                select(
                    g, nms_out, 1,
                    g.op(
                        'Constant',
                        value_t=torch.tensor([2], dtype=torch.long))), 1)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class SoftNMSop(torch.autograd.Function):
    """Autograd-function wrapper for the compiled ``softnms`` op.

    NOTE(review): inputs are moved with ``.cpu()`` before the extension
    call — the compiled soft-NMS appears to be CPU-only; confirm against
    the extension build.
    """

    @staticmethod
    def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method,
                offset):
        # Output buffer for the kept (x1, y1, x2, y2, score) rows,
        # filled by the extension.
        dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
        inds = ext_module.softnms(
            boxes.cpu(),
            scores.cpu(),
            dets.cpu(),
            iou_threshold=float(iou_threshold),
            sigma=float(sigma),
            min_score=float(min_score),
            method=int(method),
            offset=int(offset))
        return dets, inds

    @staticmethod
    def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method,
                 offset):
        """Export the mmcv custom SoftNonMaxSuppression op."""
        from packaging import version
        # Two-output custom ops require torch >= 1.7.0 for export.
        assert version.parse(torch.__version__) >= version.parse('1.7.0')
        nms_out = g.op(
            'mmcv::SoftNonMaxSuppression',
            boxes,
            scores,
            iou_threshold_f=float(iou_threshold),
            sigma_f=float(sigma),
            min_score_f=float(min_score),
            method_i=int(method),
            offset_i=int(offset),
            outputs=2)
        return nms_out
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
    """Dispatch to either CPU or GPU NMS implementations.

    The input can be either torch tensor or numpy array. GPU NMS will be used
    if the input is gpu tensor, otherwise CPU NMS
    will be used. The returned type will always be the same as inputs.

    Arguments:
        boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
        scores (torch.Tensor or np.ndarray): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
        score_threshold (float): score threshold for NMS.
        max_num (int): maximum number of boxes after NMS.

    Returns:
        tuple: kept dets(boxes and scores) and indice, which is always the \
            same data type as the input.

    Example:
        >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
        >>>                   [49.3, 32.9, 51.0, 35.3],
        >>>                   [49.2, 31.8, 51.0, 35.4],
        >>>                   [35.1, 11.5, 39.1, 15.7],
        >>>                   [35.6, 11.8, 39.3, 14.2],
        >>>                   [35.3, 11.5, 39.9, 14.5],
        >>>                   [35.2, 11.7, 39.7, 15.7]], dtype=np.float32)
        >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\
               dtype=np.float32)
        >>> iou_threshold = 0.6
        >>> dets, inds = nms(boxes, scores, iou_threshold)
        >>> assert len(inds) == len(dets) == 3
    """
    assert isinstance(boxes, (torch.Tensor, np.ndarray))
    assert isinstance(scores, (torch.Tensor, np.ndarray))
    # Remember whether the caller passed numpy so the result can be
    # converted back at the end.
    is_numpy = False
    if isinstance(boxes, np.ndarray):
        is_numpy = True
        boxes = torch.from_numpy(boxes)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)
    assert boxes.size(1) == 4
    assert boxes.size(0) == scores.size(0)
    assert offset in (0, 1)

    if torch.__version__ == 'parrots':
        # Parrots has no autograd.Function path; call the extension
        # directly.
        indata_list = [boxes, scores]
        indata_dict = {
            'iou_threshold': float(iou_threshold),
            'offset': int(offset)
        }
        inds = ext_module.nms(*indata_list, **indata_dict)
    else:
        # NMSop.apply also provides the ONNX symbolic for export.
        inds = NMSop.apply(boxes, scores, iou_threshold, offset,
                           score_threshold, max_num)
    # Assemble (x1, y1, x2, y2, score) rows for the kept boxes.
    dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
    if is_numpy:
        dets = dets.cpu().numpy()
        inds = inds.cpu().numpy()
    return dets, inds
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes,
             scores,
             iou_threshold=0.3,
             sigma=0.5,
             min_score=1e-3,
             method='linear',
             offset=0):
    """Dispatch to only CPU Soft NMS implementations.

    The input can be either a torch tensor or numpy array.
    The returned type will always be the same as inputs.

    Arguments:
        boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
        scores (torch.Tensor or np.ndarray): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        sigma (float): hyperparameter for gaussian method
        min_score (float): score filter threshold
        method (str): either 'linear' or 'gaussian'
        offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).

    Returns:
        tuple: kept dets(boxes and scores) and indice, which is always the \
            same data type as the input.

    Example:
        >>> boxes = np.array([[4., 3., 5., 3.],
        >>>                   [4., 3., 5., 4.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.]], dtype=np.float32)
        >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32)
        >>> iou_threshold = 0.6
        >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5)
        >>> assert len(inds) == len(dets) == 5
    """

    assert isinstance(boxes, (torch.Tensor, np.ndarray))
    assert isinstance(scores, (torch.Tensor, np.ndarray))
    # Remember whether the caller passed numpy so the result can be
    # converted back at the end.
    is_numpy = False
    if isinstance(boxes, np.ndarray):
        is_numpy = True
        boxes = torch.from_numpy(boxes)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)
    assert boxes.size(1) == 4
    assert boxes.size(0) == scores.size(0)
    assert offset in (0, 1)
    method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2}
    assert method in method_dict.keys()

    if torch.__version__ == 'parrots':
        # Parrots path: pre-allocate the output buffer and call the
        # extension directly.
        dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
        indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()]
        indata_dict = {
            'iou_threshold': float(iou_threshold),
            'sigma': float(sigma),
            # Cast to float for consistency with the other kwargs and the
            # non-parrots branch below (the original passed the raw value).
            'min_score': float(min_score),
            'method': method_dict[method],
            'offset': int(offset)
        }
        inds = ext_module.softnms(*indata_list, **indata_dict)
    else:
        dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(),
                                     float(iou_threshold), float(sigma),
                                     float(min_score), method_dict[method],
                                     int(offset))

    # The extension fills at most ``inds.size(0)`` rows; drop the unused
    # tail of the pre-allocated buffer.
    dets = dets[:inds.size(0)]

    if is_numpy:
        dets = dets.cpu().numpy()
        inds = inds.cpu().numpy()
        return dets, inds
    else:
        return dets.to(device=boxes.device), inds.to(device=boxes.device)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
    """Performs non-maximum suppression in a batched fashion.

    Modified from https://github.com/pytorch/vision/blob
    /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
    In order to perform NMS independently per class, we add an offset to all
    the boxes. The offset is dependent only on the class idx, and is large
    enough so that boxes from different classes do not overlap.

    Arguments:
        boxes (torch.Tensor): boxes in shape (N, 4).
        scores (torch.Tensor): scores in shape (N, ).
        idxs (torch.Tensor): each index value correspond to a bbox cluster,
            and NMS will not be applied between elements of different idxs,
            shape (N, ).
        nms_cfg (dict): specify nms type and other parameters like iou_thr.
            Possible keys includes the following.

            - iou_thr (float): IoU threshold used for NMS.
            - split_thr (float): threshold number of boxes. In some cases the
                number of boxes is large (e.g., 200k). To avoid OOM during
                training, the users could set `split_thr` to a small value.
                If the number of boxes is greater than the threshold, it will
                perform NMS on each group of boxes separately and sequentially.
                Defaults to 10000.
        class_agnostic (bool): if true, nms is class agnostic,
            i.e. IoU thresholding happens over all boxes,
            regardless of the predicted class.

    Returns:
        tuple: kept dets and indice.
    """
    # Work on a copy so pops below do not mutate the caller's config.
    nms_cfg_ = nms_cfg.copy()
    class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
    if class_agnostic:
        boxes_for_nms = boxes
    else:
        # Shift each class's boxes into a disjoint coordinate range so a
        # single NMS pass cannot suppress across classes.
        max_coordinate = boxes.max()
        offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
        boxes_for_nms = boxes + offsets[:, None]

    nms_type = nms_cfg_.pop('type', 'nms')
    # NOTE(review): eval() resolves the op name against module scope
    # (e.g. 'nms', 'soft_nms'). Safe only if nms_cfg comes from trusted
    # config files, never from untrusted input.
    nms_op = eval(nms_type)

    split_thr = nms_cfg_.pop('split_thr', 10000)
    # Won't split to multiple nms nodes when exporting to onnx
    if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
        dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
        boxes = boxes[keep]
        # -1 indexing works abnormal in TensorRT
        # This assumes `dets` has 5 dimensions where
        # the last dimension is score.
        # TODO: more elegant way to handle the dimension issue.
        # Some type of nms would reweight the score, such as SoftNMS
        scores = dets[:, 4]
    else:
        # Too many boxes: run NMS per class sequentially to bound memory.
        max_num = nms_cfg_.pop('max_num', -1)
        total_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
        # Some type of nms would reweight the score, such as SoftNMS
        scores_after_nms = scores.new_zeros(scores.size())
        for id in torch.unique(idxs):
            mask = (idxs == id).nonzero(as_tuple=False).view(-1)
            dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_)
            total_mask[mask[keep]] = True
            scores_after_nms[mask[keep]] = dets[:, -1]
        keep = total_mask.nonzero(as_tuple=False).view(-1)

        # Re-sort survivors globally by (possibly reweighted) score.
        scores, inds = scores_after_nms[keep].sort(descending=True)
        keep = keep[inds]
        boxes = boxes[keep]

        if max_num > 0:
            keep = keep[:max_num]
            boxes = boxes[:max_num]
            scores = scores[:max_num]

    return torch.cat([boxes, scores[:, None]], -1), keep
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def nms_match(dets, iou_threshold):
    """Matched dets into different groups by NMS.

    NMS match is Similar to NMS but when a bbox is suppressed, nms match will
    record the indice of suppressed bbox and form a group with the indice of
    kept bbox. In each group, indice is sorted as score order.

    Arguments:
        dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
        iou_threshold (float): IoU thresh for NMS.

    Returns:
        List[torch.Tensor | np.ndarray]: The outer list corresponds different
            matched group, the inner Tensor corresponds the indices for a group
            in score order.
    """
    if dets.shape[0] == 0:
        matched = []
    else:
        assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
                                    f'but get {dets.shape}'
        # The extension op runs on CPU tensors only.
        if isinstance(dets, torch.Tensor):
            dets_t = dets.detach().cpu()
        else:
            dets_t = torch.from_numpy(dets)
        indata_list = [dets_t]
        indata_dict = {'iou_threshold': float(iou_threshold)}
        matched = ext_module.nms_match(*indata_list, **indata_dict)
        if torch.__version__ == 'parrots':
            matched = matched.tolist()

    # Return index containers of the same family as the input.
    if isinstance(dets, torch.Tensor):
        return [dets.new_tensor(m, dtype=torch.long) for m in matched]
    else:
        # ``int`` replaces the removed ``np.int`` alias (NumPy >= 1.24
        # raises AttributeError on np.int); behavior is identical.
        return [np.array(m, dtype=int) for m in matched]
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def nms_rotated(dets, scores, iou_threshold, labels=None):
    """Performs non-maximum suppression (NMS) on the rotated boxes according to
    their intersection-over-union (IoU).

    Rotated NMS iteratively removes lower scoring rotated boxes which have an
    IoU greater than iou_threshold with another (higher scoring) rotated box.

    Args:
        dets (Tensor): Rotated boxes in shape (N, 5). They are expected to \
            be in (x_ctr, y_ctr, width, height, angle_radian) format.
        scores (Tensor): scores in shape (N, ).
        iou_threshold (float): IoU thresh for NMS.
        labels (Tensor): boxes' label in shape (N,).

    Returns:
        tuple: kept dets(boxes and scores) and indice, which is always the \
            same data type as the input.
    """
    if dets.shape[0] == 0:
        return dets, None
    # With labels, NMS is applied per class ("multi-label" mode): the label
    # is appended as an extra column so the extension can compare it.
    multi_label = labels is not None
    if multi_label:
        dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1)
    else:
        dets_wl = dets
    # The extension expects boxes pre-sorted by descending score plus the
    # sort permutation.
    _, order = scores.sort(0, descending=True)
    dets_sorted = dets_wl.index_select(0, order)

    if torch.__version__ == 'parrots':
        keep_inds = ext_module.nms_rotated(
            dets_wl,
            scores,
            order,
            dets_sorted,
            iou_threshold=iou_threshold,
            multi_label=multi_label)
    else:
        keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted,
                                           iou_threshold, multi_label)
    # Assemble (box..., score) rows for the kept boxes.
    dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)),
                     dim=1)
    return dets, keep_inds
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/pixel_group.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
from ..utils import ext_loader
|
| 6 |
+
|
| 7 |
+
ext_module = ext_loader.load_ext('_ext', ['pixel_group'])
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
                kernel_region_num, distance_threshold):
    """Group pixels into text instances, which is widely used text detection
    methods.

    Arguments:
        score (np.array or Tensor): The foreground score with size hxw.
        mask (np.array or Tensor): The foreground mask with size hxw.
        embedding (np.array or Tensor): The embedding with size hxwxc to
            distinguish instances.
        kernel_label (np.array or Tensor): The instance kernel index with
            size hxw.
        kernel_contour (np.array or Tensor): The kernel contour with size hxw.
        kernel_region_num (int): The instance kernel region number.
        distance_threshold (float): The embedding distance threshold between
            kernel and pixel in one instance.

    Returns:
        pixel_assignment (List[List[float]]): The instance coordinate list.
            Each element consists of averaged confidence, pixel number, and
            coordinates (x_i, y_i for all pixels) in order.
    """
    assert isinstance(score, (torch.Tensor, np.ndarray))
    assert isinstance(mask, (torch.Tensor, np.ndarray))
    assert isinstance(embedding, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_label, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_contour, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_region_num, int)
    assert isinstance(distance_threshold, float)

    # The extension op consumes torch tensors only.
    if isinstance(score, np.ndarray):
        score = torch.from_numpy(score)
    if isinstance(mask, np.ndarray):
        mask = torch.from_numpy(mask)
    if isinstance(embedding, np.ndarray):
        embedding = torch.from_numpy(embedding)
    if isinstance(kernel_label, np.ndarray):
        kernel_label = torch.from_numpy(kernel_label)
    if isinstance(kernel_contour, np.ndarray):
        kernel_contour = torch.from_numpy(kernel_contour)

    if torch.__version__ == 'parrots':
        # Parrots returns a flat buffer: the first kernel_region_num
        # entries are per-instance lengths, followed by the concatenated
        # per-instance data; unpack it into one array per instance.
        label = ext_module.pixel_group(
            score,
            mask,
            embedding,
            kernel_label,
            kernel_contour,
            kernel_region_num=kernel_region_num,
            distance_threshold=distance_threshold)
        label = label.tolist()
        label = label[0]
        list_index = kernel_region_num
        pixel_assignment = []
        for x in range(kernel_region_num):
            pixel_assignment.append(
                np.array(
                    label[list_index:list_index + int(label[x])],
                    # ``float`` replaces the removed ``np.float`` alias
                    # (NumPy >= 1.24 raises AttributeError on np.float);
                    # behavior is identical.
                    dtype=float))
            list_index = list_index + int(label[x])
    else:
        pixel_assignment = ext_module.pixel_group(score, mask, embedding,
                                                  kernel_label, kernel_contour,
                                                  kernel_region_num,
                                                  distance_threshold)
    return pixel_assignment
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/point_sample.py
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa
|
| 2 |
+
|
| 3 |
+
from os import path as osp
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from torch.nn.modules.utils import _pair
|
| 9 |
+
from torch.onnx.operators import shape_as_tensor
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def bilinear_grid_sample(im, grid, align_corners=False):
    """Given an input and a flow-field grid, computes the output using input
    values and pixel locations from grid. Supported only bilinear interpolation
    method to sample the input pixels.

    Args:
        im (torch.Tensor): Input feature map, shape (N, C, H, W)
        grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2)
        align_corners {bool}: If set to True, the extrema (-1 and 1) are
            considered as referring to the center points of the input's
            corner pixels. If set to False, they are instead considered as
            referring to the corner points of the input's corner pixels,
            making the sampling more resolution agnostic.
    Returns:
        torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
    """
    n, c, h, w = im.shape
    gn, gh, gw, _ = grid.shape
    assert n == gn

    x = grid[:, :, :, 0]
    y = grid[:, :, :, 1]

    # Map normalized [-1, 1] coordinates to pixel coordinates, matching
    # F.grid_sample's two align_corners conventions.
    if align_corners:
        x = ((x + 1) / 2) * (w - 1)
        y = ((y + 1) / 2) * (h - 1)
    else:
        x = ((x + 1) * w - 1) / 2
        y = ((y + 1) * h - 1) / 2

    x = x.view(n, -1)
    y = y.view(n, -1)

    # Integer corners surrounding each sample point.
    x0 = torch.floor(x).long()
    y0 = torch.floor(y).long()
    x1 = x0 + 1
    y1 = y0 + 1

    # Bilinear weights for the four neighbouring pixels.
    wa = ((x1 - x) * (y1 - y)).unsqueeze(1)
    wb = ((x1 - x) * (y - y0)).unsqueeze(1)
    wc = ((x - x0) * (y1 - y)).unsqueeze(1)
    wd = ((x - x0) * (y - y0)).unsqueeze(1)

    # Apply default for grid_sample function zero padding
    im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0)
    padded_h = h + 2
    padded_w = w + 2
    # save points positions after padding
    x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1

    # Clip coordinates to padded image size. ``clamp`` stays on the
    # indices' device/dtype; the original used
    # ``torch.where(x0 < 0, torch.tensor(0), x0)``, which allocates CPU
    # tensors and fails with a device-mismatch error on CUDA inputs.
    x0 = x0.clamp(0, padded_w - 1)
    x1 = x1.clamp(0, padded_w - 1)
    y0 = y0.clamp(0, padded_h - 1)
    y1 = y1.clamp(0, padded_h - 1)

    im_padded = im_padded.view(n, c, -1)

    # Flattened gather indices of the four corners, broadcast over channels.
    x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)

    Ia = torch.gather(im_padded, 2, x0_y0)
    Ib = torch.gather(im_padded, 2, x0_y1)
    Ic = torch.gather(im_padded, 2, x1_y0)
    Id = torch.gather(im_padded, 2, x1_y1)

    return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def is_in_onnx_export_without_custom_ops():
    """Return True when exporting to ONNX while the onnxruntime custom-op
    library is not available on disk."""
    from annotator.mmpkg.mmcv.ops import get_onnxruntime_op_path
    custom_op_lib = get_onnxruntime_op_path()
    exporting = torch.onnx.is_in_onnx_export()
    return exporting and not osp.exists(custom_op_lib)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def normalize(grid):
    """Map a grid from the range [-1, 1] to the range [0, 1].

    Args:
        grid (Tensor): The grid to be normalize, range [-1, 1].
    Returns:
        Tensor: Normalized grid, range [0, 1].
    """
    return 0.5 * (grid + 1.0)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def denormalize(grid):
    """Map a grid from the range [0, 1] back to the range [-1, 1].

    Args:
        grid (Tensor): The grid to be denormalize, range [0, 1].
    Returns:
        Tensor: Denormalized grid, range [-1, 1].
    """
    return 2.0 * grid - 1.0
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def generate_grid(num_grid, size, device):
    """Generate regular square grid of points in [0, 1] x [0, 1] coordinate
    space.

    Args:
        num_grid (int): The number of grids to sample, one for each region.
        size (tuple(int, int)): The side size of the regular grid.
        device (torch.device): Desired device of returned tensor.

    Returns:
        (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that
            contains coordinates for the regular grids.
    """
    # Identity affine transform -> affine_grid yields a regular grid
    # covering the full [-1, 1] square.
    identity = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
    base = F.affine_grid(
        identity, torch.Size((1, 1, *size)), align_corners=False)
    # Shift into [0, 1] and replicate the same grid for every region.
    base = normalize(base)
    return base.view(1, -1, 2).expand(num_grid, -1, -1)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
    """Convert roi based relative point coordinates to image based absolute
    point coordinates.

    Args:
        rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (Tensor): Point coordinates inside RoI, relative to
            RoI, location, range (0, 1), shape (N, P, 2)
    Returns:
        Tensor: Image based absolute point coordinates, shape (N, P, 2)
    """
    with torch.no_grad():
        assert rel_roi_points.size(0) == rois.size(0)
        assert rois.dim() == 2
        assert rel_roi_points.dim() == 3
        assert rel_roi_points.size(2) == 2
        # Drop the leading batch-index column of (idx, x1, y1, x2, y2) RoIs.
        if rois.size(1) == 5:
            rois = rois[:, 1:]
        # Scale relative coordinates by the RoI extent, then shift by the
        # RoI origin. Independent tensors (no in-place ops on the inputs)
        # keep this exportable to ONNX.
        roi_w = rois[:, None, 2] - rois[:, None, 0]
        roi_h = rois[:, None, 3] - rois[:, None, 1]
        xs = rel_roi_points[:, :, 0] * roi_w
        ys = rel_roi_points[:, :, 1] * roi_h
        xs += rois[:, None, 0]
        ys += rois[:, None, 1]
        return torch.stack([xs, ys], dim=2)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def get_shape_from_feature_map(x):
    """Get spatial resolution of input feature map considering exporting to
    onnx mode.

    Args:
        x (torch.Tensor): Input tensor, shape (N, C, H, W)
    Returns:
        torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
    """
    if torch.onnx.is_in_onnx_export():
        # shape_as_tensor keeps the shape symbolic in the exported graph.
        spatial = shape_as_tensor(x)[2:]
    else:
        spatial = torch.tensor(x.shape[2:])
    # flip (H, W) -> (W, H) so the result reads as (width, height).
    return spatial.flip(0).view(1, 1, 2).to(x.device).float()
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
    """Convert image based absolute point coordinates to image based relative
    coordinates for sampling.

    Args:
        abs_img_points (Tensor): Image based absolute point coordinates,
            shape (N, P, 2)
        img (tuple/Tensor): (height, width) of image or feature map.
        spatial_scale (float): Scale points by this factor. Default: 1.

    Returns:
        Tensor: Image based relative point coordinates for sampling,
            shape (N, P, 2)
    """
    is_shape_tuple = isinstance(img, tuple) and len(img) == 2
    is_feature_map = isinstance(img, torch.Tensor) and len(img.shape) == 4
    assert is_shape_tuple or is_feature_map

    if is_shape_tuple:
        h, w = img
        # (width, height) scale, shaped to broadcast over (N, P, 2).
        scale = torch.tensor([w, h],
                             dtype=torch.float,
                             device=abs_img_points.device).view(1, 1, 2)
    else:
        scale = get_shape_from_feature_map(img)

    return abs_img_points / scale * spatial_scale
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def rel_roi_point_to_rel_img_point(rois,
                                   rel_roi_points,
                                   img,
                                   spatial_scale=1.):
    """Convert roi based relative point coordinates to image based relative
    point coordinates for sampling.

    Args:
        rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (Tensor): Point coordinates inside RoI, relative to
            RoI, location, range (0, 1), shape (N, P, 2)
        img (tuple/Tensor): (height, width) of image or feature map.
        spatial_scale (float): Scale points by this factor. Default: 1.

    Returns:
        Tensor: Image based relative point coordinates for sampling,
            shape (N, P, 2)
    """
    # Two-step conversion: RoI-relative -> image-absolute -> image-relative.
    abs_points = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
    return abs_img_point_to_rel_img_point(abs_points, img, spatial_scale)
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def point_sample(input, points, align_corners=False, **kwargs):
    """A wrapper around :func:`grid_sample` to support 3D point_coords tensors
    Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to
    lie inside ``[0, 1] x [0, 1]`` square.

    Args:
        input (Tensor): Feature map, shape (N, C, H, W).
        points (Tensor): Image based absolute point coordinates (normalized),
            range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2).
        align_corners (bool): Whether align_corners. Default: False

    Returns:
        Tensor: Features of `point` on `input`, shape (N, C, P) or
            (N, C, Hgrid, Wgrid).
    """
    # grid_sample needs a 4D grid; insert a dummy W axis for (N, P, 2)
    # inputs and remember to squeeze it back out.
    squeeze_last = points.dim() == 3
    if squeeze_last:
        points = points.unsqueeze(2)

    if is_in_onnx_export_without_custom_ops():
        # Without compiled onnxruntime custom ops, fall back to the pure
        # python grid sampler so the exported graph uses standard nodes.
        output = bilinear_grid_sample(
            input, denormalize(points), align_corners=align_corners)
    else:
        output = F.grid_sample(
            input, denormalize(points), align_corners=align_corners, **kwargs)

    if squeeze_last:
        output = output.squeeze(3)
    return output
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
class SimpleRoIAlign(nn.Module):
    # RoI feature extractor from PointRend: samples a regular grid of points
    # inside each RoI via ``point_sample`` instead of the standard RoIAlign
    # CUDA kernel.

    def __init__(self, output_size, spatial_scale, aligned=True):
        """Simple RoI align in PointRend, faster than standard RoIAlign.

        Args:
            output_size (tuple[int]): h, w
            spatial_scale (float): scale the input boxes by this number
            aligned (bool): if False, use the legacy implementation in
                MMDetection, align_corners=True will be used in F.grid_sample.
                If True, align the results more perfectly.
        """

        super(SimpleRoIAlign, self).__init__()
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        # to be consistent with other RoI ops
        self.use_torchvision = False
        self.aligned = aligned

    def forward(self, features, rois):
        # rois: (num_rois, 5) with a leading batch index column —
        # presumably (batch_idx, x1, y1, x2, y2); confirm against callers.
        num_imgs = features.size(0)
        num_rois = rois.size(0)
        # One identical regular sampling grid per RoI.
        rel_roi_points = generate_grid(
            num_rois, self.output_size, device=rois.device)

        if torch.onnx.is_in_onnx_export():
            # Export path: sample all RoIs in one batched call to avoid
            # per-image python loops in the traced graph.
            rel_img_points = rel_roi_point_to_rel_img_point(
                rois, rel_roi_points, features, self.spatial_scale)
            rel_img_points = rel_img_points.reshape(num_imgs, -1,
                                                    *rel_img_points.shape[1:])
            point_feats = point_sample(
                features, rel_img_points, align_corners=not self.aligned)
            point_feats = point_feats.transpose(1, 2)
        else:
            # Eager path: process each image's RoIs separately, selected by
            # the batch-index column.
            point_feats = []
            for batch_ind in range(num_imgs):
                # unravel batch dim
                feat = features[batch_ind].unsqueeze(0)
                inds = (rois[:, 0].long() == batch_ind)
                if inds.any():
                    rel_img_points = rel_roi_point_to_rel_img_point(
                        rois[inds], rel_roi_points[inds], feat,
                        self.spatial_scale).unsqueeze(0)
                    point_feat = point_sample(
                        feat, rel_img_points, align_corners=not self.aligned)
                    point_feat = point_feat.squeeze(0).transpose(0, 1)
                    point_feats.append(point_feat)

            point_feats = torch.cat(point_feats, dim=0)

        # Reshape the flat sampled points back into (num_rois, C, h, w).
        channels = features.size(1)
        roi_feats = point_feats.reshape(num_rois, channels, *self.output_size)

        return roi_feats

    def __repr__(self):
        format_str = self.__class__.__name__
        format_str += '(output_size={}, spatial_scale={}'.format(
            self.output_size, self.spatial_scale)
        return format_str
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_in_boxes.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
from ..utils import ext_loader
|
| 4 |
+
|
| 5 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 6 |
+
'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward',
|
| 7 |
+
'points_in_boxes_all_forward'
|
| 8 |
+
])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def points_in_boxes_part(points, boxes):
    """Find the single box that each point falls into (CUDA op).

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH
            coordinate.
        boxes (torch.Tensor): [B, T, 7], num_valid_boxes <= T,
            [x, y, z, x_size, y_size, z_size, rz] in LiDAR/DEPTH coordinate;
            (x, y, z) is the bottom center.

    Returns:
        torch.Tensor: (B, M) index of the containing box for every point,
        with -1 for background points.
    """
    # Validate shapes before handing the tensors to the CUDA kernel.
    assert points.shape[0] == boxes.shape[0], (
        'Points and boxes should have the same batch size, '
        f'but got {points.shape[0]} and {boxes.shape[0]}')
    assert boxes.shape[2] == 7, (
        'boxes dimension should be 7, '
        f'but got unexpected shape {boxes.shape[2]}')
    assert points.shape[2] == 3, (
        'points dimension should be 3, '
        f'but got unexpected shape {points.shape[2]}')

    batch_size, num_points = points.shape[0], points.shape[1]
    # -1 marks points that fall into no box.
    box_idxs_of_pts = points.new_full((batch_size, num_points),
                                      -1,
                                      dtype=torch.int)

    # The CUDA op allocates temporaries on the *current* device; if the
    # inputs live on a different GPU, the output is silently wrong. Force
    # the current device to match the inputs.
    # See https://github.com/open-mmlab/mmdetection3d/issues/305.
    points_device = points.get_device()
    assert points_device == boxes.get_device(), \
        'Points and boxes should be put on the same device'
    if torch.cuda.current_device() != points_device:
        torch.cuda.set_device(points_device)

    ext_module.points_in_boxes_part_forward(boxes.contiguous(),
                                            points.contiguous(),
                                            box_idxs_of_pts)
    return box_idxs_of_pts
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def points_in_boxes_cpu(points, boxes):
    """Find every box that each point falls into (CPU).

    CPU counterpart of :meth:`points_in_boxes_all`.

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH
            coordinate.
        boxes (torch.Tensor): [B, T, 7], num_valid_boxes <= T,
            [x, y, z, x_size, y_size, z_size, rz]; (x, y, z) is the bottom
            center.

    Returns:
        torch.Tensor: (B, M, T) membership matrix, default background = 0.
    """
    assert points.shape[0] == boxes.shape[0], (
        'Points and boxes should have the same batch size, '
        f'but got {points.shape[0]} and {boxes.shape[0]}')
    assert boxes.shape[2] == 7, (
        'boxes dimension should be 7, '
        f'but got unexpected shape {boxes.shape[2]}')
    assert points.shape[2] == 3, (
        'points dimension should be 3, '
        f'but got unexpected shape {points.shape[2]}')

    batch_size, num_points, _ = points.shape
    num_boxes = boxes.shape[1]

    # The C++ op fills a (T, M) slab per sample; transpose to (M, T) once
    # every sample has been processed.
    point_indices = points.new_zeros((batch_size, num_boxes, num_points),
                                     dtype=torch.int)
    for sample_idx in range(batch_size):
        ext_module.points_in_boxes_cpu_forward(
            boxes[sample_idx].float().contiguous(),
            points[sample_idx].float().contiguous(),
            point_indices[sample_idx])
    return point_indices.transpose(1, 2)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def points_in_boxes_all(points, boxes):
    """Find all boxes in which each point is (CUDA).

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
            (x, y, z) is the bottom center.

    Returns:
        box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
    """
    # Fix: the original message interpolated boxes.shape[0] twice, hiding
    # the actual points batch size from the error text.
    assert boxes.shape[0] == points.shape[0], \
        'Points and boxes should have the same batch size, ' \
        f'but got {points.shape[0]} and {boxes.shape[0]}'
    assert boxes.shape[2] == 7, \
        'boxes dimension should be 7, ' \
        f'but got unexpected shape {boxes.shape[2]}'
    assert points.shape[2] == 3, \
        'points dimension should be 3, ' \
        f'but got unexpected shape {points.shape[2]}'
    batch_size, num_points, _ = points.shape
    num_boxes = boxes.shape[1]

    # new_zeros already zero-fills, so the redundant .fill_(0) from the
    # original was dropped.
    box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes),
                                       dtype=torch.int)

    # The CUDA op creates temporaries on the *current* device; if it does
    # not match the inputs' device, the output is silently wrong, so force
    # the current device first.
    # See https://github.com/open-mmlab/mmdetection3d/issues/305.
    points_device = points.get_device()
    assert points_device == boxes.get_device(), \
        'Points and boxes should be put on the same device'
    if torch.cuda.current_device() != points_device:
        torch.cuda.set_device(points_device)

    ext_module.points_in_boxes_all_forward(boxes.contiguous(),
                                           points.contiguous(),
                                           box_idxs_of_pts)

    return box_idxs_of_pts
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_sampler.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn as nn
|
| 5 |
+
|
| 6 |
+
from annotator.mmpkg.mmcv.runner import force_fp32
|
| 7 |
+
from .furthest_point_sample import (furthest_point_sample,
|
| 8 |
+
furthest_point_sample_with_dist)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def calc_square_dist(point_feat_a, point_feat_b, norm=True):
    """Compute pairwise squared distances between two point-feature sets.

    Args:
        point_feat_a (Tensor): (B, N, C) Feature vector of each point.
        point_feat_b (Tensor): (B, M, C) Feature vector of each point.
        norm (Bool, optional): Whether to normalize the distance.
            Default: True.

    Returns:
        Tensor: (B, N, M) Distance between each pair points.
    """
    num_channel = point_feat_a.shape[-1]
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, evaluated batch-wise with
    # broadcasting: (B, N, 1) + (B, 1, M) - 2 * (B, N, M).
    sq_a = point_feat_a.unsqueeze(dim=2).pow(2).sum(dim=-1)
    sq_b = point_feat_b.unsqueeze(dim=1).pow(2).sum(dim=-1)
    cross = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))
    dist = sq_a + sq_b - 2 * cross
    if norm:
        # The "normalized" variant divides the *root* distance by the
        # channel count, matching the original implementation.
        dist = torch.sqrt(dist) / num_channel
    return dist
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_sampler_cls(sampler_type):
    """Get the points-sampler class for a sampler type name.

    Args:
        sampler_type (str): The type of points sampler.
            The valid values are "D-FPS", "F-FPS", or "FS".

    Returns:
        class: Points sampler type.

    Raises:
        KeyError: If ``sampler_type`` is not one of the supported names.
    """
    sampler_mappings = {
        'D-FPS': DFPSSampler,
        'F-FPS': FFPSSampler,
        'FS': FSSampler,
    }
    try:
        return sampler_mappings[sampler_type]
    except KeyError:
        # Fix: the original used a backslash line-continuation *inside* the
        # f-string literal, which embedded a run of indentation spaces into
        # the error message.
        raise KeyError(
            f'Supported `sampler_type` are {sampler_mappings.keys()}, '
            f'but got {sampler_type}')
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class PointsSampler(nn.Module):
    """Points sampling.

    Args:
        num_point (list[int]): Number of sample points.
        fps_mod_list (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
            F-FPS: using feature distances for FPS.
            D-FPS: using Euclidean distances of points for FPS.
            FS: using F-FPS and D-FPS simultaneously.
        fps_sample_range_list (list[int], optional):
            Range of points to apply FPS. Default: [-1].
    """

    def __init__(self,
                 num_point: List[int],
                 fps_mod_list: List[str] = ['D-FPS'],
                 fps_sample_range_list: List[int] = [-1]):
        super().__init__()
        # FPS would be applied to different fps_mod in the list,
        # so the length of the num_point should be equal to
        # fps_mod_list and fps_sample_range_list.
        assert len(num_point) == len(fps_mod_list) == len(
            fps_sample_range_list)
        self.num_point = num_point
        self.fps_sample_range_list = fps_sample_range_list
        # One sampler module per FPS mode, applied in order over the
        # corresponding point ranges.
        self.samplers = nn.ModuleList()
        for fps_mod in fps_mod_list:
            self.samplers.append(get_sampler_cls(fps_mod)())
        self.fp16_enabled = False

    @force_fp32()
    def forward(self, points_xyz, features):
        """
        Args:
            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            features (Tensor): (B, C, N) Descriptors of the features.

        Returns:
            Tensor: (B, npoint, sample_num) Indices of sampled points.
        """
        indices = []
        last_fps_end_index = 0

        for fps_sample_range, sampler, npoint in zip(
                self.fps_sample_range_list, self.samplers, self.num_point):
            assert fps_sample_range < points_xyz.shape[1]

            # Slice out the point range this sampler is responsible for;
            # -1 means "all remaining points from the current offset".
            if fps_sample_range == -1:
                sample_points_xyz = points_xyz[:, last_fps_end_index:]
                if features is not None:
                    sample_features = features[:, :, last_fps_end_index:]
                else:
                    sample_features = None
            else:
                sample_points_xyz = \
                    points_xyz[:, last_fps_end_index:fps_sample_range]
                if features is not None:
                    sample_features = features[:, :, last_fps_end_index:
                                               fps_sample_range]
                else:
                    sample_features = None

            fps_idx = sampler(sample_points_xyz.contiguous(), sample_features,
                              npoint)

            # Shift the sampler's local indices back into global indexing.
            indices.append(fps_idx + last_fps_end_index)
            # NOTE(review): when fps_sample_range is -1, this *decrements*
            # the offset by one. Harmless for a single-stage sampler (the
            # loop ends), but looks suspicious for multi-stage sampling —
            # confirm against upstream mmcv/mmdetection3d.
            last_fps_end_index += fps_sample_range
        indices = torch.cat(indices, dim=1)

        return indices
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
class DFPSSampler(nn.Module):
    """Farthest point sampling driven by Euclidean point distances."""

    def __init__(self):
        super().__init__()

    def forward(self, points, features, npoint):
        """Sample ``npoint`` indices with D-FPS; ``features`` is unused."""
        return furthest_point_sample(points.contiguous(), npoint)
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
class FFPSSampler(nn.Module):
    """Farthest point sampling driven by feature-space distances."""

    def __init__(self):
        super().__init__()

    def forward(self, points, features, npoint):
        """Sample ``npoint`` indices with F-FPS."""
        assert features is not None, \
            'feature input to FFPS_Sampler should not be None'
        # Fuse xyz with the (transposed) descriptors, then run FPS on the
        # pairwise squared distances of the fused vectors.
        fused = torch.cat([points, features.transpose(1, 2)], dim=2)
        pairwise_dist = calc_square_dist(fused, fused, norm=False)
        return furthest_point_sample_with_dist(pairwise_dist, npoint)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
class FSSampler(nn.Module):
    """Fused sampling: F-FPS and D-FPS applied side by side."""

    def __init__(self):
        super().__init__()

    def forward(self, points, features, npoint):
        """Sample ``npoint`` indices with each of F-FPS and D-FPS, then
        concatenate the two index sets along dim 1."""
        assert features is not None, \
            'feature input to FS_Sampler should not be None'
        idx_ffps = FFPSSampler()(points, features, npoint)
        idx_dfps = DFPSSampler()(points, features, npoint)
        return torch.cat([idx_ffps, idx_dfps], dim=1)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/psa_mask.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
|
| 2 |
+
from torch import nn
|
| 3 |
+
from torch.autograd import Function
|
| 4 |
+
from torch.nn.modules.utils import _pair
|
| 5 |
+
|
| 6 |
+
from ..utils import ext_loader
|
| 7 |
+
|
| 8 |
+
ext_module = ext_loader.load_ext('_ext',
|
| 9 |
+
['psamask_forward', 'psamask_backward'])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class PSAMaskFunction(Function):
    """Autograd function wrapping the compiled PSAMask forward/backward ops.

    Rearranges a (B, h_mask*w_mask, H, W) over-completed mask tensor into a
    (B, H*W, H, W) attention map via the ``psamask_forward`` extension op.
    """

    @staticmethod
    def symbolic(g, input, psa_type, mask_size):
        # ONNX export: emit the custom MMCV op node.
        return g.op(
            'mmcv::MMCVPSAMask',
            input,
            psa_type_i=psa_type,
            mask_size_i=mask_size)

    @staticmethod
    def forward(ctx, input, psa_type, mask_size):
        # Stash op parameters for backward; mask_size is normalized to
        # an (h, w) pair.
        ctx.psa_type = psa_type
        ctx.mask_size = _pair(mask_size)
        ctx.save_for_backward(input)

        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        # Input channels must encode one value per mask position.
        assert channels == h_mask * w_mask
        output = input.new_zeros(
            (batch_size, h_feature * w_feature, h_feature, w_feature))

        ext_module.psamask_forward(
            input,
            output,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # Recover saved input only for its shape; the gradient is computed
        # entirely by the extension op.
        input = ctx.saved_tensors[0]
        psa_type = ctx.psa_type
        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        grad_input = grad_output.new_zeros(
            (batch_size, channels, h_feature, w_feature))
        ext_module.psamask_backward(
            grad_output,
            grad_input,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        # Only `input` receives a gradient; psa_type/mask_size are
        # non-differentiable parameters.
        return grad_input, None, None, None
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
psa_mask = PSAMaskFunction.apply
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class PSAMask(nn.Module):
    """Module wrapper around the PSAMask op (PSANet).

    Args:
        psa_type (str): Either ``'collect'`` or ``'distribute'``.
        mask_size (int | tuple, optional): Size of the attention mask.
            Default: None.
    """

    def __init__(self, psa_type, mask_size=None):
        super(PSAMask, self).__init__()
        assert psa_type in ['collect', 'distribute']
        # The compiled op takes the mode as an integer enum:
        # 0 = collect, 1 = distribute.
        self.psa_type_enum = 0 if psa_type == 'collect' else 1
        self.mask_size = mask_size
        self.psa_type = psa_type

    def forward(self, input):
        return psa_mask(input, self.psa_type_enum, self.mask_size)

    def __repr__(self):
        return (f'{self.__class__.__name__}'
                f'(psa_type={self.psa_type}, '
                f'mask_size={self.mask_size})')
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
from torch.autograd.function import once_differentiable
|
| 6 |
+
from torch.nn.modules.utils import _pair
|
| 7 |
+
|
| 8 |
+
from ..utils import deprecated_api_warning, ext_loader
|
| 9 |
+
|
| 10 |
+
ext_module = ext_loader.load_ext('_ext',
|
| 11 |
+
['roi_align_forward', 'roi_align_backward'])
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class RoIAlignFunction(Function):
    """Autograd function for RoIAlign backed by the compiled mmcv ops,
    with an ONNX symbolic that falls back to the standard ``RoiAlign``
    node when the custom op is not loaded."""

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
                 pool_mode, aligned):
        from ..onnx import is_custom_op_loaded
        has_custom_op = is_custom_op_loaded()
        if has_custom_op:
            # Custom runtime available: emit the MMCV op directly.
            return g.op(
                'mmcv::MMCVRoiAlign',
                input,
                rois,
                output_height_i=output_size[0],
                output_width_i=output_size[1],
                spatial_scale_f=spatial_scale,
                sampling_ratio_i=sampling_ratio,
                mode_s=pool_mode,
                aligned_i=aligned)
        else:
            # Fallback: express the op with the standard ONNX RoiAlign node,
            # splitting the batch-index column out of the rois tensor.
            from torch.onnx.symbolic_opset9 import sub, squeeze
            from torch.onnx.symbolic_helper import _slice_helper
            from torch.onnx import TensorProtoDataType
            # batch_indices = rois[:, 0].long()
            batch_indices = _slice_helper(
                g, rois, axes=[1], starts=[0], ends=[1])
            batch_indices = squeeze(g, batch_indices, 1)
            batch_indices = g.op(
                'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
            # rois = rois[:, 1:]
            rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
            if aligned:
                # rois -= 0.5/spatial_scale
                aligned_offset = g.op(
                    'Constant',
                    value_t=torch.tensor([0.5 / spatial_scale],
                                         dtype=torch.float32))
                rois = sub(g, rois, aligned_offset)
            # roi align
            return g.op(
                'RoiAlign',
                input,
                rois,
                batch_indices,
                output_height_i=output_size[0],
                output_width_i=output_size[1],
                spatial_scale_f=spatial_scale,
                sampling_ratio_i=max(0, sampling_ratio),
                mode_s=pool_mode)

    @staticmethod
    def forward(ctx,
                input,
                rois,
                output_size,
                spatial_scale=1.0,
                sampling_ratio=0,
                pool_mode='avg',
                aligned=True):
        # Stash parameters for backward.
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        assert pool_mode in ('max', 'avg')
        # The extension op takes the pool mode as an int: 0 = max, 1 = avg.
        ctx.pool_mode = 0 if pool_mode == 'max' else 1
        ctx.aligned = aligned
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)
        # Max pooling needs argmax buffers to route gradients in backward;
        # avg pooling passes empty placeholders.
        if ctx.pool_mode == 0:
            argmax_y = input.new_zeros(output_shape)
            argmax_x = input.new_zeros(output_shape)
        else:
            argmax_y = input.new_zeros(0)
            argmax_x = input.new_zeros(0)

        ext_module.roi_align_forward(
            input,
            rois,
            output,
            argmax_y,
            argmax_x,
            aligned_height=ctx.output_size[0],
            aligned_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            pool_mode=ctx.pool_mode,
            aligned=ctx.aligned)

        ctx.save_for_backward(rois, argmax_y, argmax_x)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        rois, argmax_y, argmax_x = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)
        # complex head architecture may cause grad_output uncontiguous.
        grad_output = grad_output.contiguous()
        ext_module.roi_align_backward(
            grad_output,
            rois,
            argmax_y,
            argmax_x,
            grad_input,
            aligned_height=ctx.output_size[0],
            aligned_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            pool_mode=ctx.pool_mode,
            aligned=ctx.aligned)
        # Gradients flow only to `input`; the remaining forward arguments
        # are non-differentiable.
        return grad_input, None, None, None, None, None, None
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
roi_align = RoIAlignFunction.apply
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
class RoIAlign(nn.Module):
    """RoI align pooling layer.

    Args:
        output_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of inputs samples to take for each
            output sample. 0 to take samples densely for current models.
        pool_mode (str, 'avg' or 'max'): pooling mode in each bin.
        aligned (bool): if False, use the legacy implementation in
            MMDetection. If True, align the results more perfectly.
        use_torchvision (bool): whether to use roi_align from torchvision.

    Note:
        The implementation of RoIAlign when aligned=True is modified from
        https://github.com/facebookresearch/detectron2/

        The meaning of aligned=True:

        Given a continuous coordinate c, its two neighboring pixel
        indices (in our pixel model) are computed by floor(c - 0.5) and
        ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete
        indices [0] and [1] (which are sampled from the underlying signal
        at continuous coordinates 0.5 and 1.5). But the original roi_align
        (aligned=False) does not subtract the 0.5 when computing
        neighboring pixel indices and therefore it uses pixels with a
        slightly incorrect alignment (relative to our pixel model) when
        performing bilinear interpolation.

        With `aligned=True`,
        we first appropriately scale the ROI and then shift it by -0.5
        prior to calling roi_align. This produces the correct neighbors;

        The difference does not make a difference to the model's
        performance if ROIAlign is used together with conv layers.
    """

    @deprecated_api_warning(
        {
            'out_size': 'output_size',
            'sample_num': 'sampling_ratio'
        },
        cls_name='RoIAlign')
    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 pool_mode='avg',
                 aligned=True,
                 use_torchvision=False):
        super(RoIAlign, self).__init__()

        # Normalize scalar sizes to (h, w) and coerce numeric types once.
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.pool_mode = pool_mode
        self.aligned = aligned
        self.use_torchvision = use_torchvision

    def forward(self, input, rois):
        """
        Args:
            input: NCHW images
            rois: Bx5 boxes. First column is the index into N.\
                The other 4 columns are xyxy.
        """
        if self.use_torchvision:
            from torchvision.ops import roi_align as tv_roi_align
            # Older torchvision releases lack the `aligned` parameter;
            # detect it by inspecting the function signature.
            if 'aligned' in tv_roi_align.__code__.co_varnames:
                return tv_roi_align(input, rois, self.output_size,
                                    self.spatial_scale, self.sampling_ratio,
                                    self.aligned)
            else:
                if self.aligned:
                    # Emulate aligned=True by shifting boxes by half a pixel.
                    # NOTE(review): this in-place `-=` mutates the caller's
                    # `rois` tensor — confirm whether callers rely on `rois`
                    # being unchanged after forward().
                    rois -= rois.new_tensor([0.] +
                                            [0.5 / self.spatial_scale] * 4)
                return tv_roi_align(input, rois, self.output_size,
                                    self.spatial_scale, self.sampling_ratio)
        else:
            return roi_align(input, rois, self.output_size, self.spatial_scale,
                             self.sampling_ratio, self.pool_mode, self.aligned)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(output_size={self.output_size}, '
        s += f'spatial_scale={self.spatial_scale}, '
        s += f'sampling_ratio={self.sampling_ratio}, '
        s += f'pool_mode={self.pool_mode}, '
        s += f'aligned={self.aligned}, '
        s += f'use_torchvision={self.use_torchvision})'
        return s
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align_rotated.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from torch.autograd import Function
|
| 4 |
+
|
| 5 |
+
from ..utils import ext_loader
|
| 6 |
+
|
| 7 |
+
ext_module = ext_loader.load_ext(
|
| 8 |
+
'_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward'])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class RoIAlignRotatedFunction(Function):
    """Autograd function computing RoI align over rotated proposals.

    Forward and backward are delegated to the compiled ``_ext`` kernels
    (``roi_align_rotated_forward`` / ``roi_align_rotated_backward``).
    """

    @staticmethod
    def _parse_out_size(out_size):
        """Normalize ``out_size`` into an ``(out_h, out_w)`` pair of ints.

        Args:
            out_size (int | tuple[int, int]): Desired output resolution.

        Returns:
            tuple[int, int]: ``(out_h, out_w)``.

        Raises:
            TypeError: If ``out_size`` is neither an int nor a tuple of
                two ints.
        """
        if isinstance(out_size, int):
            return out_size, out_size
        if isinstance(out_size, tuple):
            assert len(out_size) == 2
            assert isinstance(out_size[0], int)
            assert isinstance(out_size[1], int)
            return out_size
        raise TypeError(
            '"out_size" must be an integer or tuple of integers')

    @staticmethod
    def symbolic(g, features, rois, out_size, spatial_scale, sample_num,
                 aligned, clockwise):
        """Symbolic function for ONNX export of the custom op."""
        out_h, out_w = RoIAlignRotatedFunction._parse_out_size(out_size)
        return g.op(
            'mmcv::MMCVRoIAlignRotated',
            features,
            rois,
            output_height_i=out_h,
            # BUG FIX: this attribute previously passed ``out_h``, which
            # silently broke ONNX export for non-square output sizes.
            output_width_i=out_w,
            spatial_scale_f=spatial_scale,
            sampling_ratio_i=sample_num,
            aligned_i=aligned,
            clockwise_i=clockwise)

    @staticmethod
    def forward(ctx,
                features,
                rois,
                out_size,
                spatial_scale,
                sample_num=0,
                aligned=True,
                clockwise=False):
        """Run the forward kernel and stash state needed for backward."""
        out_h, out_w = RoIAlignRotatedFunction._parse_out_size(out_size)
        ctx.spatial_scale = spatial_scale
        ctx.sample_num = sample_num
        ctx.aligned = aligned
        ctx.clockwise = clockwise
        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()

        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size(0)

        output = features.new_zeros(num_rois, num_channels, out_h, out_w)
        ext_module.roi_align_rotated_forward(
            features,
            rois,
            output,
            pooled_height=out_h,
            pooled_width=out_w,
            spatial_scale=spatial_scale,
            sample_num=sample_num,
            aligned=aligned,
            clockwise=clockwise)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Propagate gradients to ``features`` only; rois get no gradient."""
        feature_size = ctx.feature_size
        spatial_scale = ctx.spatial_scale
        aligned = ctx.aligned
        clockwise = ctx.clockwise
        sample_num = ctx.sample_num
        rois = ctx.saved_tensors[0]
        assert feature_size is not None
        batch_size, num_channels, data_height, data_width = feature_size

        # Recover the pooled resolution from the incoming gradient shape.
        out_w = grad_output.size(3)
        out_h = grad_output.size(2)

        grad_input = grad_rois = None

        if ctx.needs_input_grad[0]:
            grad_input = rois.new_zeros(batch_size, num_channels, data_height,
                                        data_width)
            ext_module.roi_align_rotated_backward(
                grad_output.contiguous(),
                rois,
                grad_input,
                pooled_height=out_h,
                pooled_width=out_w,
                spatial_scale=spatial_scale,
                sample_num=sample_num,
                aligned=aligned,
                clockwise=clockwise)
        # One ``None`` per non-tensor forward argument.
        return grad_input, grad_rois, None, None, None, None, None


roi_align_rotated = RoIAlignRotatedFunction.apply
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
class RoIAlignRotated(nn.Module):
    """RoI align pooling layer for rotated proposals.

    Takes a feature map of shape (N, C, H, W) and rois of shape (n, 6),
    where each roi is decoded as (batch_index, center_x, center_y, w, h,
    angle) and the angle is given in radians.

    Args:
        out_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sample_num (int): number of inputs samples to take for each
            output sample. 0 to take samples densely for current models.
        aligned (bool): if False, use the legacy implementation in
            MMDetection. If True, align the results more perfectly.
            Default: True.
        clockwise (bool): If True, the angle in each proposal follows a
            clockwise fashion in image space, otherwise, the angle is
            counterclockwise. Default: False.

    Note:
        The aligned=True implementation is adapted from
        https://github.com/facebookresearch/detectron2/

        With ``aligned=True`` a continuous coordinate c has its pixel
        neighbors computed as floor(c - 0.5) and ceil(c - 0.5), i.e. the
        ROI is scaled and then shifted by -0.5 before sampling, producing
        the correct neighbors for bilinear interpolation. The legacy
        (aligned=False) path skips the -0.5 shift and therefore samples
        with a slightly incorrect alignment. The difference does not
        affect model performance when ROIAlign is followed by conv layers.
    """

    def __init__(self,
                 out_size,
                 spatial_scale,
                 sample_num=0,
                 aligned=True,
                 clockwise=False):
        super(RoIAlignRotated, self).__init__()

        self.out_size = out_size
        self.aligned = aligned
        self.clockwise = clockwise
        self.spatial_scale = float(spatial_scale)
        self.sample_num = int(sample_num)

    def forward(self, features, rois):
        """Pool each rotated roi from ``features`` to ``out_size``."""
        return RoIAlignRotatedFunction.apply(features, rois, self.out_size,
                                             self.spatial_scale,
                                             self.sample_num, self.aligned,
                                             self.clockwise)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_pool.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
from torch.autograd.function import once_differentiable
|
| 6 |
+
from torch.nn.modules.utils import _pair
|
| 7 |
+
|
| 8 |
+
from ..utils import ext_loader
|
| 9 |
+
|
| 10 |
+
ext_module = ext_loader.load_ext('_ext',
|
| 11 |
+
['roi_pool_forward', 'roi_pool_backward'])
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class RoIPoolFunction(Function):
    """Max RoI pooling backed by the compiled ``_ext`` kernels."""

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale):
        """Export as the standard ONNX ``MaxRoiPool`` node."""
        return g.op(
            'MaxRoiPool',
            input,
            rois,
            pooled_shape_i=output_size,
            spatial_scale_f=spatial_scale)

    @staticmethod
    def forward(ctx, input, rois, output_size, spatial_scale=1.0):
        pooled_h, pooled_w = _pair(output_size)
        ctx.output_size = (pooled_h, pooled_w)
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        out_shape = (rois.size(0), input.size(1), pooled_h, pooled_w)
        output = input.new_zeros(out_shape)
        # argmax records, per output cell, which input element won the max;
        # it is required to route gradients in backward.
        argmax = input.new_zeros(out_shape, dtype=torch.int)

        ext_module.roi_pool_forward(
            input,
            rois,
            output,
            argmax,
            pooled_height=pooled_h,
            pooled_width=pooled_w,
            spatial_scale=spatial_scale)

        ctx.save_for_backward(rois, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        rois, argmax = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)

        ext_module.roi_pool_backward(
            grad_output,
            rois,
            argmax,
            grad_input,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale)

        return grad_input, None, None, None


roi_pool = RoIPoolFunction.apply
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class RoIPool(nn.Module):
    """Max RoI pooling module wrapping :func:`roi_pool`."""

    def __init__(self, output_size, spatial_scale=1.0):
        super(RoIPool, self).__init__()

        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)

    def forward(self, input, rois):
        """Pool each roi from ``input`` to ``self.output_size``."""
        return roi_pool(input, rois, self.output_size, self.spatial_scale)

    def __repr__(self):
        return (f'{self.__class__.__name__}'
                f'(output_size={self.output_size}, '
                f'spatial_scale={self.spatial_scale})')
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roiaware_pool3d.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
from torch import nn as nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
|
| 6 |
+
import annotator.mmpkg.mmcv as mmcv
|
| 7 |
+
from ..utils import ext_loader
|
| 8 |
+
|
| 9 |
+
ext_module = ext_loader.load_ext(
|
| 10 |
+
'_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward'])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class RoIAwarePool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    Please refer to `PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_ for
    more details.

    Args:
        out_size (int or tuple): The size of output features. n or
            [n1, n2, n3].
        max_pts_per_voxel (int, optional): The maximum number of points per
            voxel. Default: 128.
        mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'.
            Default: 'max'.
    """

    def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
        super().__init__()

        self.out_size = out_size
        self.max_pts_per_voxel = max_pts_per_voxel
        assert mode in ['max', 'avg']
        # The underlying kernel expects an integer flag: 0 -> max, 1 -> avg.
        self.mode = {'max': 0, 'avg': 1}[mode]

    def forward(self, rois, pts, pts_feature):
        """Pool point features into each 3D roi.

        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input
                points.

        Returns:
            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
        """
        return RoIAwarePool3dFunction.apply(rois, pts, pts_feature,
                                            self.out_size,
                                            self.max_pts_per_voxel, self.mode)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class RoIAwarePool3dFunction(Function):
    """Autograd function behind :class:`RoIAwarePool3d`."""

    @staticmethod
    def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
                mode):
        """Pool features of the points falling inside each 3D roi.

        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input
                points.
            out_size (int or tuple): The size of output features. n or
                [n1, n2, n3].
            max_pts_per_voxel (int): The maximum number of points per voxel.
                Default: 128.
            mode (int): Pooling method of RoIAware, 0 (max pool) or 1
                (average pool).

        Returns:
            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C],
                output pooled features.
        """
        if isinstance(out_size, int):
            out_x = out_y = out_z = out_size
        else:
            assert len(out_size) == 3
            assert mmcv.is_tuple_of(out_size, int)
            out_x, out_y, out_z = out_size

        num_rois = rois.shape[0]
        num_channels = pts_feature.shape[-1]
        num_pts = pts.shape[0]

        voxel_grid = (num_rois, out_x, out_y, out_z)
        pooled_features = pts_feature.new_zeros(voxel_grid + (num_channels,))
        # Per-cell winner index (only meaningful for max pooling).
        argmax = pts_feature.new_zeros(
            voxel_grid + (num_channels,), dtype=torch.int)
        pts_idx_of_voxels = pts_feature.new_zeros(
            voxel_grid + (max_pts_per_voxel,), dtype=torch.int)

        ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax,
                                           pts_idx_of_voxels, pooled_features,
                                           mode)

        # Stash everything backward needs on ctx (non-tensor values too,
        # hence a plain attribute instead of save_for_backward).
        ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode,
                                            num_pts, num_channels)
        return pooled_features

    @staticmethod
    def backward(ctx, grad_out):
        saved = ctx.roiaware_pool3d_for_backward
        pts_idx_of_voxels, argmax, mode, num_pts, num_channels = saved

        grad_in = grad_out.new_zeros((num_pts, num_channels))
        ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax,
                                            grad_out.contiguous(), grad_in,
                                            mode)

        # Only pts_feature (3rd forward argument) receives gradient.
        return None, None, grad_in, None, None, None
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roipoint_pool3d.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch import nn as nn
|
| 2 |
+
from torch.autograd import Function
|
| 3 |
+
|
| 4 |
+
from ..utils import ext_loader
|
| 5 |
+
|
| 6 |
+
ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward'])
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class RoIPointPool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    Please refer to `Paper of PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_
    for more details.

    Args:
        num_sampled_points (int, optional): Number of samples in each roi.
            Default: 512.
    """

    def __init__(self, num_sampled_points=512):
        super().__init__()
        self.num_sampled_points = num_sampled_points

    def forward(self, points, point_features, boxes3d):
        """Sample a fixed number of points (with features) per 3D box.

        Args:
            points (torch.Tensor): Input points whose shape is (B, N, C).
            point_features (torch.Tensor): Features of input points whose
                shape is (B, N, C).
            boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7).

        Returns:
            pooled_features (torch.Tensor): The output pooled features whose
                shape is (B, M, 512, 3 + C).
            pooled_empty_flag (torch.Tensor): Empty flag whose shape is
                (B, M).
        """
        return RoIPointPool3dFunction.apply(points, point_features, boxes3d,
                                            self.num_sampled_points)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class RoIPointPool3dFunction(Function):
    """Forward-only autograd function behind :class:`RoIPointPool3d`."""

    @staticmethod
    def forward(ctx, points, point_features, boxes3d, num_sampled_points=512):
        """Sample points and their features inside each 3D box.

        Args:
            points (torch.Tensor): Input points whose shape is (B, N, C).
            point_features (torch.Tensor): Features of input points whose
                shape is (B, N, C).
            boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7).
            num_sampled_points (int, optional): The num of sampled points.
                Default: 512.

        Returns:
            pooled_features (torch.Tensor): The output pooled features whose
                shape is (B, M, 512, 3 + C).
            pooled_empty_flag (torch.Tensor): Empty flag whose shape is
                (B, M).
        """
        assert len(points.shape) == 3 and points.shape[2] == 3
        batch_size = points.shape[0]
        boxes_num = boxes3d.shape[1]
        feature_len = point_features.shape[2]

        pooled_boxes3d = boxes3d.view(batch_size, -1, 7)
        pooled_features = point_features.new_zeros(
            (batch_size, boxes_num, num_sampled_points, 3 + feature_len))
        pooled_empty_flag = point_features.new_zeros(
            (batch_size, boxes_num)).int()

        ext_module.roipoint_pool3d_forward(points.contiguous(),
                                           pooled_boxes3d.contiguous(),
                                           point_features.contiguous(),
                                           pooled_features, pooled_empty_flag)

        return pooled_features, pooled_empty_flag

    @staticmethod
    def backward(ctx, grad_out):
        # This op is forward-only; no gradient is implemented.
        raise NotImplementedError
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/saconv.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
|
| 6 |
+
from annotator.mmpkg.mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init
|
| 7 |
+
from annotator.mmpkg.mmcv.ops.deform_conv import deform_conv2d
|
| 8 |
+
from annotator.mmpkg.mmcv.utils import TORCH_VERSION, digit_version
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@CONV_LAYERS.register_module(name='SAC')
class SAConv2d(ConvAWS2d):
    """SAC (Switchable Atrous Convolution)

    This is an implementation of SAC in DetectoRS
    (https://arxiv.org/pdf/2006.02334.pdf).

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
            ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. Default: ``True``
        use_deform: If ``True``, replace convolution with deformable
            convolution. Default: ``False``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 use_deform=False):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.use_deform = use_deform
        # Per-location soft gate (1 channel) blending the small-FOV and
        # large-FOV convolution branches in forward().
        self.switch = nn.Conv2d(
            self.in_channels, 1, kernel_size=1, stride=stride, bias=True)
        # Learnable delta added to the weights for the large-FOV branch.
        self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size()))
        # 1x1 global-context projections applied before and after the SAC.
        self.pre_context = nn.Conv2d(
            self.in_channels, self.in_channels, kernel_size=1, bias=True)
        self.post_context = nn.Conv2d(
            self.out_channels, self.out_channels, kernel_size=1, bias=True)
        if self.use_deform:
            # 18 = 2 (x, y offsets) * 3 * 3 kernel positions.
            self.offset_s = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
            self.offset_l = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
        self.init_weights()

    def init_weights(self):
        """Zero-initialize the auxiliary layers (switch bias set to 1)."""
        constant_init(self.switch, 0, bias=1)
        self.weight_diff.data.zero_()
        constant_init(self.pre_context, 0)
        constant_init(self.post_context, 0)
        if self.use_deform:
            constant_init(self.offset_s, 0)
            constant_init(self.offset_l, 0)

    def forward(self, x):
        """Apply switchable atrous convolution to ``x``.

        Runs the convolution twice (normal dilation, then dilation and
        padding tripled for a larger field of view) and blends the two
        outputs with the per-location ``switch`` gate.
        """
        # pre-context: add a global average-pooled context projection.
        avg_x = F.adaptive_avg_pool2d(x, output_size=1)
        avg_x = self.pre_context(avg_x)
        avg_x = avg_x.expand_as(x)
        x = x + avg_x
        # switch: gate computed from a 5x5 average-pooled view of x.
        avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect')
        avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0)
        switch = self.switch(avg_x)
        # sac: small-FOV branch with the (AWS-normalized) weights.
        weight = self._get_weight(self.weight)
        zero_bias = torch.zeros(
            self.out_channels, device=weight.device, dtype=weight.dtype)

        if self.use_deform:
            offset = self.offset_s(avg_x)
            out_s = deform_conv2d(x, offset, weight, self.stride, self.padding,
                                  self.dilation, self.groups, 1)
        else:
            if (TORCH_VERSION == 'parrots'
                    or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
                out_s = super().conv2d_forward(x, weight)
            elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
                # bias is a required argument of _conv_forward in torch 1.8.0
                out_s = super()._conv_forward(x, weight, zero_bias)
            else:
                out_s = super()._conv_forward(x, weight)
        # Temporarily triple padding/dilation for the large-FOV branch;
        # the originals are restored after both branches are combined.
        ori_p = self.padding
        ori_d = self.dilation
        self.padding = tuple(3 * p for p in self.padding)
        self.dilation = tuple(3 * d for d in self.dilation)
        weight = weight + self.weight_diff
        if self.use_deform:
            offset = self.offset_l(avg_x)
            out_l = deform_conv2d(x, offset, weight, self.stride, self.padding,
                                  self.dilation, self.groups, 1)
        else:
            if (TORCH_VERSION == 'parrots'
                    or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
                out_l = super().conv2d_forward(x, weight)
            elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
                # bias is a required argument of _conv_forward in torch 1.8.0
                out_l = super()._conv_forward(x, weight, zero_bias)
            else:
                out_l = super()._conv_forward(x, weight)

        # Blend the two branches with the per-location gate.
        out = switch * out_s + (1 - switch) * out_l
        self.padding = ori_p
        self.dilation = ori_d
        # post-context: add a global average-pooled context projection.
        avg_x = F.adaptive_avg_pool2d(out, output_size=1)
        avg_x = self.post_context(avg_x)
        avg_x = avg_x.expand_as(out)
        out = out + avg_x
        return out
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/scatter_points.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
from torch import nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
|
| 6 |
+
from ..utils import ext_loader
|
| 7 |
+
|
| 8 |
+
ext_module = ext_loader.load_ext(
|
| 9 |
+
'_ext',
|
| 10 |
+
['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward'])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class _DynamicScatter(Function):
    # Autograd wrapper around the compiled dynamic point-to-voxel kernels.

    @staticmethod
    def forward(ctx, feats, coors, reduce_type='max'):
        """convert kitti points(N, >=3) to voxels.

        Args:
            feats (torch.Tensor): [N, C]. Points features to be reduced
                into voxels.
            coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each points.
            reduce_type (str, optional): Reduce op. support 'max', 'sum' and
                'mean'. Default: 'max'.

        Returns:
            voxel_feats (torch.Tensor): [M, C]. Reduced features, input
                features that shares the same voxel coordinates are reduced to
                one row.
            voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates.
        """
        results = ext_module.dynamic_point_to_voxel_forward(
            feats, coors, reduce_type)
        # The kernel also returns the point->voxel mapping and per-voxel
        # point counts, which backward needs to scatter gradients.
        (voxel_feats, voxel_coors, point2voxel_map,
         voxel_points_count) = results
        ctx.reduce_type = reduce_type
        ctx.save_for_backward(feats, voxel_feats, point2voxel_map,
                              voxel_points_count)
        ctx.mark_non_differentiable(voxel_coors)
        return voxel_feats, voxel_coors

    @staticmethod
    def backward(ctx, grad_voxel_feats, grad_voxel_coors=None):
        (feats, voxel_feats, point2voxel_map,
         voxel_points_count) = ctx.saved_tensors
        grad_feats = torch.zeros_like(feats)
        # TODO: whether to use index put or use cuda_backward
        # To use index put, need point to voxel index
        ext_module.dynamic_point_to_voxel_backward(
            grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats,
            point2voxel_map, voxel_points_count, ctx.reduce_type)
        # Only feats receives gradient; coors and reduce_type do not.
        return grad_feats, None, None


dynamic_scatter = _DynamicScatter.apply
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class DynamicScatter(nn.Module):
    """Scatters points into voxels, used in the voxel encoder with dynamic
    voxelization.

    Note:
        The CPU and GPU implementation get the same output, but have
        numerical difference after summation and division (e.g., 5e-7).

    Args:
        voxel_size (list): list [x, y, z] size of three dimension.
        point_cloud_range (list): The coordinate range of points, [x_min,
            y_min, z_min, x_max, y_max, z_max].
        average_points (bool): whether to use avg pooling to scatter points
            into voxel.
    """

    def __init__(self, voxel_size, point_cloud_range, average_points: bool):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.average_points = average_points

    def forward_single(self, points, coors):
        """Scatter the points of one sample into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each points.

        Returns:
            voxel_feats (torch.Tensor): Reduced features; input features
                sharing the same voxel coordinates are reduced to one row.
            voxel_coors (torch.Tensor): Voxel coordinates.
        """
        reduce_op = 'mean' if self.average_points else 'max'
        return dynamic_scatter(points.contiguous(), coors.contiguous(),
                               reduce_op)

    def forward(self, points, coors):
        """Scatter points/features into voxels, batched when ``coors``
        carries a leading batch-index column.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each points.

        Returns:
            voxel_feats (torch.Tensor): Reduced features; input features
                sharing the same voxel coordinates are reduced to one row.
            voxel_coors (torch.Tensor): Voxel coordinates.
        """
        if coors.size(-1) == 3:
            # No batch column: single-sample fast path.
            return self.forward_single(points, coors)

        batch_size = coors[-1, 0] + 1
        voxel_list, coor_list = [], []
        for batch_idx in range(batch_size):
            sample_mask = torch.where(coors[:, 0] == batch_idx)
            voxel, voxel_coor = self.forward_single(
                points[sample_mask], coors[sample_mask][:, 1:])
            # Re-attach the batch index as the leading coordinate column.
            coor_list.append(
                nn.functional.pad(
                    voxel_coor, (1, 0), mode='constant', value=batch_idx))
            voxel_list.append(voxel)
        features = torch.cat(voxel_list, dim=0)
        feature_coors = torch.cat(coor_list, dim=0)

        return features, feature_coors

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'voxel_size={self.voxel_size}'
                f', point_cloud_range={self.point_cloud_range}'
                f', average_points={self.average_points}'
                ')')
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/sync_bn.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
import torch.distributed as dist
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
from torch.autograd import Function
|
| 6 |
+
from torch.autograd.function import once_differentiable
|
| 7 |
+
from torch.nn.modules.module import Module
|
| 8 |
+
from torch.nn.parameter import Parameter
|
| 9 |
+
|
| 10 |
+
from annotator.mmpkg.mmcv.cnn import NORM_LAYERS
|
| 11 |
+
from ..utils import ext_loader
|
| 12 |
+
|
| 13 |
+
ext_module = ext_loader.load_ext('_ext', [
|
| 14 |
+
'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output',
|
| 15 |
+
'sync_bn_backward_param', 'sync_bn_backward_data'
|
| 16 |
+
])
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class SyncBatchNormFunction(Function):
    # Autograd function backing ``SyncBatchNorm``: batch statistics are
    # computed by the compiled ``_ext`` CUDA kernels and synchronized across
    # workers with ``torch.distributed`` all-reduce.

    @staticmethod
    def symbolic(g, input, running_mean, running_var, weight, bias, momentum,
                 eps, group, group_size, stats_mode):
        # ONNX export hook: represent this op as ``mmcv::MMCVSyncBatchNorm``.
        return g.op(
            'mmcv::MMCVSyncBatchNorm',
            input,
            running_mean,
            running_var,
            weight,
            bias,
            momentum_f=momentum,
            eps_f=eps,
            group_i=group,
            group_size_i=group_size,
            stats_mode=stats_mode)

    @staticmethod
    def forward(self, input, running_mean, running_var, weight, bias, momentum,
                eps, group, group_size, stats_mode):
        # NOTE: ``self`` here is the autograd context object (historical
        # parameter name kept from upstream mmcv), not a module instance.
        self.momentum = momentum
        self.eps = eps
        self.group = group
        self.group_size = group_size
        self.stats_mode = stats_mode

        assert isinstance(
            input, (torch.HalfTensor, torch.FloatTensor,
                    torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \
            f'only support Half or Float Tensor, but {input.type()}'
        # Work on a flattened (N, C, L) view; ``output3d`` aliases ``output``
        # so the kernel writes land in the returned tensor.
        output = torch.zeros_like(input)
        input3d = input.flatten(start_dim=2)
        output3d = output.view_as(input3d)
        num_channels = input3d.size(1)

        # ensure mean/var/norm/std are initialized as zeros
        # ``torch.empty()`` does not guarantee that
        mean = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        var = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        norm = torch.zeros_like(
            input3d, dtype=torch.float, device=input3d.device)
        std = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)

        batch_size = input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_forward_mean(input3d, mean)
            # Flag marks this rank as having contributed a non-empty batch.
            batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype)
        else:
            # skip updating mean and leave it as zeros when the input is empty
            batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype)

        # synchronize mean and the batch flag
        # (concatenated into one vector so a single all_reduce suffices)
        vec = torch.cat([mean, batch_flag])
        if self.stats_mode == 'N':
            # Weight this rank's contribution by its batch size.
            vec *= batch_size
        if self.group_size > 1:
            dist.all_reduce(vec, group=self.group)
        total_batch = vec[-1].detach()
        mean = vec[:num_channels]

        if self.stats_mode == 'default':
            mean = mean / self.group_size
        elif self.stats_mode == 'N':
            # clamp(min=1) guards against division by zero when every rank
            # had an empty batch.
            mean = mean / total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # leave var as zeros when the input is empty
        if batch_size > 0:
            ext_module.sync_bn_forward_var(input3d, mean, var)

        if self.stats_mode == 'N':
            var *= batch_size
        if self.group_size > 1:
            dist.all_reduce(var, group=self.group)

        if self.stats_mode == 'default':
            var /= self.group_size
        elif self.stats_mode == 'N':
            var /= total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # if the total batch size over all the ranks is zero,
        # we should not update the statistics in the current batch
        update_flag = total_batch.clamp(max=1)
        momentum = update_flag * self.momentum
        ext_module.sync_bn_forward_output(
            input3d,
            mean,
            var,
            weight,
            bias,
            running_mean,
            running_var,
            norm,
            std,
            output3d,
            eps=self.eps,
            momentum=momentum,
            group_size=self.group_size)
        # ``norm`` and ``std`` are reused by the backward kernels.
        self.save_for_backward(norm, std, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(self, grad_output):
        # Gradients w.r.t. input, weight and bias. Parameter gradients are
        # all-reduced and averaged across workers to stay consistent with the
        # synchronized forward statistics.
        norm, std, weight = self.saved_tensors
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(weight)
        grad_input = torch.zeros_like(grad_output)
        grad_output3d = grad_output.flatten(start_dim=2)
        grad_input3d = grad_input.view_as(grad_output3d)

        batch_size = grad_input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight,
                                              grad_bias)

        # all reduce
        if self.group_size > 1:
            dist.all_reduce(grad_weight, group=self.group)
            dist.all_reduce(grad_bias, group=self.group)
            grad_weight /= self.group_size
            grad_bias /= self.group_size

        if batch_size > 0:
            ext_module.sync_bn_backward_data(grad_output3d, weight,
                                             grad_weight, grad_bias, norm, std,
                                             grad_input3d)

        # One ``None`` per non-differentiable forward argument.
        return grad_input, None, None, grad_weight, grad_bias, \
            None, None, None, None, None
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
@NORM_LAYERS.register_module(name='MMSyncBN')
class SyncBatchNorm(Module):
    """Synchronized Batch Normalization.

    Args:
        num_features (int): number of features/channels in input tensor
        eps (float, optional): a value added to the denominator for numerical
            stability. Defaults to 1e-5.
        momentum (float, optional): the value used for the running_mean and
            running_var computation. Defaults to 0.1.
        affine (bool, optional): whether to use learnable affine parameters.
            Defaults to True.
        track_running_stats (bool, optional): whether to track the running
            mean and variance during training. When set to False, this
            module does not track such statistics, and initializes statistics
            buffers ``running_mean`` and ``running_var`` as ``None``. When
            these buffers are ``None``, this module always uses batch
            statistics in both training and eval modes. Defaults to True.
        group (int, optional): synchronization of stats happen within
            each process group individually. By default it is synchronization
            across the whole world. Defaults to None.
        stats_mode (str, optional): The statistical mode. Available options
            includes ``'default'`` and ``'N'``. Defaults to 'default'.
            When ``stats_mode=='default'``, it computes the overall statistics
            using those from each worker with equal weight, i.e., the
            statistics are synchronized and simply divided by ``group``. This
            mode will produce inaccurate statistics when empty tensors occur.
            When ``stats_mode=='N'``, it computes the overall statistics using
            the total number of batches in each worker ignoring the number of
            group, i.e., the statistics are synchronized and then divided by
            the total batch ``N``. This mode is beneficial when empty tensors
            occur during training, as it averages the total mean by the real
            number of batch.
    """

    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.1,
                 affine=True,
                 track_running_stats=True,
                 group=None,
                 stats_mode='default'):
        super(SyncBatchNorm, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine
        self.track_running_stats = track_running_stats
        # Default to synchronizing across every process in the world.
        group = dist.group.WORLD if group is None else group
        self.group = group
        self.group_size = dist.get_world_size(group)
        assert stats_mode in ['default', 'N'], \
            f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"'
        self.stats_mode = stats_mode
        if self.affine:
            self.weight = Parameter(torch.Tensor(num_features))
            self.bias = Parameter(torch.Tensor(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        if self.track_running_stats:
            self.register_buffer('running_mean', torch.zeros(num_features))
            self.register_buffer('running_var', torch.ones(num_features))
            self.register_buffer('num_batches_tracked',
                                 torch.tensor(0, dtype=torch.long))
        else:
            self.register_buffer('running_mean', None)
            self.register_buffer('running_var', None)
            self.register_buffer('num_batches_tracked', None)
        self.reset_parameters()

    def reset_running_stats(self):
        """Reset running mean/var and the batch counter to initial values."""
        if self.track_running_stats:
            self.running_mean.zero_()
            self.running_var.fill_(1)
            self.num_batches_tracked.zero_()

    def reset_parameters(self):
        """Reset running statistics and re-initialize affine parameters."""
        self.reset_running_stats()
        if self.affine:
            self.weight.data.uniform_()  # pytorch use ones_()
            self.bias.data.zero_()

    def forward(self, input):
        """Apply (synchronized) batch normalization to ``input``.

        In training mode (or whenever running stats are not tracked) the
        custom synchronized implementation is used; in eval mode with
        tracked stats it falls back to standard ``F.batch_norm``.

        Raises:
            ValueError: If ``input`` has fewer than 2 dimensions.
        """
        if input.dim() < 2:
            raise ValueError(
                f'expected at least 2D input, got {input.dim()}D input')
        if self.momentum is None:
            exponential_average_factor = 0.0
        else:
            exponential_average_factor = self.momentum

        if self.training and self.track_running_stats:
            if self.num_batches_tracked is not None:
                self.num_batches_tracked += 1
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / float(
                        self.num_batches_tracked)
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

        if self.training or not self.track_running_stats:
            return SyncBatchNormFunction.apply(
                input, self.running_mean, self.running_var, self.weight,
                self.bias, exponential_average_factor, self.eps, self.group,
                self.group_size, self.stats_mode)
        else:
            return F.batch_norm(input, self.running_mean, self.running_var,
                                self.weight, self.bias, False,
                                exponential_average_factor, self.eps)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'({self.num_features}, '
        s += f'eps={self.eps}, '
        s += f'momentum={self.momentum}, '
        s += f'affine={self.affine}, '
        s += f'track_running_stats={self.track_running_stats}, '
        # Fixed: the original omitted the space after this comma, producing
        # e.g. ``group_size=8,stats_mode=default`` inconsistently with the
        # other fields.
        s += f'group_size={self.group_size}, '
        s += f'stats_mode={self.stats_mode})'
        return s
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_interpolate.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Tuple
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
|
| 6 |
+
from ..utils import ext_loader
|
| 7 |
+
|
| 8 |
+
ext_module = ext_loader.load_ext(
|
| 9 |
+
'_ext', ['three_interpolate_forward', 'three_interpolate_backward'])
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class ThreeInterpolate(Function):
    """Performs weighted linear interpolation on 3 features.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, features: torch.Tensor, indices: torch.Tensor,
                weight: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, M) Features descriptors to be
                interpolated
            indices (Tensor): (B, n, 3) index three nearest neighbors
                of the target features in features
            weight (Tensor): (B, n, 3) weights of interpolation

        Returns:
            Tensor: (B, C, N) tensor of the interpolated features
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()
        assert weight.is_contiguous()

        B, c, m = features.size()
        n = indices.size(1)
        # Stash what backward needs directly on ctx (this implementation does
        # not use save_for_backward).
        ctx.three_interpolate_for_backward = (indices, weight, m)
        # NOTE(review): CUDA-only — the output is allocated uninitialized on
        # the current CUDA device; the kernel is expected to fill every
        # element.
        output = torch.cuda.FloatTensor(B, c, n)

        ext_module.three_interpolate_forward(
            features, indices, weight, output, b=B, c=c, m=m, n=n)
        return output

    @staticmethod
    def backward(
        ctx, grad_out: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Args:
            grad_out (Tensor): (B, C, N) tensor with gradients of outputs

        Returns:
            Tensor: (B, C, M) tensor with gradients of features
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()

        # Gradient buffer must start at zero: the kernel accumulates into it.
        grad_features = torch.cuda.FloatTensor(B, c, m).zero_()
        grad_out_data = grad_out.data.contiguous()

        ext_module.three_interpolate_backward(
            grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m)
        # Indices and interpolation weights receive no gradient.
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_nn.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Tuple
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
|
| 6 |
+
from ..utils import ext_loader
|
| 7 |
+
|
| 8 |
+
ext_module = ext_loader.load_ext('_ext', ['three_nn_forward'])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ThreeNN(Function):
    """Find the top-3 nearest neighbors of the target set from the source set.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, target: torch.Tensor,
                source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            target (Tensor): shape (B, N, 3), points set that needs to
                find the nearest neighbors.
            source (Tensor): shape (B, M, 3), points set that is used
                to find the nearest neighbors of points in target set.

        Returns:
            Tensor: shape (B, N, 3), L2 distance of each point in target
                set to their corresponding nearest neighbors.
        """
        target = target.contiguous()
        source = source.contiguous()

        B, N, _ = target.size()
        m = source.size(1)
        # NOTE(review): CUDA-only scratch outputs; the kernel fills both.
        dist2 = torch.cuda.FloatTensor(B, N, 3)
        idx = torch.cuda.IntTensor(B, N, 3)

        ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m)
        # parrots has no mark_non_differentiable; guard for compatibility.
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)

        # The kernel produces squared distances; convert to L2 distances.
        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        # Nearest-neighbor search is not differentiable.
        return None, None


three_nn = ThreeNN.apply
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/tin_shift.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
# Code reference from "Temporal Interlacing Network"
|
| 3 |
+
# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py
|
| 4 |
+
# Hao Shao, Shengju Qian, Yu Liu
|
| 5 |
+
# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
import torch.nn as nn
|
| 9 |
+
from torch.autograd import Function
|
| 10 |
+
|
| 11 |
+
from ..utils import ext_loader
|
| 12 |
+
|
| 13 |
+
ext_module = ext_loader.load_ext('_ext',
|
| 14 |
+
['tin_shift_forward', 'tin_shift_backward'])
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class TINShiftFunction(Function):
    """Autograd wrapper around the compiled temporal-interlace-shift kernels."""

    @staticmethod
    def forward(ctx, input, shift):
        channels = input.size(2)
        segments = shift.size(1)
        # The channel dimension must split evenly across the shift segments.
        evenly_divisible = channels % segments == 0 and channels // segments > 0
        if not evenly_divisible:
            raise ValueError(
                'C should be a multiple of num_segments, '
                f'but got C={channels} and num_segments={segments}.')

        ctx.save_for_backward(shift)

        output = torch.zeros_like(input)
        ext_module.tin_shift_forward(input, shift, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        (shift,) = ctx.saved_tensors
        grad_input = grad_output.new(*grad_output.size()).zero_()
        grad_shift = shift.new(*shift.size()).zero_()
        # Only the data gradient is produced by the kernel; the shift
        # gradient stays at zero.
        ext_module.tin_shift_backward(grad_output, shift, grad_input)
        return grad_input, grad_shift


tin_shift = TINShiftFunction.apply
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class TINShift(nn.Module):
    """Temporal Interlace Shift.

    Temporal Interlace shift is a differentiable temporal-wise frame shifting
    which is proposed in "Temporal Interlacing Network"

    Please refer to https://arxiv.org/abs/2001.06499 for more details.
    Code is modified from https://github.com/mit-han-lab/temporal-shift-module
    """

    def forward(self, input, shift):
        """Perform temporal interlace shift.

        Args:
            input (Tensor): Feature map with shape [N, num_segments, C, H * W].
            shift (Tensor): Shift tensor with shape [N, num_segments].

        Returns:
            Feature map after temporal interlace shift.
        """
        # Stateless module: simply delegate to the autograd function.
        shifted = tin_shift(input, shift)
        return shifted
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/upfirdn2d.py
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
|
| 4 |
+
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
|
| 5 |
+
# Augmentation (ADA)
|
| 6 |
+
# =======================================================================
|
| 7 |
+
|
| 8 |
+
# 1. Definitions
|
| 9 |
+
|
| 10 |
+
# "Licensor" means any person or entity that distributes its Work.
|
| 11 |
+
|
| 12 |
+
# "Software" means the original work of authorship made available under
|
| 13 |
+
# this License.
|
| 14 |
+
|
| 15 |
+
# "Work" means the Software and any additions to or derivative works of
|
| 16 |
+
# the Software that are made available under this License.
|
| 17 |
+
|
| 18 |
+
# The terms "reproduce," "reproduction," "derivative works," and
|
| 19 |
+
# "distribution" have the meaning as provided under U.S. copyright law;
|
| 20 |
+
# provided, however, that for the purposes of this License, derivative
|
| 21 |
+
# works shall not include works that remain separable from, or merely
|
| 22 |
+
# link (or bind by name) to the interfaces of, the Work.
|
| 23 |
+
|
| 24 |
+
# Works, including the Software, are "made available" under this License
|
| 25 |
+
# by including in or with the Work either (a) a copyright notice
|
| 26 |
+
# referencing the applicability of this License to the Work, or (b) a
|
| 27 |
+
# copy of this License.
|
| 28 |
+
|
| 29 |
+
# 2. License Grants
|
| 30 |
+
|
| 31 |
+
# 2.1 Copyright Grant. Subject to the terms and conditions of this
|
| 32 |
+
# License, each Licensor grants to you a perpetual, worldwide,
|
| 33 |
+
# non-exclusive, royalty-free, copyright license to reproduce,
|
| 34 |
+
# prepare derivative works of, publicly display, publicly perform,
|
| 35 |
+
# sublicense and distribute its Work and any resulting derivative
|
| 36 |
+
# works in any form.
|
| 37 |
+
|
| 38 |
+
# 3. Limitations
|
| 39 |
+
|
| 40 |
+
# 3.1 Redistribution. You may reproduce or distribute the Work only
|
| 41 |
+
# if (a) you do so under this License, (b) you include a complete
|
| 42 |
+
# copy of this License with your distribution, and (c) you retain
|
| 43 |
+
# without modification any copyright, patent, trademark, or
|
| 44 |
+
# attribution notices that are present in the Work.
|
| 45 |
+
|
| 46 |
+
# 3.2 Derivative Works. You may specify that additional or different
|
| 47 |
+
# terms apply to the use, reproduction, and distribution of your
|
| 48 |
+
# derivative works of the Work ("Your Terms") only if (a) Your Terms
|
| 49 |
+
# provide that the use limitation in Section 3.3 applies to your
|
| 50 |
+
# derivative works, and (b) you identify the specific derivative
|
| 51 |
+
# works that are subject to Your Terms. Notwithstanding Your Terms,
|
| 52 |
+
# this License (including the redistribution requirements in Section
|
| 53 |
+
# 3.1) will continue to apply to the Work itself.
|
| 54 |
+
|
| 55 |
+
# 3.3 Use Limitation. The Work and any derivative works thereof only
|
| 56 |
+
# may be used or intended for use non-commercially. Notwithstanding
|
| 57 |
+
# the foregoing, NVIDIA and its affiliates may use the Work and any
|
| 58 |
+
# derivative works commercially. As used herein, "non-commercially"
|
| 59 |
+
# means for research or evaluation purposes only.
|
| 60 |
+
|
| 61 |
+
# 3.4 Patent Claims. If you bring or threaten to bring a patent claim
|
| 62 |
+
# against any Licensor (including any claim, cross-claim or
|
| 63 |
+
# counterclaim in a lawsuit) to enforce any patents that you allege
|
| 64 |
+
# are infringed by any Work, then your rights under this License from
|
| 65 |
+
# such Licensor (including the grant in Section 2.1) will terminate
|
| 66 |
+
# immediately.
|
| 67 |
+
|
| 68 |
+
# 3.5 Trademarks. This License does not grant any rights to use any
|
| 69 |
+
# Licensor’s or its affiliates’ names, logos, or trademarks, except
|
| 70 |
+
# as necessary to reproduce the notices described in this License.
|
| 71 |
+
|
| 72 |
+
# 3.6 Termination. If you violate any term of this License, then your
|
| 73 |
+
# rights under this License (including the grant in Section 2.1) will
|
| 74 |
+
# terminate immediately.
|
| 75 |
+
|
| 76 |
+
# 4. Disclaimer of Warranty.
|
| 77 |
+
|
| 78 |
+
# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
| 79 |
+
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
|
| 80 |
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
|
| 81 |
+
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
|
| 82 |
+
# THIS LICENSE.
|
| 83 |
+
|
| 84 |
+
# 5. Limitation of Liability.
|
| 85 |
+
|
| 86 |
+
# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
|
| 87 |
+
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
|
| 88 |
+
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
|
| 89 |
+
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
|
| 90 |
+
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
|
| 91 |
+
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
|
| 92 |
+
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
|
| 93 |
+
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
|
| 94 |
+
# THE POSSIBILITY OF SUCH DAMAGES.
|
| 95 |
+
|
| 96 |
+
# =======================================================================
|
| 97 |
+
|
| 98 |
+
import torch
|
| 99 |
+
from torch.autograd import Function
|
| 100 |
+
from torch.nn import functional as F
|
| 101 |
+
|
| 102 |
+
from annotator.mmpkg.mmcv.utils import to_2tuple
|
| 103 |
+
from ..utils import ext_loader
|
| 104 |
+
|
| 105 |
+
upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d'])
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class UpFirDn2dBackward(Function):
    # Backward pass of ``UpFirDn2d``: the gradient of an upfirdn operation is
    # itself an upfirdn with up/down factors swapped and the flipped kernel,
    # so the same compiled kernel is reused with transposed parameters.

    @staticmethod
    def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
                in_size, out_size):

        up_x, up_y = up
        down_x, down_y = down
        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad

        # Collapse batch/channel into the leading dim: the kernel expects
        # a (N*C, H, W, 1) layout.
        grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)

        # Note the swap: the gradient pass uses ``down`` as up and ``up`` as
        # down, together with the pre-computed gradient padding ``g_pad``.
        grad_input = upfirdn2d_ext.upfirdn2d(
            grad_output,
            grad_kernel,
            up_x=down_x,
            up_y=down_y,
            down_x=up_x,
            down_y=up_y,
            pad_x0=g_pad_x0,
            pad_x1=g_pad_x1,
            pad_y0=g_pad_y0,
            pad_y1=g_pad_y1)
        grad_input = grad_input.view(in_size[0], in_size[1], in_size[2],
                                     in_size[3])

        ctx.save_for_backward(kernel)

        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        # Stash scalar parameters for the double-backward pass.
        ctx.up_x = up_x
        ctx.up_y = up_y
        ctx.down_x = down_x
        ctx.down_y = down_y
        ctx.pad_x0 = pad_x0
        ctx.pad_x1 = pad_x1
        ctx.pad_y0 = pad_y0
        ctx.pad_y1 = pad_y1
        ctx.in_size = in_size
        ctx.out_size = out_size

        return grad_input

    @staticmethod
    def backward(ctx, gradgrad_input):
        # Double backward: re-apply the original (non-transposed) upfirdn to
        # the incoming gradient-of-gradient.
        kernel, = ctx.saved_tensors

        gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2],
                                                ctx.in_size[3], 1)

        gradgrad_out = upfirdn2d_ext.upfirdn2d(
            gradgrad_input,
            kernel,
            up_x=ctx.up_x,
            up_y=ctx.up_y,
            down_x=ctx.down_x,
            down_y=ctx.down_y,
            pad_x0=ctx.pad_x0,
            pad_x1=ctx.pad_x1,
            pad_y0=ctx.pad_y0,
            pad_y1=ctx.pad_y1)
        # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0],
        #                                  ctx.out_size[1], ctx.in_size[3])
        gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1],
                                         ctx.out_size[0], ctx.out_size[1])

        # One ``None`` per non-differentiable forward argument.
        return gradgrad_out, None, None, None, None, None, None, None, None
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
class UpFirDn2d(Function):
    """Autograd Function wrapping the CUDA ``upfirdn2d`` extension op.

    Performs upsample -> FIR filter -> downsample in a single fused op and
    precomputes the complementary padding needed by
    :class:`UpFirDn2dBackward`.
    """

    @staticmethod
    def forward(ctx, input, kernel, up, down, pad):
        up_x, up_y = up
        down_x, down_y = down
        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        kernel_h, kernel_w = kernel.shape
        batch, channel, in_h, in_w = input.shape
        ctx.in_size = input.shape

        # The extension op expects a (n*c, h, w, 1) layout.
        input = input.reshape(-1, in_h, in_w, 1)

        # Save the kernel and its flipped version; the backward pass
        # correlates with the flipped filter.
        ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))

        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
        ctx.out_size = (out_h, out_w)

        ctx.up = (up_x, up_y)
        ctx.down = (down_x, down_y)
        ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)

        # Complementary padding for the adjoint upfirdn used in backward.
        g_pad_x0 = kernel_w - pad_x0 - 1
        g_pad_y0 = kernel_h - pad_y0 - 1
        g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1
        g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1

        ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)

        out = upfirdn2d_ext.upfirdn2d(
            input,
            kernel,
            up_x=up_x,
            up_y=up_y,
            down_x=down_x,
            down_y=down_y,
            pad_x0=pad_x0,
            pad_x1=pad_x1,
            pad_y0=pad_y0,
            pad_y1=pad_y1)
        # out = out.view(major, out_h, out_w, minor)
        out = out.view(-1, channel, out_h, out_w)

        return out

    @staticmethod
    def backward(ctx, grad_output):
        kernel, grad_kernel = ctx.saved_tensors

        grad_input = UpFirDn2dBackward.apply(
            grad_output,
            kernel,
            grad_kernel,
            ctx.up,
            ctx.down,
            ctx.pad,
            ctx.g_pad,
            ctx.in_size,
            ctx.out_size,
        )

        # Gradients only flow to the input; kernel and config args get None.
        return grad_input, None, None, None, None
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
    """UpFIRDn for 2d features.

    UpFIRDn is short for upsample, apply FIR filter and downsample. More
    details can be found in:
    https://www.mathworks.com/help/signal/ref/upfirdn.html

    Args:
        input (Tensor): Tensor with shape of (n, c, h, w).
        kernel (Tensor): Filter kernel.
        up (int | tuple[int], optional): Upsampling factor. If given a number,
            we will use this factor for the both height and width side.
            Defaults to 1.
        down (int | tuple[int], optional): Downsampling factor. If given a
            number, we will use this factor for the both height and width side.
            Defaults to 1.
        pad (tuple[int], optional): Padding for tensors, (x_pad, y_pad) or
            (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0).

    Returns:
        Tensor: Tensor after UpFIRDn.

    Raises:
        ValueError: If ``pad`` does not have 2 or 4 elements. (Previously a
            2-element pad was only expanded on one branch and any other
            length raised an opaque ``NameError`` on the GPU path.)
    """
    # Normalize arguments once for both the CPU and GPU paths.
    if len(pad) == 2:
        # Expand symmetric (x_pad, y_pad) to (x0, x1, y0, y1).
        pad = (pad[0], pad[1], pad[0], pad[1])
    elif len(pad) != 4:
        raise ValueError(f'pad must have 2 or 4 elements, but got {len(pad)}')

    up = to_2tuple(up)
    down = to_2tuple(down)

    if input.device.type == 'cpu':
        # Pure-PyTorch fallback; no custom autograd Function needed.
        out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1],
                               pad[0], pad[1], pad[2], pad[3])
    else:
        # Fused CUDA extension path with full autograd support.
        out = UpFirDn2d.apply(input, kernel, up, down, pad)

    return out
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
|
| 291 |
+
pad_y0, pad_y1):
|
| 292 |
+
_, channel, in_h, in_w = input.shape
|
| 293 |
+
input = input.reshape(-1, in_h, in_w, 1)
|
| 294 |
+
|
| 295 |
+
_, in_h, in_w, minor = input.shape
|
| 296 |
+
kernel_h, kernel_w = kernel.shape
|
| 297 |
+
|
| 298 |
+
out = input.view(-1, in_h, 1, in_w, 1, minor)
|
| 299 |
+
out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
|
| 300 |
+
out = out.view(-1, in_h * up_y, in_w * up_x, minor)
|
| 301 |
+
|
| 302 |
+
out = F.pad(
|
| 303 |
+
out,
|
| 304 |
+
[0, 0,
|
| 305 |
+
max(pad_x0, 0),
|
| 306 |
+
max(pad_x1, 0),
|
| 307 |
+
max(pad_y0, 0),
|
| 308 |
+
max(pad_y1, 0)])
|
| 309 |
+
out = out[:,
|
| 310 |
+
max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0),
|
| 311 |
+
max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ]
|
| 312 |
+
|
| 313 |
+
out = out.permute(0, 3, 1, 2)
|
| 314 |
+
out = out.reshape(
|
| 315 |
+
[-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1])
|
| 316 |
+
w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
|
| 317 |
+
out = F.conv2d(out, w)
|
| 318 |
+
out = out.reshape(
|
| 319 |
+
-1,
|
| 320 |
+
minor,
|
| 321 |
+
in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
|
| 322 |
+
in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
|
| 323 |
+
)
|
| 324 |
+
out = out.permute(0, 2, 3, 1)
|
| 325 |
+
out = out[:, ::down_y, ::down_x, :]
|
| 326 |
+
|
| 327 |
+
out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
|
| 328 |
+
out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
|
| 329 |
+
|
| 330 |
+
return out.view(-1, channel, out_h, out_w)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/voxelize.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
from torch import nn
|
| 4 |
+
from torch.autograd import Function
|
| 5 |
+
from torch.nn.modules.utils import _pair
|
| 6 |
+
|
| 7 |
+
from ..utils import ext_loader
|
| 8 |
+
|
| 9 |
+
ext_module = ext_loader.load_ext(
|
| 10 |
+
'_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward'])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class _Voxelization(Function):
    """Autograd Function dispatching to the voxelization extension ops.

    No gradients are defined; this is a pure forward op backed by the
    ``dynamic_voxelize_forward`` / ``hard_voxelize_forward`` C++/CUDA
    kernels.
    """

    @staticmethod
    def forward(ctx,
                points,
                voxel_size,
                coors_range,
                max_points=35,
                max_voxels=20000):
        """Convert kitti points(N, >=3) to voxels.

        Args:
            points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points
                and points[:, 3:] contain other information like reflectivity.
            voxel_size (tuple or float): The size of voxel with the shape of
                [3].
            coors_range (tuple or float): The coordinate range of voxel with
                the shape of [6].
            max_points (int, optional): maximum points contained in a voxel. if
                max_points=-1, it means using dynamic_voxelize. Default: 35.
            max_voxels (int, optional): maximum voxels this function create.
                for second, 20000 is a good choice. Users should shuffle points
                before call this function because max_voxels may drop points.
                Default: 20000.

        Returns:
            voxels_out (torch.Tensor): Output voxels with the shape of [M,
                max_points, ndim]. Only contain points and returned when
                max_points != -1.
            coors_out (torch.Tensor): Output coordinates with the shape of
                [M, 3].
            num_points_per_voxel_out (torch.Tensor): Num points per voxel with
                the shape of [M]. Only returned when max_points != -1.
        """
        if max_points == -1 or max_voxels == -1:
            # Dynamic voxelization: only per-point voxel coordinates are
            # produced; the extension fills `coors` in place.
            coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int)
            ext_module.dynamic_voxelize_forward(points, coors, voxel_size,
                                                coors_range, 3)
            return coors
        else:
            # Hard voxelization: pre-allocate maximum-size output buffers
            # that the extension fills in place.
            voxels = points.new_zeros(
                size=(max_voxels, max_points, points.size(1)))
            coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int)
            num_points_per_voxel = points.new_zeros(
                size=(max_voxels, ), dtype=torch.int)
            # The extension returns the number of voxels actually produced.
            voxel_num = ext_module.hard_voxelize_forward(
                points, voxels, coors, num_points_per_voxel, voxel_size,
                coors_range, max_points, max_voxels, 3)
            # select the valid voxels
            voxels_out = voxels[:voxel_num]
            coors_out = coors[:voxel_num]
            num_points_per_voxel_out = num_points_per_voxel[:voxel_num]
            return voxels_out, coors_out, num_points_per_voxel_out
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# Functional alias for applying the voxelization autograd Function.
voxelization = _Voxelization.apply
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class Voxelization(nn.Module):
    """Convert kitti points(N, >=3) to voxels.

    Please refer to `PVCNN <https://arxiv.org/abs/1907.03739>`_ for more
    details.

    Args:
        voxel_size (tuple or float): The size of voxel with the shape of [3].
        point_cloud_range (tuple or float): The coordinate range of voxel with
            the shape of [6].
        max_num_points (int): maximum points contained in a voxel. if
            max_points=-1, it means using dynamic_voxelize.
        max_voxels (int, optional): maximum voxels this function create.
            for second, 20000 is a good choice. Users should shuffle points
            before call this function because max_voxels may drop points.
            Default: 20000.
    """

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.max_num_points = max_num_points
        # A tuple is interpreted as (train cap, eval cap); a single int is
        # used as the cap for both modes.
        if isinstance(max_voxels, tuple):
            self.max_voxels = max_voxels
        else:
            self.max_voxels = _pair(max_voxels)

        # Number of voxels along each axis, derived from range / voxel size.
        point_cloud_range = torch.tensor(
            point_cloud_range, dtype=torch.float32)
        voxel_size = torch.tensor(voxel_size, dtype=torch.float32)
        grid_size = (point_cloud_range[3:] -
                     point_cloud_range[:3]) / voxel_size
        grid_size = torch.round(grid_size).long()
        input_feat_shape = grid_size[:2]
        self.grid_size = grid_size
        # the origin shape is as [x-len, y-len, z-len]
        # [w, h, d] -> [d, h, w]
        self.pcd_shape = [*input_feat_shape, 1][::-1]

    def forward(self, input):
        # Use the train-time voxel cap during training and the (usually
        # larger) eval-time cap otherwise.
        if self.training:
            max_voxels = self.max_voxels[0]
        else:
            max_voxels = self.max_voxels[1]

        return voxelization(input, self.voxel_size, self.point_cloud_range,
                            self.max_num_points, max_voxels)

    def __repr__(self):
        # Mirror the constructor arguments for easy reconstruction.
        s = self.__class__.__name__ + '('
        s += 'voxel_size=' + str(self.voxel_size)
        s += ', point_cloud_range=' + str(self.point_cloud_range)
        s += ', max_num_points=' + str(self.max_num_points)
        s += ', max_voxels=' + str(self.max_voxels)
        s += ')'
        return s
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from .collate import collate
|
| 3 |
+
from .data_container import DataContainer
|
| 4 |
+
from .data_parallel import MMDataParallel
|
| 5 |
+
from .distributed import MMDistributedDataParallel
|
| 6 |
+
from .registry import MODULE_WRAPPERS
|
| 7 |
+
from .scatter_gather import scatter, scatter_kwargs
|
| 8 |
+
from .utils import is_module_wrapper
|
| 9 |
+
|
| 10 |
+
__all__ = [
|
| 11 |
+
'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel',
|
| 12 |
+
'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS'
|
| 13 |
+
]
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/_functions.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import torch
|
| 3 |
+
from torch.nn.parallel._functions import _get_stream
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def scatter(input, devices, streams=None):
    """Scatters tensor across multiple GPUs."""
    if streams is None:
        streams = [None] * len(devices)

    if isinstance(input, torch.Tensor):
        tensor = input.contiguous()
        # TODO: copy to a pinned buffer first (if copying from CPU)
        copy_stream = streams[0] if tensor.numel() > 0 else None
        if devices == [-1]:
            # CPU-only case: unsqueeze a leading dimension so the shape
            # matches tensors scattered onto GPUs.
            return tensor.unsqueeze(0)
        with torch.cuda.device(devices[0]), torch.cuda.stream(copy_stream):
            return tensor.cuda(devices[0], non_blocking=True)

    if isinstance(input, list):
        # Split the list into len(devices) contiguous chunks and recurse,
        # routing each element to its chunk's device/stream.
        chunk_size = (len(input) - 1) // len(devices) + 1
        return [
            scatter(item, [devices[idx // chunk_size]],
                    [streams[idx // chunk_size]])
            for idx, item in enumerate(input)
        ]

    raise Exception(f'Unknown type {type(input)}.')
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def synchronize_stream(output, devices, streams):
    """Make the current stream on each device wait for the copy stream.

    Walks ``output`` (nested lists of tensors) and, for every non-empty
    tensor, blocks the device's current stream on the corresponding copy
    stream and records the tensor on it so its memory is not reused early.
    """
    if isinstance(output, list):
        # Elements are assumed to be evenly chunked across devices in the
        # same order as produced by ``scatter`` above.
        chunk_size = len(output) // len(devices)
        for i in range(len(devices)):
            for j in range(chunk_size):
                synchronize_stream(output[i * chunk_size + j], [devices[i]],
                                   [streams[i]])
    elif isinstance(output, torch.Tensor):
        # Empty tensors were never copied on a side stream (see scatter),
        # so there is nothing to synchronize.
        if output.numel() != 0:
            with torch.cuda.device(devices[0]):
                main_stream = torch.cuda.current_stream()
                main_stream.wait_stream(streams[0])
                output.record_stream(main_stream)
    else:
        raise Exception(f'Unknown type {type(output)}.')
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def get_input_device(input):
    """Return the CUDA device id of the first GPU tensor found in ``input``
    (which may be a tensor or arbitrarily nested lists of tensors), or -1
    when everything lives on the CPU.

    Raises:
        Exception: If ``input`` is neither a tensor nor a list.
    """
    if isinstance(input, torch.Tensor):
        return input.get_device() if input.is_cuda else -1
    if isinstance(input, list):
        for element in input:
            device_id = get_input_device(element)
            if device_id != -1:
                return device_id
        return -1
    raise Exception(f'Unknown type {type(input)}.')
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class Scatter:
    """Scatter helper used by mmcv's scatter_gather for CPU/GPU inputs.

    Unlike ``torch.nn.parallel._functions.Scatter`` this is a plain class
    (not an autograd Function) and additionally supports ``target_gpus ==
    [-1]``, meaning "keep on CPU".
    """

    @staticmethod
    def forward(target_gpus, input):
        input_device = get_input_device(input)
        streams = None
        if input_device == -1 and target_gpus != [-1]:
            # Perform CPU to GPU copies in a background stream
            streams = [_get_stream(device) for device in target_gpus]

        outputs = scatter(input, target_gpus, streams)
        # Synchronize with the copy stream
        if streams is not None:
            synchronize_stream(outputs, target_gpus, streams)

        return tuple(outputs)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/collate.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from collections.abc import Mapping, Sequence
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from torch.utils.data.dataloader import default_collate
|
| 7 |
+
|
| 8 |
+
from .data_container import DataContainer
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., images tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes

    ``batch`` is processed in groups of ``samples_per_gpu`` so that each
    GPU later receives one collated group.
    """

    if not isinstance(batch, Sequence):
        # NOTE(review): f-string reads batch.dtype, which only exists on
        # array-like inputs; other types would raise AttributeError here.
        raise TypeError(f'{batch.dtype} is not supported.')

    if isinstance(batch[0], DataContainer):
        stacked = []
        if batch[0].cpu_only:
            # Case 1: keep data on CPU, just group per GPU.
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        elif batch[0].stack:
            # Case 2: pad tensors in each group to a common size, then stack.
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)

                if batch[i].pad_dims is not None:
                    ndim = batch[i].dim()
                    assert ndim > batch[i].pad_dims
                    # Maximum size over the group for each of the last
                    # pad_dims dimensions (indexed from the end).
                    max_shape = [0 for _ in range(batch[i].pad_dims)]
                    for dim in range(1, batch[i].pad_dims + 1):
                        max_shape[dim - 1] = batch[i].size(-dim)
                    for sample in batch[i:i + samples_per_gpu]:
                        # Leading (non-padded) dims must match exactly.
                        for dim in range(0, ndim - batch[i].pad_dims):
                            assert batch[i].size(dim) == sample.size(dim)
                        for dim in range(1, batch[i].pad_dims + 1):
                            max_shape[dim - 1] = max(max_shape[dim - 1],
                                                     sample.size(-dim))
                    padded_samples = []
                    for sample in batch[i:i + samples_per_gpu]:
                        # F.pad takes pairs (before, after) from the last
                        # dim backwards; only the "after" slots are filled.
                        pad = [0 for _ in range(batch[i].pad_dims * 2)]
                        for dim in range(1, batch[i].pad_dims + 1):
                            pad[2 * dim -
                                1] = max_shape[dim - 1] - sample.size(-dim)
                        padded_samples.append(
                            F.pad(
                                sample.data, pad, value=sample.padding_value))
                    stacked.append(default_collate(padded_samples))
                elif batch[i].pad_dims is None:
                    # No padding requested: tensors must already agree.
                    stacked.append(
                        default_collate([
                            sample.data
                            for sample in batch[i:i + samples_per_gpu]
                        ]))
                else:
                    raise ValueError(
                        'pad_dims should be either None or integers (1-3)')

        else:
            # Case 3: neither CPU-only nor stacked — keep as lists of data.
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], Sequence):
        # Collate each position of the sequences independently.
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(batch[0], Mapping):
        # Collate each dict field independently, keyed like the first sample.
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in batch[0]
        }
    else:
        # Plain tensors / numbers fall back to PyTorch's default collate.
        return default_collate(batch)
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/data_container.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
import functools
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def assert_tensor_type(func):
    """Decorator guarding tensor-only methods of :class:`DataContainer`.

    The wrapped method may only be called when ``self.data`` is a
    ``torch.Tensor``; otherwise an ``AttributeError`` naming the method
    and the actual data type is raised.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        container = args[0]
        if not isinstance(container.data, torch.Tensor):
            raise AttributeError(
                f'{container.__class__.__name__} has no attribute '
                f'{func.__name__} for type {container.datatype}')
        return func(*args, **kwargs)

    return wrapper
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class DataContainer:
    """A container for any type of objects.

    Typically tensors will be stacked in the collate function and sliced along
    some dimension in the scatter function. This behavior has some limitations.
    1. All tensors have to be the same size.
    2. Types are limited (numpy array or Tensor).

    We design `DataContainer` and `MMDataParallel` to overcome these
    limitations. The behavior can be either of the following.

    - copy to GPU, pad all tensors to the same size and stack them
    - copy to GPU without stacking
    - leave the objects as is and pass it to the model
    - pad_dims specifies the number of last few dimensions to do padding
    """

    def __init__(self,
                 data,
                 stack=False,
                 padding_value=0,
                 cpu_only=False,
                 pad_dims=2):
        # Only the last 1-3 dimensions may be padded (None disables padding).
        assert pad_dims in [None, 1, 2, 3]
        self._data = data
        self._cpu_only = cpu_only
        self._stack = stack
        self._padding_value = padding_value
        self._pad_dims = pad_dims

    def __repr__(self):
        return f'{self.__class__.__name__}({repr(self.data)})'

    def __len__(self):
        return len(self._data)

    @property
    def data(self):
        # The wrapped payload (tensor, array, or arbitrary object).
        return self._data

    @property
    def datatype(self):
        # Tensors report their torch type string, everything else its class.
        payload = self._data
        if isinstance(payload, torch.Tensor):
            return payload.type()
        return type(payload)

    @property
    def cpu_only(self):
        return self._cpu_only

    @property
    def stack(self):
        return self._stack

    @property
    def padding_value(self):
        return self._padding_value

    @property
    def pad_dims(self):
        return self._pad_dims

    @assert_tensor_type
    def size(self, *args, **kwargs):
        # Delegates to Tensor.size; only valid for tensor payloads.
        return self.data.size(*args, **kwargs)

    @assert_tensor_type
    def dim(self):
        # Delegates to Tensor.dim; only valid for tensor payloads.
        return self.data.dim()
|
microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/data_parallel.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
| 2 |
+
from itertools import chain
|
| 3 |
+
|
| 4 |
+
from torch.nn.parallel import DataParallel
|
| 5 |
+
|
| 6 |
+
from .scatter_gather import scatter_kwargs
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class MMDataParallel(DataParallel):
    """The DataParallel module that supports DataContainer.

    MMDataParallel has two main differences with PyTorch DataParallel:

    - It supports a custom type :class:`DataContainer` which allows more
      flexible control of input data during both GPU and CPU inference.
    - It implement two more APIs ``train_step()`` and ``val_step()``.

    Args:
        module (:class:`nn.Module`): Module to be encapsulated.
        device_ids (list[int]): Device IDS of modules to be scattered to.
            Defaults to None when GPU is not available.
        output_device (str | int): Device ID for output. Defaults to None.
        dim (int): Dimension used to scatter the data. Defaults to 0.
    """

    def __init__(self, *args, dim=0, **kwargs):
        super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs)
        # Kept on self so scatter() can use it even though the parent also
        # stores it.
        self.dim = dim

    def forward(self, *inputs, **kwargs):
        """Override the original forward function.

        The main difference lies in the CPU inference where the data in
        :class:`DataContainers` will still be gathered.
        """
        if not self.device_ids:
            # We add the following line thus the module could gather and
            # convert data containers as those in GPU inference
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return self.module(*inputs[0], **kwargs[0])
        else:
            return super().forward(*inputs, **kwargs)

    def scatter(self, inputs, kwargs, device_ids):
        # DataContainer-aware replacement for DataParallel.scatter.
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def train_step(self, *inputs, **kwargs):
        """Scatter inputs and delegate to ``module.train_step``.

        Raises:
            RuntimeError: If any parameter/buffer is not on the source
                device when training on GPU.
        """
        if not self.device_ids:
            # We add the following line thus the module could gather and
            # convert data containers as those in GPU inference
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return self.module.train_step(*inputs[0], **kwargs[0])

        assert len(self.device_ids) == 1, \
            ('MMDataParallel only supports single GPU training, if you need to'
             ' train with multiple GPUs, please use MMDistributedDataParallel'
             'instead.')

        # All weights must already live on the (single) source device.
        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')

        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        return self.module.train_step(*inputs[0], **kwargs[0])

    def val_step(self, *inputs, **kwargs):
        """Scatter inputs and delegate to ``module.val_step``.

        Raises:
            RuntimeError: If any parameter/buffer is not on the source
                device when running on GPU.
        """
        if not self.device_ids:
            # We add the following line thus the module could gather and
            # convert data containers as those in GPU inference
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return self.module.val_step(*inputs[0], **kwargs[0])

        assert len(self.device_ids) == 1, \
            ('MMDataParallel only supports single GPU training, if you need to'
             ' train with multiple GPUs, please use MMDistributedDataParallel'
             ' instead.')

        # All weights must already live on the (single) source device.
        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')

        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        return self.module.val_step(*inputs[0], **kwargs[0])
|