| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | """Point feature utils for Mask2former.""" |
| |
|
| | import torch |
| | from torch.nn import functional as F |
| |
|
| |
|
| | def point_sample(inputs, point_coords, **kwargs): |
| | """ |
| | A wrapper around :function:`torch.nn.functional.grid_sample` to support 3D point_coords tensors. |
| | Unlike :function:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside |
| | [0, 1] x [0, 1] square. |
| | |
| | Args: |
| | inputs (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid. |
| | point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains |
| | [0, 1] x [0, 1] normalized point coordinates. |
| | |
| | Returns: |
| | output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains |
| | features for points in `point_coords`. The features are obtained via bilinear |
| | interplation from `inputs` the same way as :function:`torch.nn.functional.grid_sample`. |
| | """ |
| | add_dim = False |
| | if point_coords.dim() == 3: |
| | add_dim = True |
| | point_coords = point_coords.unsqueeze(2) |
| | output = F.grid_sample(inputs, 2.0 * point_coords - 1.0, **kwargs) |
| | if add_dim: |
| | output = output.squeeze(3) |
| | return output |
| |
|
| |
|
| | def get_uncertain_point_coords_with_randomness( |
| | coarse_logits, uncertainty_func, num_points, oversample_ratio, importance_sample_ratio |
| | ): |
| | """ |
| | Sample points in [0, 1] x [0, 1] coordinate space based on their uncertainty. The unceratinties |
| | are calculated for each point using 'uncertainty_func' function that takes point's logit |
| | prediction as input. |
| | See PointRend paper for details. |
| | |
| | Args: |
| | coarse_logits (Tensor): A tensor of shape (N, C, Hmask, Wmask) or (N, 1, Hmask, Wmask) for |
| | class-specific or class-agnostic prediction. |
| | uncertainty_func: A function that takes a Tensor of shape (N, C, P) or (N, 1, P) that |
| | contains logit predictions for P points and returns their uncertainties as a Tensor of |
| | shape (N, 1, P). |
| | num_points (int): The number of points P to sample. |
| | oversample_ratio (int): Oversampling parameter. |
| | importance_sample_ratio (float): Ratio of points that are sampled via importnace sampling. |
| | |
| | Returns: |
| | point_coords (Tensor): A tensor of shape (N, P, 2) that contains the coordinates of P |
| | sampled points. |
| | """ |
| | assert oversample_ratio >= 1 |
| | assert 0 <= importance_sample_ratio <= 1 |
| | num_boxes = coarse_logits.shape[0] |
| | num_sampled = int(num_points * oversample_ratio) |
| | point_coords = torch.rand(num_boxes, num_sampled, 2, device=coarse_logits.device) |
| | point_logits = point_sample(coarse_logits, point_coords, align_corners=False) |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | point_uncertainties = uncertainty_func(point_logits) |
| | num_uncertain_points = int(importance_sample_ratio * num_points) |
| | num_random_points = num_points - num_uncertain_points |
| | idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] |
| | shift = num_sampled * torch.arange(num_boxes, dtype=torch.long, device=coarse_logits.device) |
| | idx += shift[:, None] |
| | point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( |
| | num_boxes, num_uncertain_points, 2 |
| | ) |
| | if num_random_points > 0: |
| | point_coords = torch.cat( |
| | [ |
| | point_coords, |
| | torch.rand(num_boxes, num_random_points, 2, device=coarse_logits.device), |
| | ], |
| | dim=1, |
| | ) |
| | return point_coords |
| |
|
| |
|
| | def get_uncertain_point_coords_on_grid(uncertainty_map, num_points): |
| | """ |
| | Find `num_points` most uncertain points from `uncertainty_map` grid. |
| | |
| | Args: |
| | uncertainty_map (Tensor): A tensor of shape (N, 1, H, W) that contains uncertainty |
| | values for a set of points on a regular H x W grid. |
| | num_points (int): The number of points P to select. |
| | |
| | Returns: |
| | point_indices (Tensor): A tensor of shape (N, P) that contains indices from |
| | [0, H x W) of the most uncertain points. |
| | point_coords (Tensor): A tensor of shape (N, P, 2) that contains [0, 1] x [0, 1] normalized |
| | coordinates of the most uncertain points from the H x W grid. |
| | """ |
| | R, _, H, W = uncertainty_map.shape |
| | h_step = 1.0 / float(H) |
| | w_step = 1.0 / float(W) |
| |
|
| | num_points = min(H * W, num_points) |
| | point_indices = torch.topk(uncertainty_map.view(R, H * W), k=num_points, dim=1)[1] |
| | point_coords = torch.zeros(R, num_points, 2, dtype=torch.float, device=uncertainty_map.device) |
| | point_coords[:, :, 0] = w_step / 2.0 + (point_indices % W).to(torch.float) * w_step |
| | point_coords[:, :, 1] = h_step / 2.0 + (point_indices // W).to(torch.float) * h_step |
| | return point_indices, point_coords |
| |
|