| | |
| | import math |
| | from typing import List, Tuple |
| | import torch |
| |
|
| | from detectron2.layers.rotated_boxes import pairwise_iou_rotated |
| |
|
| | from .boxes import Boxes |
| |
|
| |
|
class RotatedBoxes(Boxes):
    """
    This structure stores a list of rotated boxes as a Nx5 torch.Tensor.
    It supports some common methods about boxes
    (`area`, `clip`, `nonempty`, etc),
    and also behaves like a Tensor
    (support indexing, `to(device)`, `.device`, and iteration over all boxes)
    """

    def __init__(self, tensor: torch.Tensor):
        """
        Args:
            tensor (Tensor[float]): a Nx5 matrix.  Each row is
                (x_center, y_center, width, height, angle),
                in which angle is represented in degrees.
                While there's no strict range restriction for it,
                the recommended principal range is between [-180, 180) degrees.

        Assume we have a horizontal box B = (x_center, y_center, width, height),
        where width is along the x-axis and height is along the y-axis.
        The rotated box B_rot (x_center, y_center, width, height, angle)
        can be seen as:

        1. When angle == 0:
           B_rot == B
        2. When angle > 0:
           B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW;
        3. When angle < 0:
           B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW.

        Mathematically, since the right-handed coordinate system for image space
        is (y, x), where y is top->down and x is left->right, the 4 vertices of the
        rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from
        the vertices of the horizontal rectangle :math:`(y_i, x_i)` (i = 1, 2, 3, 4)
        in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians,
        :math:`(y_c, x_c)` is the center of the rectangle):

        .. math::

            yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c,

            xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c,

        which is the standard rigid-body rotation transformation.

        Intuitively, the angle is
        (1) the rotation angle from y-axis in image space
        to the height vector (top->down in the box's local coordinate system)
        of the box in CCW, and
        (2) the rotation angle from x-axis in image space
        to the width vector (left->right in the box's local coordinate system)
        of the box in CCW.

        More intuitively, consider the following horizontal box ABCD represented
        in (x1, y1, x2, y2): (3, 2, 7, 4),
        covering the [3, 7] x [2, 4] region of the continuous coordinate system
        which looks like this:

        .. code:: none

            O--------> x
            |
            |  A---B
            |  |   |
            |  D---C
            |
            v y

        Note that each capital letter represents one 0-dimensional geometric point
        instead of a 'square pixel' here.

        In the example above, using (x, y) to represent a point we have:

        .. math::

            O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4)

        We name vector AB = vector DC as the width vector in box's local coordinate system, and
        vector AD = vector BC as the height vector in box's local coordinate system. Initially,
        when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis
        in the image space, respectively.

        For better illustration, we denote the center of the box as E,

        .. code:: none

            O--------> x
            |
            |  A---B
            |  | E |
            |  D---C
            |
            v y

        where the center E = ((3+7)/2, (2+4)/2) = (5, 3).

        Also,

        .. math::

            width = |AB| = |CD| = 7 - 3 = 4,
            height = |AD| = |BC| = 4 - 2 = 2.

        Therefore, the corresponding representation for the same shape in rotated box in
        (x_center, y_center, width, height, angle) format is:

        (5, 3, 4, 2, 0),

        Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees
        CCW (counter-clockwise) by definition. It looks like this:

        .. code:: none

            O--------> x
            |   B-C
            |   | |
            |   |E|
            |   | |
            |   A-D
            v y

        The center E is still located at the same point (5, 3), while the vertices
        ABCD are rotated by 90 degrees CCW with regard to E:
        A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5)

        Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to
        vector AD or vector BC (the top->down height vector in box's local coordinate system),
        or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right
        width vector in box's local coordinate system).

        .. math::

            width = |AB| = |CD| = 5 - 1 = 4,
            height = |AD| = |BC| = 6 - 4 = 2.

        Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise)
        by definition? It looks like this:

        .. code:: none

            O--------> x
            |   D-A
            |   | |
            |   |E|
            |   | |
            |   C-B
            v y

        The center E is still located at the same point (5, 3), while the vertices
        ABCD are rotated by 90 degrees CW with regard to E:
        A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1)

        .. math::

            width = |AB| = |CD| = 5 - 1 = 4,
            height = |AD| = |BC| = 6 - 4 = 2.

        This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU
        will be 1. However, these two will generate different RoI Pooling results and
        should not be treated as an identical box.

        On the other hand, it's easy to see that (X, Y, W, H, A) is identical to
        (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be
        identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is
        equivalent to rotating the same shape 90 degrees CW.

        We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180):

        .. code:: none

            O--------> x
            |
            |  C---D
            |  | E |
            |  B---A
            |
            v y

        .. math::

            A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2),

            width = |AB| = |CD| = 7 - 3 = 4,
            height = |AD| = |BC| = 4 - 2 = 2.

        Finally, this is a very inaccurate (heavily quantized) illustration of
        how (5, 3, 4, 2, 60) looks like in case anyone wonders:

        .. code:: none

            O--------> x
            |     B\\
            |    /  C
            |   /E /
            |  A  /
            |   `D
            v y

        It's still a rectangle with center of (5, 3), width of 4 and height of 2,
        but its angle (and thus orientation) is somewhere between
        (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90).
        """
        # Non-tensor inputs (e.g. nested lists) carry no device, so they land on the CPU.
        device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu")
        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
        if tensor.numel() == 0:
            # An empty input is given the canonical (0, 5) shape. reshape() is applied to
            # the input-derived tensor instead of allocating an unrelated one.
            # NOTE(review): presumably this keeps tracing/JIT happy -- confirm before
            # simplifying the seemingly redundant .to(...) call.
            tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device)
        assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size()

        self.tensor = tensor

    def clone(self) -> "RotatedBoxes":
        """
        Clone the RotatedBoxes.

        Returns:
            RotatedBoxes
        """
        return RotatedBoxes(self.tensor.clone())

    def to(self, device: torch.device) -> "RotatedBoxes":
        """
        Move the boxes to the given device.

        Only the device is forwarded; there is no dtype argument because the
        constructor always stores the boxes as float32.

        Args:
            device (torch.device): the target device.

        Returns:
            RotatedBoxes: a new RotatedBoxes wrapping the tensor on `device`.
        """
        return RotatedBoxes(self.tensor.to(device=device))

    def area(self) -> torch.Tensor:
        """
        Computes the area of all the boxes.

        Returns:
            torch.Tensor: a vector with areas of each box.
        """
        box = self.tensor
        # Rotation does not change the area, so it is simply width * height.
        area = box[:, 2] * box[:, 3]
        return area

    def normalize_angles(self) -> None:
        """
        Restrict angles to the range of [-180, 180) degrees.

        Note that this does not write into the existing tensor: `self.tensor` is
        rebound to a newly concatenated tensor holding the normalized angles.
        """
        angle_tensor = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0
        self.tensor = torch.cat((self.tensor[:, :4], angle_tensor[:, None]), dim=1)

    def clip(self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0) -> None:
        """
        Clip (in place) the boxes by limiting x coordinates to the range [0, width]
        and y coordinates to the range [0, height].

        Angles are normalized first via :meth:`normalize_angles`, which rebinds
        `self.tensor` to a new tensor; the clipped values are then written into
        that tensor.

        For RRPN:
        Only clip boxes that are almost horizontal with a tolerance of
        clip_angle_threshold to maintain backward compatibility.

        Rotated boxes beyond this threshold are not clipped for two reasons:

        1. There are potentially multiple ways to clip a rotated box to make it
           fit within the image.
        2. It's tricky to make the entire rectangular box fit within the image
           and still be able to not leave out pixels of interest.

        Therefore we rely on ops like RoIAlignRotated to safely handle this.

        Args:
            box_size (height, width): The clipping box's size.
            clip_angle_threshold:
                Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees),
                we do the clipping as horizontal boxes.
        """
        h, w = box_size

        # Normalize angles to [-180, 180) degrees so the threshold test below is meaningful.
        self.normalize_angles()

        # Indices of the near-horizontal boxes; only these get clipped.
        idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0]

        # Convert the selected boxes to corner form (x1, y1, x2, y2).
        x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0
        y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0
        x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0
        y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0

        # Clip the corners to the [0, w] x [0, h] region.
        x1.clamp_(min=0, max=w)
        y1.clamp_(min=0, max=h)
        x2.clamp_(min=0, max=w)
        y2.clamp_(min=0, max=h)

        # Convert back to center form (xc, yc, w, h).
        self.tensor[idx, 0] = (x1 + x2) / 2.0
        self.tensor[idx, 1] = (y1 + y2) / 2.0
        # Take the min with the old size so widths and heights can never grow
        # (e.g. because of floating point round-off in the round trip above).
        self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1)
        self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1)

    def nonempty(self, threshold: float = 0.0) -> torch.Tensor:
        """
        Find boxes that are non-empty.
        A box is considered empty, if either of its side is no larger than threshold.

        Args:
            threshold (float): a side must be strictly larger than this to count.

        Returns:
            Tensor: a binary vector which represents
            whether each box is empty (False) or non-empty (True).
        """
        box = self.tensor
        widths = box[:, 2]
        heights = box[:, 3]
        keep = (widths > threshold) & (heights > threshold)
        return keep

    def __getitem__(self, item) -> "RotatedBoxes":
        """
        Returns:
            RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing.

        The following usage are allowed:

        1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box.
        2. `new_boxes = boxes[2:10]`: return a slice of boxes.
        3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor
           with `length = len(boxes)`. Nonzero elements in the vector will be selected.

        Note that the returned RotatedBoxes might share storage with this RotatedBoxes,
        subject to Pytorch's indexing semantics.
        """
        if isinstance(item, int):
            # Integer indexing yields a 1-D row; view it back as a 1x5 matrix.
            return RotatedBoxes(self.tensor[item].view(1, -1))
        b = self.tensor[item]
        assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format(
            item
        )
        return RotatedBoxes(b)

    def __len__(self) -> int:
        return self.tensor.shape[0]

    def __repr__(self) -> str:
        return "RotatedBoxes(" + str(self.tensor) + ")"

    def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor:
        """
        Args:
            box_size (height, width): Size of the reference box covering
                [0, width] x [0, height]
            boundary_threshold (int): Boxes that extend beyond the reference box
                boundary by more than boundary_threshold are considered "outside".

        For RRPN, it might not be necessary to call this function since it's common
        for rotated box to extend to outside of the image boundaries
        (the clip function only clips the near-horizontal boxes)

        Returns:
            a binary vector, indicating whether each box is inside the reference box.
        """
        height, width = box_size

        cnt_x = self.tensor[..., 0]
        cnt_y = self.tensor[..., 1]
        half_w = self.tensor[..., 2] / 2.0
        half_h = self.tensor[..., 3] / 2.0
        a = self.tensor[..., 4]
        c = torch.abs(torch.cos(a * math.pi / 180.0))
        s = torch.abs(torch.sin(a * math.pi / 180.0))
        # Half extents of the axis-aligned (horizontal) bounding rectangle of the
        # rotated box.
        max_rect_dx = c * half_w + s * half_h
        max_rect_dy = c * half_h + s * half_w

        # The lower bounds are inclusive while the upper bounds are strict.
        inds_inside = (
            (cnt_x - max_rect_dx >= -boundary_threshold)
            & (cnt_y - max_rect_dy >= -boundary_threshold)
            & (cnt_x + max_rect_dx < width + boundary_threshold)
            & (cnt_y + max_rect_dy < height + boundary_threshold)
        )

        return inds_inside

    def get_centers(self) -> torch.Tensor:
        """
        Returns:
            The box centers in a Nx2 array of (x, y).
        """
        return self.tensor[:, :2]

    def scale(self, scale_x: float, scale_y: float) -> None:
        """
        Scale the rotated box with horizontal and vertical scaling factors
        Note: when scale_factor_x != scale_factor_y,
        the rotated box does not preserve the rectangular shape when the angle
        is not a multiple of 90 degrees under resize transformation.
        Instead, the shape is a parallelogram (that has skew)
        Here we make an approximation by fitting a rotated rectangle to the parallelogram.

        Args:
            scale_x (float): scaling factor along the x-axis.
            scale_y (float): scaling factor along the y-axis.
        """
        self.tensor[:, 0] *= scale_x
        self.tensor[:, 1] *= scale_y
        # c and s are computed from the *old* angles before column 4 is overwritten.
        theta = self.tensor[:, 4] * math.pi / 180.0
        c = torch.cos(theta)
        s = torch.sin(theta)

        # In image space, y is top->down and x is left->right.
        # In the box's local coordinate system (center at the origin) let
        #   E = (-w / 2, 0)   be the midpoint of the left edge, and
        #   F = (0, -h / 2)   be the midpoint of the top edge.
        # Rotating a local point (x, y) into image space gives
        #   (old_x, old_y) = (s * y + c * x, c * y - s * x)
        # hence
        #   E(old) = (-c * w / 2,  s * w / 2)
        #   F(old) = (-s * h / 2, -c * h / 2)
        # and after applying the scaling factors (sfx, sfy):
        #   E(new) = (-sfx * c * w / 2,  sfy * s * w / 2)
        #   F(new) = (-sfx * s * h / 2, -sfy * c * h / 2)
        #
        # w(new) = |E(new) - O| * 2 = sqrt[(sfx * c)^2 + (sfy * s)^2] * w
        #
        # For example,
        # when angle = 0 or 180, |c| = 1, s = 0, the width scales by sfx;
        # when |angle| = 90, c = 0, |s| = 1, the width scales by sfy.
        self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2)

        # h(new) = |F(new) - O| * 2 = sqrt[(sfx * s)^2 + (sfy * c)^2] * h
        #
        # For example,
        # when angle = 0 or 180, |c| = 1, s = 0, the height scales by sfy;
        # when |angle| = 90, c = 0, |s| = 1, the height scales by sfx.
        self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2)

        # The angle is the CCW rotation from the image y-axis to the box's height
        # vector, which points along O - F(new) = (sfx * s * h / 2, sfy * c * h / 2):
        #
        # angle(new) = atan2(sfx * s, sfy * c)
        #
        # For example,
        # when sfx == sfy (and positive), angle(new) == atan2(s, c), i.e. the
        # orientation is unchanged.
        self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi

    @classmethod
    def cat(cls, boxes_list: List["RotatedBoxes"]) -> "RotatedBoxes":
        """
        Concatenates a list of RotatedBoxes into a single RotatedBoxes

        Arguments:
            boxes_list (list[RotatedBoxes])

        Returns:
            RotatedBoxes: the concatenated RotatedBoxes
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            # The constructor reshapes an empty tensor to (0, 5).
            return cls(torch.empty(0))
        assert all([isinstance(box, RotatedBoxes) for box in boxes_list])

        # torch.cat builds a new tensor out of the individual box tensors.
        cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
        return cat_boxes

    @property
    def device(self) -> torch.device:
        return self.tensor.device

    @torch.jit.unused
    def __iter__(self):
        """
        Yield a box as a Tensor of shape (5,) at a time.
        """
        yield from self.tensor
| |
|
| |
|
def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> torch.Tensor:
    """
    Given two lists of rotated boxes of size N and M,
    compute the IoU (intersection over union)
    between **all** N x M pairs of boxes.
    The box order must be (x_center, y_center, width, height, angle).

    Args:
        boxes1, boxes2 (RotatedBoxes):
            two `RotatedBoxes`. Contains N & M rotated boxes, respectively.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    # The computation is delegated to the pairwise_iou_rotated op, which works on
    # the raw Nx5 / Mx5 tensors.
    return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor)
| |
|