import math
from typing import List, Optional

import torch
from torch import nn, Tensor

from .image_list import ImageList


class AnchorGenerator(nn.Module):
    """
    Module that generates anchors for a set of feature maps and
    image sizes.

    The module supports computing anchors at multiple sizes and aspect ratios
    per feature map. This module assumes aspect ratio = height / width for
    each anchor.

    sizes and aspect_ratios should have the same number of elements, and that
    number should match the number of feature maps.

    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
    and AnchorGenerator will output a set of len(sizes[i]) * len(aspect_ratios[i])
    anchors per spatial location for feature map i.

    Args:
        sizes (Tuple[Tuple[int]]): anchor sizes (in pixels) for each feature map.
        aspect_ratios (Tuple[Tuple[float]]): anchor aspect ratios (height / width) for each feature map.
    """

    __annotations__ = {
        "cell_anchors": List[torch.Tensor],
    }

    def __init__(
        self,
        sizes=((128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    ):
        super().__init__()

        if not isinstance(sizes[0], (list, tuple)):
            # TODO change this
            sizes = tuple((s,) for s in sizes)
        if not isinstance(aspect_ratios[0], (list, tuple)):
            aspect_ratios = (aspect_ratios,) * len(sizes)

        self.sizes = sizes
        self.aspect_ratios = aspect_ratios
        self.cell_anchors = [
            self.generate_anchors(size, aspect_ratio) for size, aspect_ratio in zip(sizes, aspect_ratios)
        ]

    # TODO: https://github.com/pytorch/pytorch/issues/26792
    # For every (aspect_ratios, scales) combination, output a zero-centered anchor with those values.
    # (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios)
    # This method assumes aspect ratio = height / width for an anchor.
    def generate_anchors(
        self,
        scales: List[int],
        aspect_ratios: List[float],
        dtype: torch.dtype = torch.float32,
        device: torch.device = torch.device("cpu"),
    ) -> Tensor:
        scales = torch.as_tensor(scales, dtype=dtype, device=device)
        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
        h_ratios = torch.sqrt(aspect_ratios)
        w_ratios = 1 / h_ratios

        ws = (w_ratios[:, None] * scales[None, :]).view(-1)
        hs = (h_ratios[:, None] * scales[None, :]).view(-1)

        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2
        return base_anchors.round()
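
    # Illustrative worked example (our addition, not part of the original file):
    # generate_anchors([128], [2.0]) computes h_ratio = sqrt(2) ~= 1.414 and
    # w_ratio = 1 / 1.414 ~= 0.707, so ws ~= 90.5 and hs ~= 181.0, giving the
    # single rounded base anchor [-45., -91., 45., 91.] in (x1, y1, x2, y2) form.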

    def set_cell_anchors(self, dtype: torch.dtype, device: torch.device):
        self.cell_anchors = [cell_anchor.to(dtype=dtype, device=device) for cell_anchor in self.cell_anchors]

    def num_anchors_per_location(self) -> List[int]:
        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]

    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
    def grid_anchors(self, grid_sizes: List[List[int]], strides: List[List[Tensor]]) -> List[Tensor]:
        anchors = []
        cell_anchors = self.cell_anchors
        torch._assert(cell_anchors is not None, "cell_anchors should not be None")
        torch._assert(
            len(grid_sizes) == len(strides) == len(cell_anchors),
            "Anchors should be Tuple[Tuple[int]] because each feature "
            "map could potentially have different sizes and aspect ratios. "
            "There needs to be a match between the number of "
            "feature maps passed and the number of sizes / aspect ratios specified.",
        )

        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):
            grid_height, grid_width = size
            stride_height, stride_width = stride
            device = base_anchors.device

            # For output anchor, compute [x_center, y_center, x_center, y_center]
            shifts_x = torch.arange(0, grid_width, dtype=torch.int32, device=device) * stride_width
            shifts_y = torch.arange(0, grid_height, dtype=torch.int32, device=device) * stride_height
            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")
            shift_x = shift_x.reshape(-1)
            shift_y = shift_y.reshape(-1)
            shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)

            # For every (base anchor, output anchor) pair,
            # offset each zero-centered base anchor by the center of the output anchor.
            anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4))

        return anchors
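
    # Illustrative worked example (our addition, not part of the original file):
    # for a 2x2 grid with stride 16 in both directions, the anchor centers fall
    # at (0, 0), (16, 0), (0, 16) and (16, 16); each zero-centered base anchor
    # is translated to every one of those centers.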

    def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]:
        grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
        image_size = image_list.tensors.shape[-2:]
        dtype, device = feature_maps[0].dtype, feature_maps[0].device
        strides = [
            [
                torch.empty((), dtype=torch.int64, device=device).fill_(image_size[0] // g[0]),
                torch.empty((), dtype=torch.int64, device=device).fill_(image_size[1] // g[1]),
            ]
            for g in grid_sizes
        ]
        self.set_cell_anchors(dtype, device)
        anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)
        anchors: List[List[torch.Tensor]] = []
        for _ in range(len(image_list.image_sizes)):
            anchors_in_image = [anchors_per_feature_map for anchors_per_feature_map in anchors_over_all_feature_maps]
            anchors.append(anchors_in_image)
        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
        return anchors
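

# A minimal usage sketch (our addition, not part of the original file; the helper
# name _demo_anchor_generator is hypothetical). It shows how the generator pairs
# one (sizes, aspect_ratios) tuple with each feature map and returns one flat
# (num_anchors, 4) tensor per image.
def _demo_anchor_generator():
    generator = AnchorGenerator(
        sizes=((32, 64, 128), (256, 512, 1024)),
        aspect_ratios=((0.5, 1.0, 2.0), (0.5, 1.0, 2.0)),
    )
    images = torch.rand(2, 3, 224, 224)
    image_list = ImageList(images, [(224, 224), (224, 224)])
    feature_maps = [torch.rand(2, 8, 28, 28), torch.rand(2, 8, 14, 14)]
    anchors = generator(image_list, feature_maps)
    # 3 sizes * 3 ratios = 9 anchors per location:
    # 28 * 28 * 9 + 14 * 14 * 9 = 8820 anchors per image.
    assert [a.shape for a in anchors] == [torch.Size([8820, 4])] * 2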


class DefaultBoxGenerator(nn.Module):
    r"""
    This module generates the default boxes of SSD for a set of feature maps and image sizes.

    Args:
        aspect_ratios (List[List[int]]): A list with all the aspect ratios used in each feature map.
        min_ratio (float): The minimum scale :math:`\text{s}_{\text{min}}` of the default boxes used in the estimation
            of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
        max_ratio (float): The maximum scale :math:`\text{s}_{\text{max}}` of the default boxes used in the estimation
            of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
        scales (List[float], optional): The scales of the default boxes. If not provided it will be estimated using
            the ``min_ratio`` and ``max_ratio`` parameters.
        steps (List[int], optional): It's a hyper-parameter that affects the tiling of default boxes. If not provided
            it will be estimated from the data.
        clip (bool): Whether the standardized values of default boxes should be clipped between 0 and 1. The clipping
            is applied while the boxes are encoded in format ``(cx, cy, w, h)``.
    """

    def __init__(
        self,
        aspect_ratios: List[List[int]],
        min_ratio: float = 0.15,
        max_ratio: float = 0.9,
        scales: Optional[List[float]] = None,
        steps: Optional[List[int]] = None,
        clip: bool = True,
    ):
        super().__init__()
        if steps is not None and len(aspect_ratios) != len(steps):
            raise ValueError("aspect_ratios and steps should have the same length")
        self.aspect_ratios = aspect_ratios
        self.steps = steps
        self.clip = clip
        num_outputs = len(aspect_ratios)

        # Estimation of default boxes scales
        if scales is None:
            if num_outputs > 1:
                range_ratio = max_ratio - min_ratio
                self.scales = [min_ratio + range_ratio * k / (num_outputs - 1.0) for k in range(num_outputs)]
                self.scales.append(1.0)
            else:
                self.scales = [min_ratio, max_ratio]
        else:
            self.scales = scales

        self._wh_pairs = self._generate_wh_pairs(num_outputs)

    def _generate_wh_pairs(
        self, num_outputs: int, dtype: torch.dtype = torch.float32, device: torch.device = torch.device("cpu")
    ) -> List[Tensor]:
        _wh_pairs: List[Tensor] = []
        for k in range(num_outputs):
            # Adding the 2 default width-height pairs for aspect ratio 1 and scale s'k
            s_k = self.scales[k]
            s_prime_k = math.sqrt(self.scales[k] * self.scales[k + 1])
            wh_pairs = [[s_k, s_k], [s_prime_k, s_prime_k]]

            # Adding 2 pairs for each aspect ratio of the feature map k
            for ar in self.aspect_ratios[k]:
                sq_ar = math.sqrt(ar)
                w = self.scales[k] * sq_ar
                h = self.scales[k] / sq_ar
                wh_pairs.extend([[w, h], [h, w]])

            _wh_pairs.append(torch.as_tensor(wh_pairs, dtype=dtype, device=device))
        return _wh_pairs
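
    # Illustrative worked example (our addition, not part of the original file):
    # with scales = [0.15, 0.3] and aspect_ratios = [[2]], feature map 0 gets
    # s_k = 0.15 and s'_k = sqrt(0.15 * 0.3) ~= 0.212, so its four (w, h) pairs
    # are [0.15, 0.15], [0.212, 0.212], [0.212, 0.106] and [0.106, 0.212].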

    def num_anchors_per_location(self) -> List[int]:
        # Estimate num of anchors based on aspect ratios: 2 default boxes + 2 * ratios of feature map.
        return [2 + 2 * len(r) for r in self.aspect_ratios]

    # Default Boxes calculation based on page 6 of SSD paper
    def _grid_default_boxes(
        self, grid_sizes: List[List[int]], image_size: List[int], dtype: torch.dtype = torch.float32
    ) -> Tensor:
        default_boxes = []
        for k, f_k in enumerate(grid_sizes):
            # Now add the default boxes for each width-height pair
            if self.steps is not None:
                x_f_k = image_size[1] / self.steps[k]
                y_f_k = image_size[0] / self.steps[k]
            else:
                y_f_k, x_f_k = f_k

            shifts_x = ((torch.arange(0, f_k[1]) + 0.5) / x_f_k).to(dtype=dtype)
            shifts_y = ((torch.arange(0, f_k[0]) + 0.5) / y_f_k).to(dtype=dtype)
            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")
            shift_x = shift_x.reshape(-1)
            shift_y = shift_y.reshape(-1)

            shifts = torch.stack((shift_x, shift_y) * len(self._wh_pairs[k]), dim=-1).reshape(-1, 2)
            # Clipping the default boxes while the boxes are encoded in format (cx, cy, w, h)
            _wh_pair = self._wh_pairs[k].clamp(min=0, max=1) if self.clip else self._wh_pairs[k]
            wh_pairs = _wh_pair.repeat((f_k[0] * f_k[1]), 1)

            default_box = torch.cat((shifts, wh_pairs), dim=1)

            default_boxes.append(default_box)
        return torch.cat(default_boxes, dim=0)
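
    # Illustrative worked example (our addition, not part of the original file):
    # a 38x38 grid with 4 (w, h) pairs yields 38 * 38 * 4 = 5776 rows, each a
    # normalized (cx, cy, w, h) default box.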

    def __repr__(self) -> str:
        s = (
            f"{self.__class__.__name__}("
            f"aspect_ratios={self.aspect_ratios}"
            f", clip={self.clip}"
            f", scales={self.scales}"
            f", steps={self.steps}"
            ")"
        )
        return s

    def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]:
        grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
        image_size = image_list.tensors.shape[-2:]
        dtype, device = feature_maps[0].dtype, feature_maps[0].device
        default_boxes = self._grid_default_boxes(grid_sizes, image_size, dtype=dtype)
        default_boxes = default_boxes.to(device)

        dboxes = []
        x_y_size = torch.tensor([image_size[1], image_size[0]], device=default_boxes.device)
        for _ in image_list.image_sizes:
            dboxes_in_image = default_boxes
            dboxes_in_image = torch.cat(
                [
                    (dboxes_in_image[:, :2] - 0.5 * dboxes_in_image[:, 2:]) * x_y_size,
                    (dboxes_in_image[:, :2] + 0.5 * dboxes_in_image[:, 2:]) * x_y_size,
                ],
                -1,
            )
            dboxes.append(dboxes_in_image)
        return dboxes
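

# A minimal usage sketch (our addition, not part of the original file; the helper
# name _demo_default_box_generator is hypothetical). It shows that each feature
# map k contributes 2 + 2 * len(aspect_ratios[k]) boxes per spatial location,
# returned in (x1, y1, x2, y2) image coordinates.
def _demo_default_box_generator():
    generator = DefaultBoxGenerator(aspect_ratios=[[2], [2, 3]])
    images = torch.rand(1, 3, 300, 300)
    image_list = ImageList(images, [(300, 300)])
    feature_maps = [torch.rand(1, 8, 38, 38), torch.rand(1, 8, 19, 19)]
    dboxes = generator(image_list, feature_maps)
    # 38 * 38 * 4 + 19 * 19 * 6 = 5776 + 2166 = 7942 boxes per image.
    assert dboxes[0].shape == (7942, 4)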