Spaces:

STMicroelectronics
/

stm32-modelzoo-app

Running

App Files Files Community

stm32-modelzoo-app / object_detection /tf /src /utils /anchor_boxes_utils.py

FBAGSTM

STM32 AI Experimentation Hub

747451d 22 days ago

raw

history blame contribute delete

13.1 kB

	# /*---------------------------------------------------------------------------------------------
	# * Copyright (c) 2022 STMicroelectronics.
	# * All rights reserved.
	# * This software is licensed under terms that can be found in the LICENSE file in
	# * the root directory of this software component.
	# * If no LICENSE file comes with this software, it is provided AS-IS.
	# --------------------------------------------------------------------------------------------/

	import numpy as np


	def get_sizes_ratios_ssd_v1(input_shape: tuple = None) -> tuple:
	"""
	Returns a tuple of sizes and ratios based on the input_shape.

	Args:
	input_shape (tuple): The input shape of the model in the format (height, width, channels).

	Returns:
	tuple: A tuple of sizes and ratios in the format (sizes, ratios).

	Raises:
	None

	"""
	# Define sizes and ratios based on the input_shape
	if input_shape[0] == 192:
	sizes = [[0.26, 0.33], [0.42, 0.49], [0.58, 0.66], [0.74, 0.82], [0.9, 0.98]]
	ratios = [[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3],
	[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3],
	[1.0, 2.0, 0.5, 1.0 / 3]]
	elif input_shape[0] == 224:
	sizes = [[0.1, 0.16], [0.26, 0.33], [0.42, 0.49], [0.58, 0.66], [0.74, 0.82], [0.9, 0.98]]
	ratios = [[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3],
	[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3],
	[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3]]
	else:
	sizes = [[0.1, 0.16], [0.26, 0.33], [0.42, 0.49], [0.58, 0.66], [0.74, 0.82], [0.9, 0.98]]
	ratios = [[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3],
	[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3],
	[1.0, 2.0, 0.5, 1.0 / 3], [1.0, 2.0, 0.5, 1.0 / 3]]

	# Return the sizes and ratios as a tuple
	return sizes, ratios


	def get_sizes_ratios_ssd_v2(input_shape: tuple = None) -> tuple:

	def _sizes_creation(target_max_res,base_sizes,t_min_s,res_range,slope):

	# target_max_res : must be in the res_range
	# base_sizes=[[0.26,0.33], [0.42,0.49], [0.58,0.66], [0.74,0.82], [0.9,0.98]]
	# t_min_s = [0.026,0.033]
	# slope = 2
	# res_range = [24,32,52]

	ab_x = _affine_search([base_sizes[0][0],t_min_s[0]],res_range,slope) # [a,b,c]
	ab_y = _affine_search([base_sizes[0][1],t_min_s[1]],res_range,slope) # [a,b,c]

	arr_res = np.array([1/(target_max_res**slope),1])

	target_max_s = np.array(base_sizes[-1])
	target_min_s = np.stack([ab_x,ab_y]) @ arr_res
	target_len = len(base_sizes)

	target_sizes = np.linspace(target_min_s,target_max_s,target_len)

	return target_sizes

	def _affine_search(s,r,slope):

	r = np.array(r)

	R = np.stack([1/(r**slope),np.ones_like(r)],axis=1)

	# knowing that -> R@x = s then : x = R^-1 @ s

	return np.linalg.inv(R) @ s

	fmap_sizes_dict = {'192': [24, 12, 6, 3,2],
	'224': [28, 14, 7, 4,2],
	'256': [32, 16, 8, 4,2],
	'288': [36, 18, 9, 5,3],
	'320': [40, 20, 10, 5,3],
	'352': [44, 22, 11, 6,3],
	'384': [48, 24, 12, 6,3],
	'416': [52, 26, 13, 7,4]}

	# Check that the model input shape is supported
	if str(input_shape[0]) not in fmap_sizes_dict or str(input_shape[1]) not in fmap_sizes_dict:
	supported_shapes = [(int(k), int(k), 3) for k in fmap_sizes_dict.keys()]
	raise ValueError(f"\nInput shape ({input_shape[1]}, {input_shape[0]}, 3) "
	"is not supported for `ssd_mobilenet_v2_fpnlite` models.\n"
	f"Supported shapes: {supported_shapes}\n"
	"Please check the 'training.model' section of your configuration file.")

	base_sizes = [[0.26, 0.33],
	[0.42, 0.49],
	[0.58, 0.66],
	[0.74, 0.82],
	[0.90, 0.98]]

	min_sizes_max_res = [0.06, 0.09]

	max_fmap_res = fmap_sizes_dict[str(input_shape[0])][0]

	res_range = [fmap_sizes_dict['192'][0],fmap_sizes_dict['416'][0]]

	sizes = _sizes_creation(max_fmap_res, base_sizes, min_sizes_max_res, res_range, slope = 5)

	ratios = [[1.0, 2.0, 0.5, 1.0 / 3]]*len(sizes)

	# Return the sizes and ratios as a tuple
	return sizes, ratios


	def get_fmap_sizes(model_type, input_shape):

	fmap_sizes_dict = {
	'st_ssd_mobilenet_v1': {
	'192': [24, 12, 6, 3, 1],
	'224': [32, 16, 8, 4, 2, 1],
	'256': [32, 16, 8, 4, 2, 1]
	},
	'ssd_mobilenet_v2_fpnlite': {
	'192': [24, 12, 6, 3, 2],
	'224': [28, 14, 7, 4, 2],
	'256': [32, 16, 8, 4, 2],
	'288': [36, 18, 9, 5, 3],
	'320': [40, 20, 10, 5, 3],
	'352': [44, 22, 11, 6, 3],
	'384': [48, 24, 12, 6, 3],
	'416': [52, 26, 13, 7, 4]
	}
	}

	fmap_widths = np.array(fmap_sizes_dict[model_type][str(input_shape[0])])
	fmap_heights = np.array(fmap_sizes_dict[model_type][str(input_shape[1])])

	fmap_sizes = None
	if model_type == 'st_ssd_mobilenet_v1':
	fmap_sizes = np.stack([fmap_widths, fmap_heights],axis=-1)
	elif model_type == 'ssd_mobilenet_v2_fpnlite':
	fmap_sizes = np.stack([fmap_widths, fmap_heights],axis=-1)

	return fmap_sizes


	def gen_anchors(fmap, img_width, img_height, sizes, ratios, normalize=True, clip=False):
	"""
	Generate anchor boxes for a feature map
	sizes = [s1, s2, ..., sm], ratios = [r1, r2, ..., rn], n_anchors = n + m - 1, only consider [s1, r1], [s1, r2], ..., [s1, rn], [s2, r1], ..., [sm, r1]
	Arguments:
	fmap: feature map
	img_width: image width
	img_height: image height
	sizes: [s1, s2, ..., sm]
	ratios: [r1, r2, ..., rn]
	normalize: normalize to image sizes
	Returns:
	list of anchor boxes
	"""
	_, fmap_height, fmap_width, _ = fmap.shape
	fmap_height = int(fmap_height)
	fmap_width = int(fmap_width)

	res_img = min(img_width, img_height)
	n_anchors = len(sizes) + len(ratios) - 1

	# compute the box widths and heights for all anchor boxes
	wh_list = []
	for ratio in ratios:
	box_w = res_img * sizes[0] * np.sqrt(ratio)
	box_h = res_img * sizes[0] / np.sqrt(ratio)
	wh_list.append((box_w, box_h))

	for i in range(len(sizes)):
	if i == 0:
	continue
	box_w = res_img * sizes[i] * np.sqrt(ratios[0])
	box_h = res_img * sizes[i] / np.sqrt(ratios[0])
	wh_list.append((box_w, box_h))

	wh_list = np.asarray(wh_list)

	step_height = img_height / fmap_height
	step_width = img_width / fmap_width

	offset_height = 0.5
	offset_width = 0.5

	# compute the grid of anchor box center points
	cy = np.linspace(
	offset_height * step_height,
	(offset_height + fmap_height - 1) * step_height,
	fmap_height)
	cx = np.linspace(
	offset_width * step_width,
	(offset_width + fmap_width - 1) * step_width,
	fmap_width)
	cx_grid, cy_grid = np.meshgrid(cx, cy)
	cx_grid = np.expand_dims(cx_grid, -1)
	cy_grid = np.expand_dims(cy_grid, -1)

	# anchors: (fmap_height, fmap_width, n_anchors, 4), 4 elements including
	# (cx, cy, w, h)
	anchors = np.zeros((fmap_height, fmap_width, n_anchors, 4))

	anchors[:, :, :, 0] = np.tile(cx_grid, (1, 1, n_anchors)) # set cx
	anchors[:, :, :, 1] = np.tile(cy_grid, (1, 1, n_anchors)) # set cy
	anchors[:, :, :, 2] = wh_list[:, 0] # set w
	anchors[:, :, :, 3] = wh_list[:, 1] # set h

	# convert (cx, cy, w, h) to (xmin, ymin, xmax, ymax)
	anchors1 = np.copy(anchors).astype(np.float32)
	anchors1[:, :, :, 0] = anchors[:, :, :, 0] - \
	anchors[:, :, :, 2] / 2.0 # set xmin
	anchors1[:, :, :, 1] = anchors[:, :, :, 1] - \
	anchors[:, :, :, 3] / 2.0 # set ymin
	anchors1[:, :, :, 2] = anchors[:, :, :, 0] + \
	anchors[:, :, :, 2] / 2.0 # set xmax
	anchors1[:, :, :, 3] = anchors[:, :, :, 1] + \
	anchors[:, :, :, 3] / 2.0 # set ymax

	# clip the coordinates to lie within the image boundaries
	if clip:
	x_coords = anchors1[:, :, :, [0, 2]]
	x_coords[x_coords >= img_width] = img_width - 1
	x_coords[x_coords < 0] = 0
	anchors1[:, :, :, [0, 2]] = x_coords

	y_coords = anchors1[:, :, :, [1, 3]]
	y_coords[y_coords >= img_height] = img_height - 1
	y_coords[y_coords < 0] = 0
	anchors1[:, :, :, [1, 3]] = y_coords

	if normalize:
	anchors1[:, :, :, [0, 2]] /= img_width
	anchors1[:, :, :, [1, 3]] /= img_height

	return anchors1


	def _gen_anchors_fmap(fmap_size, img_width, img_height,
	sizes, ratios, normalize=True, clip=False):
	"""
	Generate anchor boxes for a given feature map size
	Arguments:
	fmap_size: feature map size
	img_width: image width
	img_height: image height
	sizes: [s1, s2, ..., sm]
	ratios: [r1, r2, ..., rn]
	clip: clip to image boundary
	normalize: normalize to image sizes
	Returns:
	list of anchor boxes
	"""
	fmap_height, fmap_width = fmap_size
	fmap_height = int(fmap_height)
	fmap_width = int(fmap_width)

	res_img = min(img_width, img_height)
	n_anchors = len(sizes) + len(ratios) - 1

	# compute the box widths and heights for all anchor boxes
	wh_list = []
	for ratio in ratios:
	box_w = res_img * sizes[0] * np.sqrt(ratio)
	box_h = res_img * sizes[0] / np.sqrt(ratio)
	wh_list.append((box_w, box_h))

	for i in range(len(sizes)):
	if i == 0:
	continue
	box_w = res_img * sizes[i] * np.sqrt(ratios[0])
	box_h = res_img * sizes[i] / np.sqrt(ratios[0])
	wh_list.append((box_w, box_h))

	wh_list = np.asarray(wh_list)

	step_height = img_height / fmap_height
	step_width = img_width / fmap_width

	offset_height = 0.5
	offset_width = 0.5

	# compute the grid of anchor box center points
	cy = np.linspace(
	offset_height * step_height,
	(offset_height + fmap_height - 1) * step_height,
	fmap_height)
	cx = np.linspace(
	offset_width * step_width,
	(offset_width + fmap_width - 1) * step_width,
	fmap_width)
	cx_grid, cy_grid = np.meshgrid(cx, cy)
	cx_grid = np.expand_dims(cx_grid, -1)
	cy_grid = np.expand_dims(cy_grid, -1)

	# anchors: (fmap_height, fmap_width, n_anchors, 4), 4 elements including
	# (cx, cy, w, h)
	anchors = np.zeros((fmap_height, fmap_width, n_anchors, 4))

	anchors[:, :, :, 0] = np.tile(cx_grid, (1, 1, n_anchors)) # set cx
	anchors[:, :, :, 1] = np.tile(cy_grid, (1, 1, n_anchors)) # set cy
	anchors[:, :, :, 2] = wh_list[:, 0] # set w
	anchors[:, :, :, 3] = wh_list[:, 1] # set h

	# convert (cx, cy, w, h) to (xmin, ymin, xmax, ymax)
	anchors1 = np.copy(anchors).astype(np.float32)
	anchors1[:, :, :, 0] = anchors[:, :, :, 0] - \
	anchors[:, :, :, 2] / 2.0 # set xmin
	anchors1[:, :, :, 1] = anchors[:, :, :, 1] - \
	anchors[:, :, :, 3] / 2.0 # set ymin
	anchors1[:, :, :, 2] = anchors[:, :, :, 0] + \
	anchors[:, :, :, 2] / 2.0 # set xmax
	anchors1[:, :, :, 3] = anchors[:, :, :, 1] + \
	anchors[:, :, :, 3] / 2.0 # set ymax

	# clip the coordinates to lie within the image boundaries
	if clip:
	x_coords = anchors1[:, :, :, [0, 2]]
	x_coords[x_coords >= img_width] = img_width - 1
	x_coords[x_coords < 0] = 0
	anchors1[:, :, :, [0, 2]] = x_coords

	y_coords = anchors1[:, :, :, [1, 3]]
	y_coords[y_coords >= img_height] = img_height - 1
	y_coords[y_coords < 0] = 0
	anchors1[:, :, :, [1, 3]] = y_coords

	if normalize:
	anchors1[:, :, :, [0, 2]] /= img_width
	anchors1[:, :, :, [1, 3]] /= img_height

	return anchors1


	def get_anchor_boxes(fmap_sizes,
	image_size,
	sizes=None,
	ratios=None,
	normalize=True,
	clip_boxes=False):

	anchor_boxes = []
	for i in range(len(fmap_sizes)):
	bboxes_fmap = _gen_anchors_fmap(
	fmap_sizes[i],
	image_size[0],
	image_size[1],
	sizes[i],
	ratios[i],
	normalize=normalize,
	clip=clip_boxes)

	dims = bboxes_fmap.shape
	anchor_boxes += np.reshape(bboxes_fmap, (dims[0] * dims[1] * dims[2], 4)).tolist()

	return np.array(anchor_boxes, dtype=np.float32)