Spaces:

STMicroelectronics
/

stm32-modelzoo-app

Running

File size: 13,133 Bytes

747451d

# /*---------------------------------------------------------------------------------------------
#  * Copyright (c) 2022 STMicroelectronics.
#  * All rights reserved.
#  * This software is licensed under terms that can be found in the LICENSE file in
#  * the root directory of this software component.
#  * If no LICENSE file comes with this software, it is provided AS-IS.
#  *--------------------------------------------------------------------------------------------*/

import numpy as np


def get_sizes_ratios_ssd_v1(input_shape: tuple = None) -> tuple:
    """
    Select the anchor sizes and aspect ratios matching a model input shape.

    Only the first element of `input_shape` is inspected. When it equals 192,
    five two-element size entries are returned; for any other value (224
    included) the same five entries are returned preceded by [0.1, 0.16].
    Every size entry is paired with its own copy of the same four ratios.

    Args:
        input_shape (tuple): The input shape of the model in the format (height, width, channels).

    Returns:
        tuple: `(sizes, ratios)`, two lists of equal length. Each element of
            `sizes` is a two-element list and each element of `ratios` is a
            four-element list.

    Raises:
        TypeError: If `input_shape` is left at its default of None, because it
            is indexed unconditionally.

    """
    # Size entries common to every supported resolution
    shared_sizes = [[0.26, 0.33], [0.42, 0.49], [0.58, 0.66], [0.74, 0.82], [0.9, 0.98]]

    if input_shape[0] == 192:
        sizes = shared_sizes
    else:
        # All other resolutions get one additional, smaller entry in front
        sizes = [[0.1, 0.16]] + shared_sizes

    # One independent ratio list per size entry
    ratios = [[1.0, 2.0, 0.5, 1.0 / 3] for _ in sizes]

    return sizes, ratios


def get_sizes_ratios_ssd_v2(input_shape: tuple = None) -> tuple:
    """
    Compute the anchor sizes and aspect ratios for `ssd_mobilenet_v2_fpnlite` models.

    The largest size entry is always [0.90, 0.98]. The smallest size entry
    depends on the resolution `r` of the first feature map and follows
    `a / r**5 + b`, where `a` and `b` are solved so that the entry equals
    [0.26, 0.33] at r = 24 (input 192) and [0.06, 0.09] at r = 52 (input 416).
    The entries in between are spaced linearly from smallest to largest.

    Args:
        input_shape (tuple): Model input shape. Elements 0 and 1 must each be
            one of 192, 224, 256, 288, 320, 352, 384 or 416. Only element 0 is
            used to select the first feature-map resolution.

    Returns:
        tuple: `(sizes, ratios)` where `sizes` is a NumPy array of shape (5, 2)
            and `ratios` is a list of 5 independent four-element lists.

    Raises:
        ValueError: If element 0 or element 1 of `input_shape` is not a
            supported resolution.
        TypeError: If `input_shape` is left at its default of None, because it
            is indexed unconditionally.

    """

    def _affine_search(s, r, slope):
        # Find [a, b] such that a / r_i**slope + b == s_i for both entries of
        # r, i.e. solve R @ [a, b] = s with rows R_i = [1 / r_i**slope, 1].
        # R must be square, so r and s must each hold exactly two values.
        r = np.array(r)

        R = np.stack([1 / (r ** slope), np.ones_like(r)], axis=1)

        return np.linalg.inv(R) @ s

    def _sizes_creation(target_max_res, base_sizes, t_min_s, res_range, slope):
        # target_max_res : first feature-map resolution of the target model
        # base_sizes     : size entries used at res_range[0]
        # t_min_s        : smallest size entry wanted at res_range[1]
        # res_range      : [lowest, highest] first feature-map resolution
        # slope          : exponent applied to the resolution

        ab_x = _affine_search([base_sizes[0][0], t_min_s[0]], res_range, slope)  # [a, b]
        ab_y = _affine_search([base_sizes[0][1], t_min_s[1]], res_range, slope)  # [a, b]

        # Evaluate a / res**slope + b for both components at once
        arr_res = np.array([1 / (target_max_res ** slope), 1])

        target_max_s = np.array(base_sizes[-1])
        target_min_s = np.stack([ab_x, ab_y]) @ arr_res
        target_len = len(base_sizes)

        # Linear interpolation between the smallest and the largest entry
        return np.linspace(target_min_s, target_max_s, target_len)

    fmap_sizes_dict = {'192': [24, 12, 6, 3, 2],
                       '224': [28, 14, 7, 4, 2],
                       '256': [32, 16, 8, 4, 2],
                       '288': [36, 18, 9, 5, 3],
                       '320': [40, 20, 10, 5, 3],
                       '352': [44, 22, 11, 6, 3],
                       '384': [48, 24, 12, 6, 3],
                       '416': [52, 26, 13, 7, 4]}

    # Check that the model input shape is supported
    if str(input_shape[0]) not in fmap_sizes_dict or str(input_shape[1]) not in fmap_sizes_dict:
        supported_shapes = [(int(k), int(k), 3) for k in fmap_sizes_dict]
        # Report the two dimensions in the order the caller supplied them
        raise ValueError(f"\nInput shape ({input_shape[0]}, {input_shape[1]}, 3) "
                         "is not supported for `ssd_mobilenet_v2_fpnlite` models.\n"
                         f"Supported shapes: {supported_shapes}\n"
                         "Please check the 'training.model' section of your configuration file.")

    base_sizes = [[0.26, 0.33],
                  [0.42, 0.49],
                  [0.58, 0.66],
                  [0.74, 0.82],
                  [0.90, 0.98]]

    min_sizes_max_res = [0.06, 0.09]

    max_fmap_res = fmap_sizes_dict[str(input_shape[0])][0]

    res_range = [fmap_sizes_dict['192'][0], fmap_sizes_dict['416'][0]]

    sizes = _sizes_creation(max_fmap_res, base_sizes, min_sizes_max_res, res_range, slope=5)

    # Build one list per size entry; `[[...]] * n` would alias a single list
    ratios = [[1.0, 2.0, 0.5, 1.0 / 3] for _ in range(len(sizes))]

    # Return the sizes and ratios as a tuple
    return sizes, ratios


def get_fmap_sizes(model_type, input_shape):
    """
    Look up the feature-map resolutions of a model for a given input shape.

    Args:
        model_type: 'st_ssd_mobilenet_v1' or 'ssd_mobilenet_v2_fpnlite'.
        input_shape: sequence whose elements 0 and 1 are each converted to a
            string and looked up independently in the table of the model.

    Returns:
        Integer NumPy array of shape (n_fmaps, 2). Column 0 holds the
        resolutions found for input_shape[0] and column 1 those found for
        input_shape[1].

    Raises:
        KeyError: if `model_type` is unknown or one of the two dimensions has
            no entry for that model.
        ValueError: raised by `np.stack` if the two dimensions map to lists of
            different lengths (e.g. 192 and 224 for 'st_ssd_mobilenet_v1').
    """
    resolution_tables = {
        'st_ssd_mobilenet_v1': {
            '192': [24, 12, 6, 3, 1],
            '224': [32, 16, 8, 4, 2, 1],
            '256': [32, 16, 8, 4, 2, 1],
        },
        'ssd_mobilenet_v2_fpnlite': {
            '192': [24, 12, 6, 3, 2],
            '224': [28, 14, 7, 4, 2],
            '256': [32, 16, 8, 4, 2],
            '288': [36, 18, 9, 5, 3],
            '320': [40, 20, 10, 5, 3],
            '352': [44, 22, 11, 6, 3],
            '384': [48, 24, 12, 6, 3],
            '416': [52, 26, 13, 7, 4],
        },
    }

    # An unknown model type fails here, before the shape is examined
    per_resolution = resolution_tables[model_type]

    first_dim = np.array(per_resolution[str(input_shape[0])])
    second_dim = np.array(per_resolution[str(input_shape[1])])

    # Pair the two lists element by element: one row per feature map
    return np.stack([first_dim, second_dim], axis=-1)


def gen_anchors(fmap, img_width, img_height, sizes, ratios, normalize=True, clip=False):
    """
    Generate anchor boxes for a feature map.

    sizes = [s1, s2, ..., sm], ratios = [r1, r2, ..., rn], n_anchors = n + m - 1:
    only the combinations [s1, r1], [s1, r2], ..., [s1, rn], [s2, r1], ..., [sm, r1]
    are considered.

    The computation itself is shared with `_gen_anchors_fmap`; this function
    only extracts the grid size from the feature map.

    Arguments:
        fmap: feature map; `fmap.shape` must unpack into exactly four entries,
            of which entries 1 and 2 are used as the grid height and width
        img_width: image width
        img_height: image height
        sizes: [s1, s2, ..., sm]
        ratios: [r1, r2, ..., rn]
        normalize: divide x coordinates by img_width and y coordinates by img_height
        clip: clip to image boundary
    Returns:
        float32 array of shape (fmap_height, fmap_width, n_anchors, 4) holding
        (xmin, ymin, xmax, ymax) for every anchor box
    """
    # The helper converts both entries with int(), as this function used to do
    _, fmap_height, fmap_width, _ = fmap.shape

    return _gen_anchors_fmap((fmap_height, fmap_width), img_width, img_height,
                             sizes, ratios, normalize=normalize, clip=clip)


def _gen_anchors_fmap(fmap_size, img_width, img_height,
                      sizes, ratios, normalize=True, clip=False):
    """
    Build the anchor boxes attached to every cell of one feature map.

    With sizes = [s1, ..., sm] and ratios = [r1, ..., rn], each cell receives
    n + m - 1 boxes: s1 combined with every ratio, followed by s2 ... sm each
    combined with r1. A box of size s and ratio r is s * sqrt(r) wide and
    s / sqrt(r) high, both scaled by the shorter image side.

    Arguments:
        fmap_size: (height, width) of the feature map
        img_width: image width
        img_height: image height
        sizes: [s1, s2, ..., sm]
        ratios: [r1, r2, ..., rn]
        normalize: divide x coordinates by img_width and y coordinates by img_height
        clip: clip to image boundary
    Returns:
        float32 array of shape (fmap_height, fmap_width, n_anchors, 4) holding
        (xmin, ymin, xmax, ymax) for every anchor box
    """
    grid_rows, grid_cols = fmap_size
    grid_rows = int(grid_rows)
    grid_cols = int(grid_cols)

    short_side = min(img_width, img_height)
    n_anchors = len(sizes) + len(ratios) - 1

    # (size, ratio) combinations: first size with every ratio, then every
    # remaining size with the first ratio
    combos = [(sizes[0], ratio) for ratio in ratios]
    combos += [(sizes[k], ratios[0]) for k in range(1, len(sizes))]

    # one (width, height) row per anchor, in image units
    box_wh = np.asarray([
        (short_side * size * np.sqrt(ratio), short_side * size / np.sqrt(ratio))
        for size, ratio in combos
    ])

    # size of one feature-map cell in image units
    cell_h = img_height / grid_rows
    cell_w = img_width / grid_cols

    # box centers sit in the middle of each cell (offset of half a cell)
    centers_y = np.linspace(0.5 * cell_h, (0.5 + grid_rows - 1) * cell_h, grid_rows)
    centers_x = np.linspace(0.5 * cell_w, (0.5 + grid_cols - 1) * cell_w, grid_cols)
    cx_grid, cy_grid = np.meshgrid(centers_x, centers_y)

    # boxes: (fmap_height, fmap_width, n_anchors, 4) as (xmin, ymin, xmax, ymax)
    boxes = np.empty((grid_rows, grid_cols, n_anchors, 4), dtype=np.float32)

    # trailing axis of length 1 broadcasts each center against all anchors
    cx = cx_grid[:, :, np.newaxis]
    cy = cy_grid[:, :, np.newaxis]
    half_w = box_wh[:, 0] / 2.0
    half_h = box_wh[:, 1] / 2.0

    boxes[..., 0] = cx - half_w  # xmin
    boxes[..., 1] = cy - half_h  # ymin
    boxes[..., 2] = cx + half_w  # xmax
    boxes[..., 3] = cy + half_h  # ymax

    # strided views onto the x and y coordinates; writes go straight to `boxes`
    x_view = boxes[..., 0::2]
    y_view = boxes[..., 1::2]

    # clip the coordinates to lie within the image boundaries
    if clip:
        x_view[x_view >= img_width] = img_width - 1
        x_view[x_view < 0] = 0

        y_view[y_view >= img_height] = img_height - 1
        y_view[y_view < 0] = 0

    if normalize:
        x_view /= img_width
        y_view /= img_height

    return boxes


def get_anchor_boxes(fmap_sizes,
                     image_size,
                     sizes=None,
                     ratios=None,
                     normalize=True,
                     clip_boxes=False):
    """
    Generate the anchor boxes of every feature map and return them as one array.

    Arguments:
        fmap_sizes: sequence with one feature-map size per feature map; each
            entry is forwarded unchanged to `_gen_anchors_fmap`
        image_size: sequence whose element 0 is forwarded as the image width
            and element 1 as the image height
        sizes: indexable collection; sizes[i] is used for feature map i.
            Required in practice although the default is None.
        ratios: indexable collection; ratios[i] is used for feature map i.
            Required in practice although the default is None.
        normalize: forwarded to `_gen_anchors_fmap`
        clip_boxes: forwarded to `_gen_anchors_fmap` as `clip`
    Returns:
        float32 array of shape (total_anchors, 4), with the boxes of the
        feature maps concatenated in the order of `fmap_sizes`. When
        `fmap_sizes` is empty, an empty one-dimensional float32 array.
    """
    per_fmap_boxes = []
    for i, fmap_size in enumerate(fmap_sizes):
        bboxes_fmap = _gen_anchors_fmap(
            fmap_size,
            image_size[0],
            image_size[1],
            sizes[i],
            ratios[i],
            normalize=normalize,
            clip=clip_boxes)

        # Flatten (height, width, n_anchors, 4) into one row per box
        per_fmap_boxes.append(np.reshape(bboxes_fmap, (-1, 4)))

    if not per_fmap_boxes:
        # np.concatenate rejects an empty sequence; keep the historical result
        return np.array([], dtype=np.float32)

    # Join the arrays directly instead of round-tripping through Python lists
    return np.concatenate(per_fmap_boxes, axis=0).astype(np.float32, copy=False)