Spaces:

STMicroelectronics
/

stm32-modelzoo-app

Running

File size: 17,560 Bytes

747451d

# /*---------------------------------------------------------------------------------------------
#  * Copyright (c) 2022-2023 STMicroelectronics.
#  * All rights reserved.
#  *
#  * This software is licensed under terms that can be found in the LICENSE file in
#  * the root directory of this software component.
#  * If no LICENSE file comes with this software, it is provided AS-IS.
#  *--------------------------------------------------------------------------------------------*/

import tensorflow as tf

def check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name=None):
    """
    Validate the fill mode, interpolation method and fill value passed to an
    augmentation function. Raise an error if a parameter value is not allowed.

        Args:
            fill_mode (str): fill mode method, one of "reflect", "wrap", "constant" or "nearest"
            interpolation (str): interpolation method. Support "nearest" and "bilinear"
            fill_value (int or float): pixel value in fill mode, must be >= -1.
                Subclasses of int and float are accepted, booleans are not.
            function_name (str): augmentation function name, only used in the error messages

        Returns:
            None

        Raises:
            ValueError: if one of the three checked arguments has an unsupported value
    """
    if fill_mode not in ("reflect", "wrap", "constant", "nearest"):
        raise ValueError(
            f"Argument `fill_mode` of function `{function_name}`: supported values are 'reflect', "
            f"'wrap', 'constant' and 'nearest'. Received {fill_mode}")

    if interpolation not in ("nearest", "bilinear"):
        raise ValueError(
            f"Argument `interpolation` of function `{function_name}`: supported values "
            f"are 'nearest' and 'bilinear'. Received {interpolation}")

    # isinstance() instead of an exact type() match so that int/float subclasses
    # are accepted. bool is a subclass of int but is not a meaningful pixel value,
    # so it keeps being rejected explicitly.
    is_real_number = isinstance(fill_value, (int, float)) and not isinstance(fill_value, bool)
    if not is_real_number or fill_value < -1.:
        raise ValueError(
            f"Argument `fill_value` of function `{function_name}`: expecting float values "
            f"greater than or equal to -1. Received {fill_value}")

def generate_coordinates(tensor_shape):
    """
    Enumerate the 4 indices (batch, dim1, dim2, channel) of every element of a
    4-D tensor.

        Args:
            tensor_shape (tuple): tuple of 4 elements for all dimensions including batch
        Returns:
            a float32 tf.Tensor of shape (batch, dim1*dim2*channel, 4) where
            entry [n, p, :] holds the 4 indices of the p-th element of batch item n
    """
    # One index vector per dimension of the tensor
    axes = [tf.range(tensor_shape[dim]) for dim in range(4)]

    # 'ij' indexing keeps the grids ordered as (batch, dim1, dim2, channel)
    grids = tf.meshgrid(*axes, indexing='ij')
    stacked = tf.stack(grids, axis=-1)  # shape: (batch, dim1, dim2, channel, 4)

    # Flatten everything but the batch dimension and the trailing index vector
    elements_per_item = tensor_shape[1] * tensor_shape[2] * tensor_shape[3]
    flattened = tf.reshape(stacked, [-1, elements_per_item, 4])  # shape: (batch, dim1*dim2*channel, 4)

    return tf.cast(flattened, tf.float32)

def image_projective_transform(images, output_shape, fill_value, transforms, fill_mode, interpolation):
    """
    Apply one projective transform per image to a batch of images.

    This function is here because tf.raw_ops.ImageProjectiveTransformV3() is not compatible
    with XLA_GPU compilation while this function works on GPU.

    Definition :
        If one row of transforms is [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the output
        point (x, y) to a transformed input point
        (x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k), where k = c0 x + c1 y + 1.
        In this implementation x is the index along axis 2 of `images` and y is the index
        along axis 1.

    Each output pixel is copied from the input pixel located at the integer part of
    (x', y'); no blending between neighbouring pixels is performed.

    Fill modes (compared case-insensitively):
        "reflect" / "wrap": the transformed coordinates are wrapped around the image with a
            period equal to the image size. Both modes share this implementation.
        any other value: if the transformed point lays outside of the input image, the
            output pixel is set to fill_value.

        Args:
            images (tf.Tensor): batch of input images, 4 dimensions including batch
            output_shape (tuple): shape of the output. Not used so far
            fill_value (float): filled pixel value
            transforms (np.array): transformation matrices to be applied, one row of
                8 coefficients per image
            fill_mode: method for filling when image is augmented ("wrap", "reflect"...)
            interpolation: interpolation method such as "nearest" or "bilinear". Not used so far

        Returns:
            images after transformation (tf.Tensor), same shape as `images`
    """

    # Split the coefficients into 8 tensors of shape (batch, 1) so that they broadcast
    # against the per-element coordinates of shape (batch, dim1*dim2*channel)
    a0, a1, a2, b0, b1, b2, c0, c1 = [transforms[:, i][..., None] for i in range(8)]

    # Get the shape of the input batch of images
    im_shape = tf.shape(images)  # shape: (4,) INT32

    # Creation of the Tensor containing the coordinates of each element in the batch of images
    init_coordinates = generate_coordinates(im_shape)  # shape: (batch, dim1*dim2*channel, 4) FLOAT32

    b = init_coordinates[:, :, 0]    # batch index
    row = init_coordinates[:, :, 1]  # index along axis 1 ("y" of the definition)
    col = init_coordinates[:, :, 2]  # index along axis 2 ("x" of the definition)
    c = init_coordinates[:, :, 3]    # channel index

    k = c1 * row + c0 * col + 1

    row_prime = (b1 * row + b0 * col + b2) / k
    col_prime = (a1 * row + a0 * col + a2) / k

    # Accept both "wrap" and "WRAP" style values
    mode = fill_mode.upper() if isinstance(fill_mode, str) else fill_mode

    if mode in ('REFLECT', 'WRAP'):
        # The wrapping period is the full dimension size: with (size - 1) the last
        # row/column would be folded onto index 0, so even an identity transform
        # would not return the input image. Flooring first keeps the result an exact
        # integer in [0, size - 1] even for tiny negative coordinates.
        n_rows = tf.cast(im_shape[1], tf.float32)
        n_cols = tf.cast(im_shape[2], tf.float32)
        row_prime = tf.math.floormod(tf.floor(row_prime), n_rows)
        col_prime = tf.math.floormod(tf.floor(col_prime), n_cols)

    trans_coordinates = tf.stack([b, row_prime, col_prime, c], axis=-1)  # shape: (batch, dim1*dim2*channel, 4) FLOAT32
    trans_coordinates = tf.cast(trans_coordinates, tf.int32)             # shape: (batch, dim1*dim2*channel, 4) INT32
    trans_coordinates = tf.reshape(trans_coordinates, [-1, 4])           # shape: (batch*dim1*dim2*channel, 4) INT32

    # In-bound tests on the two spatial indices
    row_in_bounds = tf.logical_and(trans_coordinates[:, 1] >= 0,
                                   trans_coordinates[:, 1] <= (im_shape[1] - 1))
    col_in_bounds = tf.logical_and(trans_coordinates[:, 2] >= 0,
                                   trans_coordinates[:, 2] <= (im_shape[2] - 1))
    bmask = tf.logical_and(row_in_bounds, col_in_bounds)  # shape: (batch*dim1*dim2*channel) BOOL

    # Out of bound coordinates are redirected to element [0, 0, 0, 0] so that the
    # gather below stays valid; the gathered value is discarded by the mask afterwards
    mask = tf.cast(bmask, dtype=trans_coordinates.dtype)
    trans_coordinates *= mask[..., None]

    # Same mask in the dtype of the images, used to blend in the fill value
    mask = tf.cast(bmask, dtype=images.dtype)
    mask = tf.reshape(mask, im_shape)  # shape: (batch, dim1, dim2, channel)

    fill_mask = (1 - mask) * tf.cast(fill_value, dtype=images.dtype)

    # Gather pixels that are located in the original Tensor with the help of the
    # transformed coordinates to form the new Tensor
    transformed_image = tf.gather_nd(images, trans_coordinates)  # shape: (batch*dim1*dim2*channel)
    transformed_image = tf.reshape(transformed_image, im_shape)  # shape: (batch, dim1, dim2, channel)

    # Keep gathered values where the source point was inside the image, fill_value elsewhere
    transformed_image = transformed_image * mask + fill_mask

    return transformed_image
         

def transform_images(images, transforms, fill_mode='reflect', fill_value=0.0, interpolation='bilinear'):
    """
    Apply a batch of projective transforms to a batch of images by delegating
    to image_projective_transform().

        Args:
            images (tf.Tensor): batch of input images
            transforms (np.array): transformation matrix to be applied on image
            fill_mode (str): method for filling when image is augmented ("wrap", "reflect"...),
                forwarded in upper case
            fill_value (float): filled pixel value
            interpolation (str): interpolation method such as "nearest" or "bilinear",
                forwarded in upper case

        Returns:
            images after transformation (tf.Tensor)
    """

    # Sizes of axes 1 and 2 of the input batch
    spatial_shape = tf.shape(images)[1:3]

    # The underlying implementation is given upper-case option names
    fill_mode_upper = fill_mode.upper()
    interpolation_upper = interpolation.upper()

    return image_projective_transform(
            images=images,
            output_shape=spatial_shape,
            fill_value=fill_value,
            transforms=transforms,
            fill_mode=fill_mode_upper,
            interpolation=interpolation_upper)

######### Legacy Code #########
# return tf.raw_ops.ImageProjectiveTransformV3(
#         images=images,
#         output_shape=output_shape,
#         fill_value=fill_value,
#         transforms=transforms,
#         fill_mode=fill_mode.upper(),
#         interpolation=interpolation.upper())


def get_flip_matrix(batch_size, width, height, mode):
    """
    Create a batch of flipping matrices, each one given as a row of
    8 coefficients.

        Args:
            batch_size (int): size of input batch of images
            width (float): image width, a horizontal flip maps x to (width-1) - x
            height (float): image height, a vertical flip maps y to (height-1) - y
            mode (str): flipping direction, "horizontal" or "vertical". With any other
                value each image randomly gets a horizontal, a vertical or a combined flip

        Returns:
            batch of flipping matrices (tf.Tensor), float32, shape (batch_size, 8)
    """

    if mode == "horizontal":
        # Same horizontal flip repeated for every image of the batch
        coefficients = [-1, 0, (width-1), 0, 1, 0, 0, 0]
        repeated = tf.tile(coefficients, [batch_size])
        return tf.cast(tf.reshape(repeated, [batch_size, 8]), tf.float32)

    if mode == "vertical":
        # Same vertical flip repeated for every image of the batch
        coefficients = [1, 0, 0, 0, -1, (height-1), 0, 0]
        repeated = tf.tile(coefficients, [batch_size])
        return tf.cast(tf.reshape(repeated, [batch_size, 8]), tf.float32)

    # Candidate flips: horizontal, vertical, both
    candidates = [[-1, 0, (width-1),  0,  1,          0, 0, 0],
                  [ 1, 0,         0,  0, -1, (height-1), 0, 0],
                  [-1, 0, (width-1),  0, -1, (height-1), 0, 0]]

    # Draw one candidate index in {0, 1, 2} per image
    choice = tf.random.uniform([batch_size], minval=0, maxval=3, dtype=tf.int32)

    return tf.cast(tf.gather(candidates, choice), tf.float32)


def get_translation_matrix(translations):
    """
    Create a batch of translation matrices given a batch of x and y
    translations. Translations are independent from each other and may be
    different from one batch item to another.

    The translation matrix is:
        [[ 1,   0,  -x_translation],
         [ 0,   1,  -y_translation],
         [ 0,   0,   1            ]]

    The function returns the following representation of the matrix:
         [ 1, 0, -x_translation, 0, 1, -y_translation, 0, 0 ]
    with entry [2, 2] being implicit and equal to 1.

        Args:
            translations (tf.Tensor): one row per batch item, x translation in
                column 0 and y translation in column 1

        Returns:
            tf.Tensor with the translation matrices, one row of 8 coefficients per batch item
    """

    batch = tf.shape(translations)[0]

    # Coefficients listed in the order a0, a1, a2, b0, b1, b2, (c0, c1)
    columns = [
        tf.ones((batch, 1), tf.float32),
        tf.zeros((batch, 1), tf.float32),
        -translations[:, 0, None],
        tf.zeros((batch, 1), tf.float32),
        tf.ones((batch, 1), tf.float32),
        -translations[:, 1, None],
        tf.zeros((batch, 2), tf.float32),
    ]

    return tf.concat(columns, axis=1)


def get_rotation_matrix(angles, width, height):
    """
    Create a batch of rotation matrices given a batch of angles. Angles are
    independent from each other and may be different from one batch item
    to another.

    The rotation matrix is:
        [ cos(angle), -sin(angle), x_offset ]
        [ sin(angle),  cos(angle), y_offset ]
        [ 0,           0,          1        ]
    x_offset and y_offset are calculated from the angles and image dimensions.

    The function returns the following representation of the matrix:
         [ cos(angle), -sin(angle), x_offset, sin(angle), cos(angle), y_offset, 0, 0 ]
    with entry [2, 2] being implicit and equal to 1.

        Args:
            angles (tf.Tensor): batch of angles for which we compute a rotation matrix
            width (float): width of input images
            height (float): height of input images

        Returns:
            (tf.Tensor), rotation matrices, one row of 8 coefficients per angle
    """

    width = tf.cast(width, tf.float32)
    height = tf.cast(height, tf.float32)

    batch = tf.shape(angles)[0]

    cos_a = tf.cos(angles)
    sin_a = tf.sin(angles)
    max_x = width - 1
    max_y = height - 1

    # Offsets computed from the rotated far corner (max_x, max_y), halved
    x_offset = (max_x - (cos_a * max_x - sin_a * max_y)) / 2.0
    y_offset = (max_y - (sin_a * max_x + cos_a * max_y)) / 2.0

    # Coefficients listed in the order a0, a1, a2, b0, b1, b2, (c0, c1)
    columns = [
        cos_a[:, None],
        -sin_a[:, None],
        x_offset[:, None],
        sin_a[:, None],
        cos_a[:, None],
        y_offset[:, None],
        tf.zeros((batch, 2), tf.float32),
    ]

    return tf.concat(columns, axis=1)


def get_shear_matrix(angles, axis):
    """
    Create a batch of shearing matrices given a batch of angles. Angles are
    independent from each other and may be different from one batch item
    to another.

    The shear matrix along the x axis only is:
        [ 1, -sin(angle), 0 ]
        [ 0,  1,          0 ]
        [ 0,  0,          1 ]

    The shear matrix along the y axis only is:
        [ 1,           0, 0 ]
        [ cos(angle),  1, 0 ]
        [ 0,           0, 1 ]

    The shear matrix along both x and y axis is:
        [ 1, -sin(angle),  0 ]
        [ 0,  cos(angle),  0 ]
        [ 0,  0,           1 ]

    The function returns the first two rows of the selected matrix followed
    by two zeros, for example along both x and y axis:
         [ 1, -sin(angle), 0, 0, cos(angle), 0, 0, 0 ]
    with entry [2, 2] being implicit and equal to 1.

        Args:
            angles (tf.Tensor): batch of angles for which we compute a shear matrix
            axis (str): axis on which we shear ("x" or "y", any other value means both)

        Returns:
            (tf.Tensor): shear matrices, one row of 8 coefficients per angle
    """

    batch = tf.shape(angles)[0]

    # Shearing never adds a translation: both offsets are zero
    x_offset = tf.zeros(batch)
    y_offset = tf.zeros(batch)

    # Only coefficients a1, b0 and b1 depend on the requested axis
    if axis == 'x':
        a1 = -tf.sin(angles)[:, None]
        b0 = tf.zeros((batch, 1), tf.float32)
        b1 = tf.ones((batch, 1), tf.float32)
    elif axis == 'y':
        a1 = tf.zeros((batch, 1), tf.float32)
        b0 = tf.cos(angles)[:, None]
        b1 = tf.ones((batch, 1), tf.float32)
    else:
        a1 = -tf.sin(angles)[:, None]
        b0 = tf.zeros((batch, 1), tf.float32)
        b1 = tf.cos(angles)[:, None]

    # Coefficients listed in the order a0, a1, a2, b0, b1, b2, (c0, c1)
    columns = [
        tf.ones((batch, 1), tf.float32),
        a1,
        x_offset[:, None],
        b0,
        b1,
        y_offset[:, None],
        tf.zeros((batch, 2), tf.float32),
    ]

    return tf.concat(columns, axis=1)


def get_zoom_matrix(zooms, width, height):
    """
    Create a batch of zooming matrices.
    Arguments width and height are the image dimensions.

    The zoom matrix is:
        [[ zoom_x, 0,      x_offset],
         [ 0,      zoom_y, y_offset],
         [ 0,      0,      1       ]]
    with x_offset = (width - 1) / 2 * (1 - zoom_x)
    and  y_offset = (height - 1) / 2 * (1 - zoom_y).

    The function returns the following representation of the matrix:
         [ zoom_x, 0, x_offset, 0, zoom_y, y_offset, 0, 0 ]
    with entry [2, 2] being implicit and equal to 1.

        Args:
            zooms (tf.Tensor): batch of zoom values, x zoom in column 0 and y zoom in column 1
            width (float): width of input images
            height (float): height of input images

        Returns:
            (tf.Tensor): batch of zoom matrices, one row of 8 coefficients per batch item
    """

    width = tf.cast(width, tf.float32)
    height = tf.cast(height, tf.float32)

    batch = tf.shape(zooms)[0]

    # Keep a trailing axis of size 1 so that the columns can be concatenated directly
    zoom_x = zooms[:, 0, None]
    zoom_y = zooms[:, 1, None]

    # Offsets built from half of (width - 1) and (height - 1)
    x_offset = ((width - 1.) / 2.0) * (1.0 - zoom_x)
    y_offset = ((height - 1.) / 2.0) * (1.0 - zoom_y)

    # Coefficients listed in the order a0, a1, a2, b0, b1, b2, (c0, c1)
    columns = [
        zoom_x,
        tf.zeros((batch, 1), tf.float32),
        x_offset,
        tf.zeros((batch, 1), tf.float32),
        zoom_y,
        y_offset,
        tf.zeros((batch, 2), tf.float32),
    ]

    return tf.concat(columns, axis=-1)