|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| import tensorflow as tf
|
|
|
def check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name=None):
    """
    Validate the fill mode, interpolation method and fill value passed
    to an augmentation function.

    Args:
        fill_mode (str): fill mode, one of "reflect", "wrap", "constant" or "nearest"
        interpolation (str): interpolation method, "nearest" or "bilinear"
        fill_value (int or float): pixel value used in fill mode. Must be a real
            number greater than or equal to -1. Booleans are rejected.
        function_name (str): name of the augmentation function being checked,
            only used to build the error messages

    Returns:
        None

    Raises:
        ValueError: if one of the three checked arguments has an unsupported value
    """
    # Local import so that this block does not depend on the file's import section.
    import numbers

    if fill_mode not in ("reflect", "wrap", "constant", "nearest"):
        raise ValueError(
            f"Argument `fill_mode` of function `{function_name}`: supported values are 'reflect', "
            f"'wrap', 'constant' and 'nearest'. Received {fill_mode}")

    if interpolation not in ("nearest", "bilinear"):
        raise ValueError(
            f"Argument `interpolation` of function `{function_name}`: supported values "
            f"are 'nearest' and 'bilinear'. Received {interpolation}")

    # isinstance() rather than an exact type() match, so that subclasses of
    # int/float and numeric scalar types registered as numbers.Real are accepted.
    # bool is a subclass of int and has to be excluded explicitly.
    is_real_number = isinstance(fill_value, numbers.Real) and not isinstance(fill_value, bool)
    if not is_real_number or fill_value < -1.:
        raise ValueError(
            f"Argument `fill_value` of function `{function_name}`: expecting float values "
            f"greater than or equal to -1. Received {fill_value}")
|
|
|
def generate_coordinates(tensor_shape):
    """
    Enumerate the indices of every element of a 4-dimensional tensor.

    Args:
        tensor_shape (tuple): the 4 dimension sizes of the tensor, batch included

    Returns:
        a float32 tf.Tensor of shape [dim0, dim1*dim2*dim3, 4]. The last axis
        holds the 4 indices (one per dimension) of one element.
    """
    # One index range per dimension
    axes = [tf.range(tensor_shape[dim]) for dim in range(4)]

    # 'ij' indexing keeps the grids in the same axis order as the tensor
    grids = tf.meshgrid(*axes, indexing='ij')
    stacked = tf.stack(grids, axis=-1)

    # Flatten everything except the first (batch) dimension
    elements_per_item = tensor_shape[1] * tensor_shape[2] * tensor_shape[3]
    flattened = tf.reshape(stacked, [-1, elements_per_item, 4])

    return tf.cast(flattened, tf.float32)
|
|
|
def _resolve_out_of_range(index, size, fill_mode):
    """
    Map integer source indices along one axis onto [0, size) according to
    the fill mode. Indices are returned unchanged for any mode other than
    WRAP, REFLECT and NEAREST, so that the caller can detect the
    out-of-range ones and replace them with the fill value.
    """
    if fill_mode == 'WRAP':
        # (a b c d | a b c d | a b c d): the period is the full axis length
        return tf.math.floormod(index, size)
    if fill_mode == 'REFLECT':
        # (d c b a | a b c d | d c b a): mirror about the edges, period is 2 * size
        period = 2 * size
        folded = tf.math.floormod(index, period)
        return tf.where(folded < size, folded, period - 1 - folded)
    if fill_mode == 'NEAREST':
        # (a a a a | a b c d | d d d d): repeat the closest edge pixel
        return tf.clip_by_value(index, 0, size - 1)
    return index


def image_projective_transform(images, output_shape, fill_value, transforms, fill_mode, interpolation):
    """
    This function is here because tf.raw_ops.ImageProjectiveTransformV3() is not
    compatible with XLA_GPU compilation while this function works on GPU.

    Definition:
    If one row of transforms is [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the
    output point (x, y) to a transformed input point
        (x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k),
    where k = c0 x + c1 y + 1. Here x is the index along dimension 2 of `images`
    (columns) and y the index along dimension 1 (rows).

    Each output pixel takes the value of the input pixel closest to (x', y')
    (nearest-neighbour sampling; tf.round is used, which rounds halves to even).
    When the transformed point lies outside of the input image:
        - "wrap", "reflect" and "nearest" fill modes remap it inside the image,
        - any other fill mode ("constant") sets the output pixel to fill_value.

    Args:
        images (tf.Tensor): batch of input images, 4 dimensions including batch
        output_shape (tuple): shape of the output. Not used so far, the output
            has the same shape as `images`
        fill_value (float): filled pixel value
        transforms (np.array): transformation matrices to be applied on images,
            one row of 8 coefficients per image (a single row is broadcast
            to the whole batch)
        fill_mode: method for filling when image is augmented
            ("wrap", "reflect", "nearest", "constant"), case-insensitive
        interpolation: interpolation method such as "nearest" or "bilinear".
            Not used so far, sampling is always nearest-neighbour
    Returns:
        images after transformation (tf.Tensor), same shape and dtype as `images`
    """
    # Accept both lower-case and upper-case mode names
    fill_mode = fill_mode.upper()

    # One [batch, 1] column per coefficient so that it broadcasts over all pixels
    transforms = tf.reshape(tf.cast(transforms, tf.float32), [-1, 8])
    a0, a1, a2, b0, b1, b2, c0, c1 = [transforms[:, i:i + 1] for i in range(8)]

    im_shape = tf.shape(images)
    height, width = im_shape[1], im_shape[2]

    # Indices of every output element, each of shape [batch, rows*cols*channels]
    grid = generate_coordinates(im_shape)
    batch_idx = tf.cast(grid[:, :, 0], tf.int32)
    row = grid[:, :, 1]
    col = grid[:, :, 2]
    channel = tf.cast(grid[:, :, 3], tf.int32)

    # Projective mapping from output pixel to input (source) location
    k = c0 * col + c1 * row + 1
    src_col = (a0 * col + a1 * row + a2) / k
    src_row = (b0 * col + b1 * row + b2) / k

    # Round to the nearest pixel. A plain cast would truncate toward zero and
    # treat locations in (-1, 0) as if they were inside the image.
    src_row = tf.cast(tf.round(src_row), tf.int32)
    src_col = tf.cast(tf.round(src_col), tf.int32)

    src_row = _resolve_out_of_range(src_row, height, fill_mode)
    src_col = _resolve_out_of_range(src_col, width, fill_mode)

    # Pixels whose source location is still outside the image get the fill value
    rows_ok = tf.logical_and(src_row >= 0, src_row < height)
    cols_ok = tf.logical_and(src_col >= 0, src_col < width)
    in_bounds = tf.logical_and(rows_ok, cols_ok)

    # Out-of-bounds entries are redirected to element [0, 0, 0, 0] so that the
    # gather is always valid; their value is overwritten below.
    src_coordinates = tf.stack([batch_idx, src_row, src_col, channel], axis=-1)
    src_coordinates = tf.where(in_bounds[..., None], src_coordinates, tf.zeros_like(src_coordinates))

    gathered = tf.gather_nd(images, tf.reshape(src_coordinates, [-1, 4]))
    gathered = tf.reshape(gathered, im_shape)

    # Select rather than multiply by a 0/1 mask, so that a NaN/Inf input pixel
    # cannot leak into the filled area.
    fill = tf.cast(fill_value, dtype=images.dtype)
    transformed_image = tf.where(tf.reshape(in_bounds, im_shape), gathered, fill)

    return transformed_image
|
|
|
|
|
def transform_images(
        images,
        transforms,
        fill_mode='reflect',
        fill_value=0.0,
        interpolation='bilinear'):
    """
    Apply a batch of projective transforms to a batch of images by
    forwarding the arguments to image_projective_transform().

    The fill mode and interpolation names are converted to upper case
    before being forwarded. The output shape passed along is made of
    dimensions 1 and 2 of the shape of `images`.

    Args:
        images (tf.Tensor): batch of input images
        transforms (np.array): transformation matrix to be applied on image
        fill_mode: method for filling when image is augmented ("wrap", "reflect"...)
        fill_value (float): filled pixel value
        interpolation: interpolation method such as "nearest" or "bilinear"
    Returns:
        images after transformation (tf.Tensor)
    """
    spatial_dims = tf.shape(images)[1:3]
    mode_name = fill_mode.upper()
    method_name = interpolation.upper()

    return image_projective_transform(
        images=images,
        output_shape=spatial_dims,
        fill_value=fill_value,
        transforms=transforms,
        fill_mode=mode_name,
        interpolation=method_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_flip_matrix(batch_size, width, height, mode):
    """
    Build a batch of flipping matrices, one row of 8 coefficients per image.

    A horizontal flip row is [-1, 0, width-1, 0, 1, 0, 0, 0] and a vertical
    flip row is [1, 0, 0, 0, -1, height-1, 0, 0]. For any other mode, each
    batch item randomly gets a horizontal flip, a vertical flip, or both.

    Args:
        batch_size (int): size of input batch of images
        width (float): image width, only used by flips along the horizontal direction
        height (float): image height, only used by flips along the vertical direction
        mode (str): flipping direction, "horizontal", "vertical" or by default both
    Returns:
        batch of flipping matrices (tf.Tensor), float32, shape [batch_size, 8]
    """

    def _replicate(row):
        # Same matrix for every image of the batch
        tiled = tf.tile(row, [batch_size])
        return tf.cast(tf.reshape(tiled, [batch_size, 8]), tf.float32)

    if mode == "horizontal":
        return _replicate([-1, 0, (width-1), 0, 1, 0, 0, 0])

    if mode == "vertical":
        return _replicate([1, 0, 0, 0, -1, (height-1), 0, 0])

    # Any other mode: draw one of the three possible flips for each image
    candidates = [[-1, 0, (width-1), 0, 1, 0, 0, 0],
                  [ 1, 0, 0, 0, -1, (height-1), 0, 0],
                  [-1, 0, (width-1), 0, -1, (height-1), 0, 0]]
    picks = tf.random.uniform([batch_size], minval=0, maxval=3, dtype=tf.int32)
    return tf.cast(tf.gather(candidates, picks), tf.float32)
|
|
|
|
|
def get_translation_matrix(translations):
    """
    Build a batch of translation matrices from a batch of x and y
    translations. Each batch item has its own pair of translations.

    The translation matrix is:
        [[ 1,  0,  -x_translation],
         [ 0,  1,  -y_translation],
         [ 0,  0,   1            ]]

    The function returns the following representation of the matrix:
        [ 1, 0, -x_translation, 0, 1, -y_translation, 0, 0 ]
    with entry [2, 2] being implicit and equal to 1.

    Args:
        translations: batch of translation values, indexed as
            translations[:, 0] for x and translations[:, 1] for y
    Returns:
        tf.Tensor with one row of 8 coefficients per batch item
    """
    count = tf.shape(translations)[0]

    # Constant columns shared by all matrices
    one = tf.ones((count, 1), tf.float32)
    zero = tf.zeros((count, 1), tf.float32)

    # Per-item offsets, kept as columns
    x_shift = -translations[:, 0, None]
    y_shift = -translations[:, 1, None]

    return tf.concat([one, zero, x_shift, zero, one, y_shift, zero, zero], axis=1)
|
|
|
|
|
def get_rotation_matrix(angles, width, height):
    """
    Build a batch of rotation matrices from a batch of angles.
    Each batch item has its own angle.

    The rotation matrix is:
        [ cos(angle), -sin(angle), x_offset ]
        [ sin(angle),  cos(angle), y_offset ]
        [ 0,           0,          1        ]
    x_offset and y_offset are calculated from the angles and image dimensions.

    The function returns the following representation of the matrix:
        [ cos(angle), -sin(angle), x_offset, sin(angle), cos(angle), y_offset, 0, 0 ]
    with entry [2, 2] being implicit and equal to 1.

    Args:
        angles (list(float)): batch of angles for which we compute a rotation matrix
        width (float): width of input images
        height (float): height of input images
    Returns:
        (tf.Tensor), rotation matrices, one row of 8 coefficients per angle
    """
    max_x = tf.cast(width, tf.float32) - 1
    max_y = tf.cast(height, tf.float32) - 1

    cos_a = tf.cos(angles)
    sin_a = tf.sin(angles)

    # Offsets computed from (width - 1), (height - 1) and the angle
    x_offset = (max_x - (cos_a * max_x - sin_a * max_y)) / 2.0
    y_offset = (max_y - (sin_a * max_x + cos_a * max_y)) / 2.0

    coefficients = [cos_a, -sin_a, x_offset, sin_a, cos_a, y_offset]
    columns = [coefficient[:, None] for coefficient in coefficients]

    # The two projective coefficients are always 0
    columns.append(tf.zeros((tf.shape(angles)[0], 2), tf.float32))

    return tf.concat(columns, axis=1)
|
|
|
|
|
def get_shear_matrix(angles, axis):
    """
    Build a batch of shearing matrices from a batch of angles.
    Each batch item has its own angle.

    The shear matrix along the x axis only is:
        [ 1, -sin(angle), 0 ]
        [ 0,  1,          0 ]
        [ 0,  0,          1 ]

    The shear matrix along the y axis only is:
        [ 1,          0, 0 ]
        [ cos(angle), 1, 0 ]
        [ 0,          0, 1 ]

    The shear matrix along both x and y axis is:
        [ 1, -sin(angle), 0 ]
        [ 0,  cos(angle), 0 ]
        [ 0,  0,          1 ]

    The function returns the following representation of the
    shear matrix along both x and y axis:
        [ 1, -sin(angle), 0, 0, cos(angle), 0, 0, 0 ]
    with entry [2, 2] being implicit and equal to 1.
    Representations are similar for x axis only and y axis only.

    NOTE(review): with the y axis only, the shear term is cos(angle), so a
    zero angle still produces a shear - confirm this is intended.

    Args:
        angles (list(float)): batch of angles for which we compute a shear matrix
        axis (str): axis on which we shear ("x" or "y", by default both)
    Returns:
        (tf.Tensor): shear matrices, one row of 8 coefficients per angle
    """
    count = tf.shape(angles)[0]

    # Constant columns shared by the three variants
    one = tf.ones((count, 1), tf.float32)
    zero = tf.zeros((count, 1), tf.float32)

    # First six coefficients, the x and y offsets (3rd and 6th) are always 0
    if axis == 'x':
        coefficients = [one, -tf.sin(angles)[:, None], zero,
                        zero, one, zero]
    elif axis == 'y':
        coefficients = [one, zero, zero,
                        tf.cos(angles)[:, None], one, zero]
    else:
        coefficients = [one, -tf.sin(angles)[:, None], zero,
                        zero, tf.cos(angles)[:, None], zero]

    # The two projective coefficients are always 0
    return tf.concat(coefficients + [zero, zero], axis=1)
|
|
|
|
|
def get_zoom_matrix(zooms, width, height):
    """
    Build a batch of zooming matrices.
    Arguments width and height are the image dimensions.

    The zoom matrix is:
        [[ x_zoom, 0,      x_offset ],
         [ 0,      y_zoom, y_offset ],
         [ 0,      0,      1        ]]
    with x_offset = (width - 1) / 2 * (1 - x_zoom)
    and  y_offset = (height - 1) / 2 * (1 - y_zoom).

    The function returns the first 8 entries of the matrix, row by row.

    Args:
        zooms: batch of zoom values, indexed as zooms[:, 0] for x
            and zooms[:, 1] for y
        width (float): width of input images
        height (float): height of input images
    Returns:
        (tf.Tensor): batch of zoom matrices, one row of 8 coefficients per item
    """
    half_width = (tf.cast(width, tf.float32) - 1.) / 2.0
    half_height = (tf.cast(height, tf.float32) - 1.) / 2.0

    # Per-item zoom factors, kept as columns
    x_zoom = zooms[:, 0, None]
    y_zoom = zooms[:, 1, None]

    x_offset = half_width * (1.0 - x_zoom)
    y_offset = half_height * (1.0 - y_zoom)

    zero = tf.zeros((tf.shape(zooms)[0], 1), tf.float32)

    return tf.concat([x_zoom, zero, x_offset, zero, y_zoom, y_offset, zero, zero], axis=-1)
|
|
|