# /*---------------------------------------------------------------------------------------------
#  * Copyright (c) 2022-2023 STMicroelectronics.
#  * All rights reserved.
#  *
#  * This software is licensed under terms that can be found in the LICENSE file in
#  * the root directory of this software component.
#  * If no LICENSE file comes with this software, it is provided AS-IS.
#  *--------------------------------------------------------------------------------------------*/

"""
References:
----------
Some of the code in this package is from or was inspired by:

    Keras Image Preprocessing Layers
    The Tensorflow Authors
    Copyright (c) 2019

Link to the source code:
    https://github.com/keras-team/keras/blob/v2.12.0/keras/layers/preprocessing/image_preprocessing.py#L394-L495
"""

import math
import tensorflow as tf

from common.data_augmentation import \
    check_fill_and_interpolation, transform_images, check_dataaug_argument, \
    get_flip_matrix, get_translation_matrix, get_rotation_matrix, \
    get_shear_matrix, get_zoom_matrix
from .objdet_random_utils import objdet_apply_change_rate


def _transform_boxes(boxes, transforms, image_width, image_height, scale=1.):
    """
    This function applies affine transformations to a batch of boxes.
    The transformation matrices are independent from each other
    and are generally different from one batch item to another.

    Each box is turned into its 4 corner points, the corners are
    transformed, and the result is the axis-aligned box enclosing the
    transformed corners, clipped to the image.

    Arguments:
        boxes:
            Boxes the matrices are applied to, as (x1, y1, x2, y2)
            Shape: [batch_size, num_boxes, 4]
            Boxes whose coordinates sum is not > 0 are considered
            padding boxes and come out as all zeros.
        transforms:
            Matrices coefficients to apply to the boxes
            Shape: [batch_size, 8]
        image_width:
            Width of the images, upper clipping bound for x coordinates.
        image_height:
            Height of the images, upper clipping bound for y coordinates.
        scale:
            A Python float. If smaller than 1, each side of every box is
            moved inwards by `scale` times the box width/height before
            the transform is applied. Defaults to 1 (no shrinking).

    Returns:
        Transformed boxes
        Shape: [batch_size, num_boxes, 4]
    """

    image_width = tf.cast(image_width, tf.float32)
    image_height = tf.cast(image_height, tf.float32)

    boxes_shape = tf.shape(boxes)
    batch_size = boxes_shape[0]
    num_boxes = boxes_shape[1]

    # Create a mask to keep track of padding boxes
    coords_sum = tf.math.reduce_sum(boxes, axis=-1)
    padding_mask = tf.where(coords_sum > 0, 1., 0.)
    padding_mask = tf.repeat(padding_mask, 4)
    padding_mask = tf.reshape(padding_mask, [batch_size, num_boxes, 4])

    # Create and invert the matrices (inversion is necessary
    # to align with the TF function that transforms the images)
    transforms = tf.concat([
        transforms,
        tf.ones([batch_size, 1], dtype=tf.float32)],
        axis=-1)
    matrices = tf.reshape(transforms, [batch_size, 3, 3])
    matrices = tf.linalg.inv(matrices)

    # The same transform has to be applied to all the boxes
    # of a batch item, so we replicate the matrices.
    matrices = tf.expand_dims(matrices, axis=1)
    matrices = tf.tile(matrices, [1, num_boxes, 1, 1])

    x1 = boxes[..., 0]
    y1 = boxes[..., 1]
    x2 = boxes[..., 2]
    y2 = boxes[..., 3]

    # Reduce the size of the boxes before transforming them.
    # The shrunk coordinates must replace x1/y1/x2/y2 because the
    # corners below are built from these variables (the previous code
    # stored the shrunk boxes in an unused tensor, and offset y1 by
    # dx instead of dy, so `scale` had no effect at all).
    if scale < 1:
        dx = scale * (x2 - x1)
        dy = scale * (y2 - y1)
        x1 = x1 + dx
        y1 = y1 + dy
        x2 = x2 - dx
        y2 = y2 - dy

    # Stack box corner vectors to create 3x4 matrices
    # (rows: x, y, 1 - columns: the 4 corners).
    # Then multiply by transformation matrices to get
    # the transformed corner vectors.
    corners = tf.concat([
        tf.stack([x1, x2, x2, x1], axis=-1),
        tf.stack([y1, y1, y2, y2], axis=-1),
        tf.ones([batch_size, num_boxes, 4], dtype=tf.float32)],
        axis=-1)
    corners = tf.reshape(corners, [batch_size, num_boxes, 3, 4])
    trd_corners = tf.linalg.matmul(matrices, corners)

    # Project transformed corner vectors onto x and y axis
    tx1 = tf.math.reduce_min(trd_corners[..., 0, :], axis=-1)
    tx2 = tf.math.reduce_max(trd_corners[..., 0, :], axis=-1)
    ty1 = tf.math.reduce_min(trd_corners[..., 1, :], axis=-1)
    ty2 = tf.math.reduce_max(trd_corners[..., 1, :], axis=-1)

    # Clip transformed coordinates
    tx1 = tf.math.maximum(tx1, 0)
    tx1 = tf.math.minimum(tx1, image_width)
    tx2 = tf.math.maximum(tx2, 0)
    tx2 = tf.math.minimum(tx2, image_width)

    ty1 = tf.math.maximum(ty1, 0)
    ty1 = tf.math.minimum(ty1, image_height)
    ty2 = tf.math.maximum(ty2, 0)
    ty2 = tf.math.minimum(ty2, image_height)

    trd_boxes = tf.stack([tx1, ty1, tx2, ty2], axis=-1)

    # Get rid of boxes that don't make sense
    # (zero or negative width/height after clipping)
    valid_boxes = tf.math.logical_and(tx2 > tx1, ty2 > ty1)
    valid_boxes = tf.cast(valid_boxes, tf.float32)
    trd_boxes *= tf.expand_dims(valid_boxes, axis=-1)

    # Set to 0 the coordinates of padding boxes as transforms
    # may have resulted in some non-zeros coordinates.
    trd_boxes *= padding_mask

    return trd_boxes


#------------------------- Random flip -------------------------

def objdet_random_flip(images, labels, mode=None, change_rate=0.5):
    """
    This function randomly flips input images and the bounding boxes
    in the associated groundtruth labels.

    Setting `change_rate` to 0.5 usually gives good results (don't set
    it to 1.0, otherwise all the images will be flipped).

    Arguments:
        images:
            Input RGB or grayscale images
            Shape: [batch_size, width, height, channels]
            NOTE(review): axis 1 is read as the width and axis 2 as the
            height; confirm this matches the layout expected by the
            matrix helpers if non-square images are used.
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        mode:
            A string representing the flip axis. Either "horizontal",
            "vertical" or "horizontal_and_vertical". The default (None)
            is not a supported value and raises a ValueError.
        change_rate:
            A float in the interval [0, 1] representing the number of
            changed images versus the total number of input images average
            ratio. For example, if `change_rate` is set to 0.25, 25% of
            the input images will get changed on average (75% won't get
            changed). If it is set to 0.0, no images are changed. If it is
            set to 1.0, all the images are changed.

    Returns:
        The flipped images and groundtruth labels with flipped bounding boxes.

    Raises:
        ValueError: if `mode` is not one of the supported values.
    """

    if mode not in ("horizontal", "vertical", "horizontal_and_vertical"):
        raise ValueError(
            "Argument `mode` of function `random_flip`: supported values are 'horizontal', "
            "'vertical' and 'horizontal_and_vertical'. Received {}".format(mode))

    images_shape = tf.shape(images)
    batch_size = images_shape[0]
    image_width = images_shape[1]
    image_height = images_shape[2]

    matrix = get_flip_matrix(batch_size, image_width, image_height, mode)

    boxes = labels[..., 1:]
    flipped_images = transform_images(images, matrix)
    flipped_boxes = _transform_boxes(boxes, matrix, image_width, image_height)

    # Apply the change rate to images and labels
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, boxes, flipped_images, flipped_boxes, change_rate=change_rate)

    # Put the (unchanged) classes back in front of the box coordinates
    classes = tf.expand_dims(labels[..., 0], axis=-1)
    labels_aug = tf.concat([classes, boxes_aug], axis=-1)

    return images_aug, labels_aug


#------------------------- Random translation -------------------------

def objdet_random_translation(
        images, labels,
        width_factor, height_factor,
        fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
        change_rate=1.0):
    """
    This function randomly translates input images and the bounding boxes
    in the associated groundtruth labels.

    Arguments:
        images:
            Input RGB or grayscale images with shape
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        width_factor:
            A float or a tuple of 2 floats, specifies the range of values
            the horizontal shift factors are sampled from (one per image).
            If a scalar value v is used, it is equivalent to the tuple (-v, v).
            A negative factor means shifting the image left, while a positive
            factor means shifting the image right.
            For example, `width_factor`=(-0.2, 0.3) results in an output shifted
            left by up to 20% or shifted right by up to 30%.
        height_factor:
            A float or a tuple of 2 floats, specifies the range of values
            the vertical shift factors are sampled from (one per image).
            If a scalar value v is used, it is equivalent to the tuple (-v, v).
            A negative factor means shifting the image up, while a positive
            factor means shifting the image down.
            For example, `height_factor`=(-0.2, 0.3) results in an output shifted
            up by up to 20% or shifted down by up to 30%.
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded together with `fill_mode` to the image
            transform (presumably the value used with the 'constant'
            fill mode - confirm against `transform_images`).
        change_rate:
            A float in the interval [0, 1] representing the number of
            changed images versus the total number of input images average
            ratio. For example, if `change_rate` is set to 0.25, 25% of
            the input images will get changed on average (75% won't get
            changed). If it is set to 0.0, no images are changed. If it is
            set to 1.0, all the images are changed.

    Returns:
        The translated images and groundtruth labels with translated bounding boxes.
    """

    check_dataaug_argument(width_factor, "width_factor", function_name="random_translation", data_type=float)
    if isinstance(width_factor, (tuple, list)):
        width_lower = width_factor[0]
        width_upper = width_factor[1]
    else:
        width_lower = -width_factor
        width_upper = width_factor

    check_dataaug_argument(height_factor, "height_factor", function_name="random_translation", data_type=float)
    if isinstance(height_factor, (tuple, list)):
        height_lower = height_factor[0]
        height_upper = height_factor[1]
    else:
        height_lower = -height_factor
        height_upper = height_factor

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_translation")

    images_shape = tf.shape(images)
    batch_size = images_shape[0]
    image_width = images_shape[1]
    image_height = images_shape[2]

    boxes = labels[..., 1:]

    # Sample one shift per image, as a fraction of the image size,
    # then convert it to a number of pixels.
    width_translate = tf.random.uniform(
        [batch_size, 1], minval=width_lower, maxval=width_upper, dtype=tf.float32)
    width_translate = width_translate * tf.cast(image_width, tf.float32)

    height_translate = tf.random.uniform(
        [batch_size, 1], minval=height_lower, maxval=height_upper, dtype=tf.float32)
    height_translate = height_translate * tf.cast(image_height, tf.float32)

    translations = tf.cast(
        tf.concat([width_translate, height_translate], axis=1),
        dtype=tf.float32)

    translation_matrix = get_translation_matrix(translations)

    translated_images = transform_images(
        images,
        translation_matrix,
        interpolation=interpolation,
        fill_mode=fill_mode,
        fill_value=fill_value)

    translated_boxes = _transform_boxes(
        boxes, translation_matrix, image_width, image_height)

    # Apply the change rate to images and labels
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, boxes, translated_images, translated_boxes, change_rate=change_rate)

    # Put the (unchanged) classes back in front of the box coordinates
    classes = tf.expand_dims(labels[..., 0], axis=-1)
    labels_aug = tf.concat([classes, boxes_aug], axis=-1)

    return images_aug, labels_aug


#------------------------- Random rotation -------------------------

def objdet_random_rotation(
        images, labels, factor=None,
        fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
        change_rate=1.0):
    """
    This function randomly rotates input images and the bounding boxes
    in the associated groundtruth labels.

    Arguments:
        images:
            Input RGB or grayscale images with shape
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        factor:
            A float or a tuple of 2 floats, specifies the range of values the
            rotation angles are sampled from (one per image). If a scalar
            value v is used, it is equivalent to the tuple (-v, v).
            Rotation angles are expressed as fractions of a full turn
            (the factors are multiplied by 2*pi to get radians).
            A positive angle means rotating counter clock-wise, while
            a negative angle means rotating clock-wise.
            For example, `factor`=(-0.2, 0.3) results in an output rotated by
            a random amount in the range [-20% * 2pi, 30% * 2pi].
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded together with `fill_mode` to the image
            transform (presumably the value used with the 'constant'
            fill mode - confirm against `transform_images`).
        change_rate:
            A float in the interval [0, 1] representing the number of
            changed images versus the total number of input images average
            ratio. For example, if `change_rate` is set to 0.25, 25% of
            the input images will get changed on average (75% won't get
            changed). If it is set to 0.0, no images are changed. If it is
            set to 1.0, all the images are changed.

    Returns:
        The rotated images and groundtruth labels with rotated bounding boxes.
    """

    check_dataaug_argument(factor, "factor", function_name="random_rotation", data_type=float)
    if not isinstance(factor, (tuple, list)):
        factor = (-factor, factor)

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_rotation")

    images_shape = tf.shape(images)
    batch_size = images_shape[0]
    image_width = images_shape[1]
    image_height = images_shape[2]

    # Convert the factors (fractions of a full turn) to radians
    min_angle = factor[0] * 2. * math.pi
    max_angle = factor[1] * 2. * math.pi
    angles = tf.random.uniform([batch_size], minval=min_angle, maxval=max_angle)

    boxes = labels[..., 1:]

    rotation_matrix = get_rotation_matrix(angles, image_width, image_height)

    rotated_images = transform_images(
        images,
        rotation_matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    # Boxes are shrunk (scale=0.1) before being rotated
    rotated_boxes = _transform_boxes(
        boxes, rotation_matrix, image_width, image_height, scale=0.1)

    # Apply the change rate to images and labels
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, boxes, rotated_images, rotated_boxes, change_rate=change_rate)

    # Put the (unchanged) classes back in front of the box coordinates
    classes = tf.expand_dims(labels[..., 0], axis=-1)
    labels_aug = tf.concat([classes, boxes_aug], axis=-1)

    return images_aug, labels_aug


#------------------------- Random shear -------------------------

def objdet_random_shear(
        images, labels, factor=None, axis='xy',
        fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
        change_rate=1.0):
    """
    This function randomly shears input images and the bounding boxes
    in the associated groundtruth labels.

    Arguments:
        images:
            Input RGB or grayscale images with shape
            [batch_size, width, height, channels].
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        factor:
            A float or a tuple of 2 floats, specifies the range of values
            the shear angles are sampled from (one per image). If a scalar
            value v is used, it is equivalent to the tuple (-v, v).
            Factors are multiplied by 2*pi to obtain the angles, i.e.
            they are fractions of a full turn, not radians.
            For example, factor=(-0.0556, 0.125) results in an output
            sheared by a random angle in the range
            [-20 degrees, +45 degrees].
        axis:
            The shear axis:
                'xy': shear along both axis
                'x': shear along the x axis only
                'y': shear along the y axis only
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded together with `fill_mode` to the image
            transform (presumably the value used with the 'constant'
            fill mode - confirm against `transform_images`).
        change_rate:
            A float in the interval [0, 1] representing the number of
            changed images versus the total number of input images average
            ratio. For example, if `change_rate` is set to 0.25, 25% of
            the input images will get changed on average (75% won't get
            changed). If it is set to 0.0, no images are changed. If it is
            set to 1.0, all the images are changed.

    Returns:
        The sheared images and groundtruth labels with sheared bounding boxes.
    """

    # The function name is only used in argument-checking error messages
    if axis == 'x':
        function_name = "random_shear_x"
    elif axis == 'y':
        function_name = "random_shear_y"
    else:
        function_name = "random_shear"

    check_dataaug_argument(factor, "factor", function_name=function_name, data_type=float)
    if not isinstance(factor, (tuple, list)):
        factor = (-factor, factor)

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name=function_name)

    images_shape = tf.shape(images)
    batch_size = images_shape[0]
    image_width = images_shape[1]
    image_height = images_shape[2]

    # Convert the factors (fractions of a full turn) to radians
    min_angle = factor[0] * 2. * math.pi
    max_angle = factor[1] * 2. * math.pi
    angles = tf.random.uniform([batch_size], minval=min_angle, maxval=max_angle)

    boxes = labels[..., 1:]

    shear_matrix = get_shear_matrix(angles, axis=axis)

    sheared_images = transform_images(
        images,
        shear_matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    # Boxes are shrunk (scale=0.1) before being sheared
    sheared_boxes = _transform_boxes(
        boxes, shear_matrix, image_width, image_height, scale=0.1)

    # Apply the change rate to images and labels
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, boxes, sheared_images, sheared_boxes, change_rate=change_rate)

    # Put the (unchanged) classes back in front of the box coordinates
    classes = tf.expand_dims(labels[..., 0], axis=-1)
    labels_aug = tf.concat([classes, boxes_aug], axis=-1)

    return images_aug, labels_aug


#------------------------- Random zoom -------------------------

def objdet_random_zoom(
        images, labels, width_factor=None, height_factor=None,
        fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
        change_rate=1.0):
    """
    This function randomly zooms input images and the bounding boxes
    in the associated groundtruth labels.

    If `width_factor` and `height_factor` are both set, the images are zoomed
    in or out on each axis independently, which may result in noticeable distortion.
    If you want to avoid distortion, only set `width_factor` and the images
    will be zoomed using the same range of factors in both directions.

    Arguments:
        images:
            Input RGB or grayscale images with shape
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        width_factor:
            A float or a tuple of 2 floats, specifies the range of values horizontal
            zoom factors are sampled from (one per image). If a scalar value v
            is used, it is equivalent to the tuple (-v, v). Factors are fractions
            of the width of the image. A positive factor means zooming out,
            while a negative factor means zooming in.
            For example, width_factor=(0.2, 0.3) results in an output zoomed out
            by a random amount in the range [+20%, +30%]. width_factor=(-0.3, -0.2)
            results in an output zoomed in by a random amount in the range [+20%, +30%].
        height_factor:
            A float or a tuple of 2 floats, specifies the range of values vertical
            zoom factors are sampled from (one per image). If a scalar value v
            is used, it is equivalent to the tuple (-v, v). Factors are fractions
            of the height of the image. A positive value means zooming out,
            while a negative value means zooming in.
            For example, height_factor=(0.2, 0.3) results in an output zoomed out
            between 20% to 30%. height_factor=(-0.3, -0.2) results in an output
            zoomed in between 20% to 30%.
            If `height_factor` is not set, it defaults to None. In this case,
            the vertical zoom is sampled from the same range as the
            horizontal zoom.
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded together with `fill_mode` to the image
            transform (presumably the value used with the 'constant'
            fill mode - confirm against `transform_images`).
        change_rate:
            A float in the interval [0, 1] representing the number of
            changed images versus the total number of input images average
            ratio. For example, if `change_rate` is set to 0.25, 25% of
            the input images will get changed on average (75% won't get
            changed). If it is set to 0.0, no images are changed. If it is
            set to 1.0, all the images are changed.

    Returns:
        The zoomed images and groundtruth labels with zoomed bounding boxes.

    Raises:
        ValueError: if a bound of `height_factor` is outside [-1.0, 1.0].
    """

    check_dataaug_argument(width_factor, "width_factor", function_name="random_zoom", data_type=float)
    if isinstance(width_factor, (tuple, list)):
        width_lower = width_factor[0]
        width_upper = width_factor[1]
    else:
        width_lower = -width_factor
        width_upper = width_factor

    if height_factor is not None:
        check_dataaug_argument(height_factor, "height_factor", function_name="random_zoom", data_type=float)
        if isinstance(height_factor, (tuple, list)):
            height_lower = height_factor[0]
            height_upper = height_factor[1]
        else:
            height_lower = -height_factor
            height_upper = height_factor
        if abs(height_lower) > 1.0 or abs(height_upper) > 1.0:
            raise ValueError(
                "Argument `height_factor` of function `random_zoom`: expecting float "
                "values in the interval [-1.0, 1.0]. Received: {}".format(height_factor))
    else:
        # No vertical range given: reuse the horizontal one
        height_lower = width_lower
        height_upper = width_upper

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_zoom")

    images_shape = tf.shape(images)
    batch_size = images_shape[0]
    image_width = images_shape[1]
    image_height = images_shape[2]

    boxes = labels[..., 1:]

    # Zoom values are 1 + factor, sampled independently for each axis
    height_zoom = tf.random.uniform(
        [batch_size, 1], minval=1. + height_lower, maxval=1. + height_upper, dtype=tf.float32)
    width_zoom = tf.random.uniform(
        [batch_size, 1], minval=1. + width_lower, maxval=1. + width_upper, dtype=tf.float32)

    zooms = tf.cast(tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32)

    zoom_matrix = get_zoom_matrix(zooms, image_width, image_height)

    zoomed_images = transform_images(
        images,
        zoom_matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    zoomed_boxes = _transform_boxes(
        boxes, zoom_matrix, image_width, image_height)

    # Apply the change rate to images and labels
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, boxes, zoomed_images, zoomed_boxes, change_rate=change_rate)

    # Put the (unchanged) classes back in front of the box coordinates
    classes = tf.expand_dims(labels[..., 0], axis=-1)
    labels_aug = tf.concat([classes, boxes_aug], axis=-1)

    return images_aug, labels_aug


#--------------------------------- Random bounded crop ---------------------

def objdet_random_bounded_crop(
        images, labels, width_factor=None, height_factor=None,
        crop_center_x=None, crop_center_y=None,
        fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
        change_rate=1.0):
    """
    This function randomly crops or dezoom on each axis of the input
    images, and transforms the bounding boxes in the associated
    groundtruth labels accordingly. Images and boxes are first
    translated, then zoomed.

    If `width_factor` and `height_factor` are both set, the images are zoomed
    in or out on each axis independently, which may result in noticeable distortion.
    If you want to avoid distortion, only set `width_factor` and the images
    will be zoomed by the same amount in both directions.

    Arguments:
        images:
            Input RGB or grayscale images with shape
            [batch_size, width, height, channels].
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        width_factor:
            A float or a tuple of 2 floats, specifies the range of values
            horizontal zoom values are sampled from (one per image).
            If a scalar value v is used, it is used as both bounds of
            the range, i.e. the zoom value is always v.
            Unlike in `objdet_random_zoom`, the sampled values are
            passed as is to `get_zoom_matrix` (no 1 is added to them).
        height_factor:
            A float or a tuple of 2 floats, specifies the range of values
            vertical zoom values are sampled from (one per image).
            If a scalar value v is used, it is used as both bounds of
            the range. Bounds must be in the interval [-1.0, 1.0].
            If `height_factor` is not set, it defaults to None. In this case,
            the value sampled for the width is also used for the height,
            so no image distortion will occur.
        crop_center_x:
            A float or a tuple of 2 floats, or None. Used only if
            `crop_center_y` is also set. If both are tuples, the
            horizontal translation (as a fraction of the image width) is
            sampled in the range [0.5 - crop_center_x[1], 0.5 - crop_center_x[0]].
            Otherwise it is sampled in [-crop_center_x/2, crop_center_x/2].
        crop_center_y:
            Same as `crop_center_x`, for the vertical translation
            (as a fraction of the image height).
            If either `crop_center_x` or `crop_center_y` is None, the
            translations are sampled in [-1, 1] and scaled by half of
            (1 - zoom value), clipped to be non-negative.
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded together with `fill_mode` to the image
            transform (presumably the value used with the 'constant'
            fill mode - confirm against `transform_images`).
        change_rate:
            A float in the interval [0, 1] representing the number of
            changed images versus the total number of input images average
            ratio. For example, if `change_rate` is set to 0.25, 25% of
            the input images will get changed on average (75% won't get
            changed). If it is set to 0.0, no images are changed. If it is
            set to 1.0, all the images are changed.

    Returns:
        The zoomed images and groundtruth labels with zoomed bounding boxes.

    Raises:
        ValueError: if a bound of `height_factor`, or a translation bound
            derived from `crop_center_x`/`crop_center_y`, is outside
            the interval [-1.0, 1.0].
    """

    check_dataaug_argument(width_factor, "width_factor", function_name="random_bounded_crop", data_type=float)
    if isinstance(width_factor, (tuple, list)):
        width_lower = width_factor[0]
        width_upper = width_factor[1]
    else:
        width_lower = width_factor
        width_upper = width_factor

    if height_factor is not None:
        check_dataaug_argument(height_factor, "height_factor", function_name="random_bounded_crop", data_type=float)
        if isinstance(height_factor, (tuple, list)):
            height_lower = height_factor[0]
            height_upper = height_factor[1]
        else:
            height_lower = height_factor
            height_upper = height_factor
        if abs(height_lower) > 1.0 or abs(height_upper) > 1.0:
            raise ValueError(
                "Argument `height_factor` of function `random_bounded_crop`: expecting float "
                "values in the interval [-1.0, 1.0]. Received: {}".format(height_factor))

    if (crop_center_x is not None) and (crop_center_y is not None):
        check_dataaug_argument(crop_center_x, "crop_center_x", function_name="random_bounded_crop", data_type=float)
        # `crop_center_y` is used exactly like `crop_center_x` below,
        # so it gets the same validation.
        check_dataaug_argument(crop_center_y, "crop_center_y", function_name="random_bounded_crop", data_type=float)
        if isinstance(crop_center_x, (tuple, list)) and isinstance(crop_center_y, (tuple, list)):
            t_width_lower = 0.5 - crop_center_x[1]
            t_width_upper = 0.5 - crop_center_x[0]
            t_height_lower = 0.5 - crop_center_y[1]
            t_height_upper = 0.5 - crop_center_y[0]
        else:
            t_width_lower = - crop_center_x/2
            t_width_upper = crop_center_x/2
            t_height_lower = -crop_center_y/2
            t_height_upper = crop_center_y/2
        if abs(t_width_lower) > 1.0 or abs(t_width_upper) > 1.0 or abs(t_height_lower) > 1.0 or abs(t_height_upper) > 1.0:
            raise ValueError(
                "Argument `crop_center_x` or `crop_center_y` of function `random_bounded_crop`: expecting float "
                "values in the interval [-1.0, 1.0]. Received: {}, {}".format(crop_center_x,crop_center_y))
    else:
        # No crop center range: translations are derived from the zooms below
        t_width_lower = None
        t_width_upper = None
        t_height_lower = None
        t_height_upper = None

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_bounded_crop")

    image_shape = tf.shape(images)
    batch_size = image_shape[0]
    width = tf.cast(image_shape[1], tf.float32)
    height = tf.cast(image_shape[2], tf.float32)

    boxes = labels[..., 1:]

    zoom_width = tf.random.uniform(
        [batch_size, 1], minval=width_lower, maxval=width_upper, dtype=tf.float32)
    if height_factor is not None:
        zoom_height = tf.random.uniform(
            [batch_size, 1], minval=height_lower, maxval=height_upper, dtype=tf.float32)
    else:
        # Same zoom on both axis (no distortion)
        zoom_height = zoom_width

    # Margin left by the zoom on each axis, set to 0 when negative
    zoom_factor_w = 1-zoom_width
    zoom_factor_h = 1-zoom_height
    zoom_factor_w *= tf.cast(zoom_factor_w>=0,tf.float32)
    zoom_factor_h *= tf.cast(zoom_factor_h>=0,tf.float32)

    if t_width_lower is None:
        # Random translation bounded by half of the zoom margin
        translation_width = tf.random.uniform(
            [batch_size, 1], minval=-1, maxval=1, dtype=tf.float32)
        translation_height = tf.random.uniform(
            [batch_size, 1], minval=-1, maxval=1, dtype=tf.float32)
        translation_width *= zoom_factor_w/2
        translation_height *= zoom_factor_h/2
    else:
        translation_width = tf.random.uniform(
            [batch_size, 1], minval=t_width_lower, maxval=t_width_upper, dtype=tf.float32)
        translation_height = tf.random.uniform(
            [batch_size, 1], minval=t_height_lower, maxval=t_height_upper, dtype=tf.float32)

    zooms = tf.cast(tf.concat([zoom_width, zoom_height], axis=1), dtype=tf.float32)  # shape : (batch, 2)
    # Translations are converted from fractions of the image size to pixels
    translations = tf.cast(
        tf.concat([translation_width * width, translation_height * height], axis=1),
        dtype=tf.float32)

    zoom_matrix = get_zoom_matrix(zooms, width, height)
    translation_matrix = get_translation_matrix(translations)

    # First translate images and boxes...
    translated_images = transform_images(
        images,
        translation_matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    translated_boxes = _transform_boxes(
        boxes, translation_matrix, width, height)

    # ...then zoom the translated images and boxes
    zoomed_images = transform_images(
        translated_images,
        zoom_matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    zoomed_boxes = _transform_boxes(
        translated_boxes, zoom_matrix, width, height)

    # Apply the change rate to images and labels
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, boxes, zoomed_images, zoomed_boxes, change_rate=change_rate)

    # Put the (unchanged) classes back in front of the box coordinates
    classes = tf.expand_dims(labels[..., 0], axis=-1)
    labels_aug = tf.concat([classes, boxes_aug], axis=-1)

    return images_aug, labels_aug