|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """ |
| References: |
| ---------- |
| Some of the code in this package is from or was inspired by: |
| |
| Keras Image Preprocessing Layers |
| The Tensorflow Authors |
| Copyright (c) 2019 |
| |
| Link to the source code: |
| https://github.com/keras-team/keras/blob/v2.12.0/keras/layers/preprocessing/image_preprocessing.py#L394-L495 |
| |
| """ |
|
|
| import math |
| import tensorflow as tf |
|
|
| from common.data_augmentation import \ |
| check_fill_and_interpolation, transform_images, check_dataaug_argument, \ |
| get_flip_matrix, get_translation_matrix, get_rotation_matrix, \ |
| get_shear_matrix, get_zoom_matrix |
| from .objdet_random_utils import objdet_apply_change_rate |
|
|
|
|
def _transform_boxes(boxes, transforms, image_width, image_height, scale=1.):
    """
    This function applies affine transformations to a batch of boxes.
    The transformation matrices are independent from each other
    and are generally different from one batch item to another.

    Each box is optionally shrunk, its 4 corners are transformed, and the
    axis-aligned box enclosing the transformed corners is returned after
    being clipped to the image. Boxes that end up with a null or negative
    width/height, and padding boxes, are returned as (0, 0, 0, 0).

    Arguments:
        boxes:
            Boxes the matrices are applied to, in (x1, y1, x2, y2) format.
            Boxes whose coordinates sum is not strictly positive are
            treated as padding boxes.
            Shape:[batch_size, num_boxes, 4]
        transforms:
            Matrices coefficients to apply to the boxes (the first 8
            coefficients of a 3x3 matrix, the 9th one being 1). The
            matrices are inverted before being applied to the boxes.
            Shape:[batch_size, 8]
        image_width:
            Width of the images, used as the upper clipping bound of
            the x coordinates.
        image_height:
            Height of the images, used as the upper clipping bound of
            the y coordinates.
        scale:
            A float. If it is smaller than 1, each side of a box is moved
            inward by `scale` times the box size along that axis before
            the box is transformed (with `scale`=0.1, a box keeps 80% of
            its width and height). This compensates for the growth of the
            enclosing box when corners are rotated or sheared. Values are
            expected to be in the interval [0, 0.5); if `scale` is 1 or
            larger, boxes are left unchanged.

    Returns:
        Transformed boxes
        Shape:[batch_size, num_boxes, 4]
    """

    image_width = tf.cast(image_width, tf.float32)
    image_height = tf.cast(image_height, tf.float32)

    boxes_shape = tf.shape(boxes)
    batch_size = boxes_shape[0]
    num_boxes = boxes_shape[1]

    # Create a mask to keep track of padding boxes (all coordinates
    # set to 0). It has a trailing axis of size 1 so that it broadcasts
    # over the 4 coordinates of a box.
    coords_sum = tf.math.reduce_sum(boxes, axis=-1)
    padding_mask = tf.where(coords_sum > 0, 1., 0.)
    padding_mask = tf.expand_dims(padding_mask, axis=-1)

    # Append the 9th coefficient to get 3x3 matrices, then invert them
    # before applying them to the boxes.
    transforms = tf.concat([
                    transforms,
                    tf.ones([batch_size, 1], dtype=tf.float32)],
                    axis=-1)
    matrices = tf.reshape(transforms, [batch_size, 3, 3])
    matrices = tf.linalg.inv(matrices)

    # Replicate the matrices to have one matrix for each box
    matrices = tf.expand_dims(matrices, axis=1)
    matrices = tf.tile(matrices, [1, num_boxes, 1, 1])

    x1 = boxes[..., 0]
    y1 = boxes[..., 1]
    x2 = boxes[..., 2]
    y2 = boxes[..., 3]

    # Reduce the size of the boxes. The shrunk coordinates are the ones
    # used to build the corners below.
    if scale < 1:
        dx = scale * (x2 - x1)
        dy = scale * (y2 - y1)
        x1, y1, x2, y2 = x1 + dx, y1 + dy, x2 - dx, y2 - dy

    # Create homogeneous vectors from the box corners coordinates.
    # There are 4 column vectors (x, y, 1) for each box, one per corner.
    # Shape: [batch_size, num_boxes, 3, 4]
    corners = tf.stack([
                    tf.stack([x1, x2, x2, x1], axis=-1),
                    tf.stack([y1, y1, y2, y2], axis=-1),
                    tf.ones([batch_size, num_boxes, 4], dtype=tf.float32)],
                    axis=-2)

    trd_corners = tf.linalg.matmul(matrices, corners)

    # Axis-aligned boxes that enclose the transformed corners
    tx1 = tf.math.reduce_min(trd_corners[..., 0, :], axis=-1)
    tx2 = tf.math.reduce_max(trd_corners[..., 0, :], axis=-1)
    ty1 = tf.math.reduce_min(trd_corners[..., 1, :], axis=-1)
    ty2 = tf.math.reduce_max(trd_corners[..., 1, :], axis=-1)

    # Clip the coordinates that fall outside of the image
    tx1 = tf.clip_by_value(tx1, 0., image_width)
    tx2 = tf.clip_by_value(tx2, 0., image_width)
    ty1 = tf.clip_by_value(ty1, 0., image_height)
    ty2 = tf.clip_by_value(ty2, 0., image_height)

    trd_boxes = tf.stack([tx1, ty1, tx2, ty2], axis=-1)

    # Set to 0 the coordinates of the boxes that have no area left
    # (for example boxes that were entirely outside of the image)
    valid_boxes = tf.math.logical_and(tx2 > tx1, ty2 > ty1)
    valid_boxes = tf.cast(valid_boxes, tf.float32)
    trd_boxes *= tf.expand_dims(valid_boxes, axis=-1)

    # Set to 0 the coordinates of the padding boxes, as clipping
    # may have given them non-zero coordinates
    trd_boxes *= padding_mask

    return trd_boxes
|
|
|
|
| |
|
|
def objdet_random_flip(images, labels, mode=None, change_rate=0.5):
    """
    Randomly flip a batch of images together with the bounding boxes
    of their groundtruth labels.

    A `change_rate` of 0.5 usually gives good results (with 1.0,
    every image of the batch gets flipped).

    Arguments:
        images:
            Input RGB or grayscale images
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        mode:
            A string, the flip axis. One of "horizontal", "vertical"
            or "horizontal_and_vertical". The default value None is
            not a supported mode and is rejected.
        change_rate:
            A float in the interval [0, 1], the average ratio of
            changed images to the total number of input images.
            For example, with 0.25, 25% of the input images get changed
            on average. With 0.0 no images are changed, with 1.0 all
            the images are changed.

    Returns:
        A tuple (images, labels): the flipped images and the groundtruth
        labels with flipped bounding boxes.

    Raises:
        ValueError: if `mode` is not one of the supported values.
    """

    supported_modes = ("horizontal", "vertical", "horizontal_and_vertical")
    if mode not in supported_modes:
        raise ValueError(
            "Argument `mode` of function `random_flip`: supported values are 'horizontal', "
            "'vertical' and 'horizontal_and_vertical'. Received {}".format(mode))

    # Axis 1 of the images is taken as the width and axis 2 as the height
    # NOTE(review): confirm this matches the axis order the helpers expect
    shape = tf.shape(images)
    batch_size, image_width, image_height = shape[0], shape[1], shape[2]

    flip_matrix = get_flip_matrix(batch_size, image_width, image_height, mode)

    # The same matrix is applied to the images and to the boxes
    original_boxes = labels[..., 1:]
    flipped_images = transform_images(images, flip_matrix)
    flipped_boxes = _transform_boxes(
        original_boxes, flip_matrix, image_width, image_height)

    # Mix original and flipped items according to the change rate
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, original_boxes, flipped_images, flipped_boxes,
        change_rate=change_rate)

    # Put the class column back in front of the output boxes
    class_column = tf.expand_dims(labels[..., 0], axis=-1)
    return images_aug, tf.concat([class_column, boxes_aug], axis=-1)
|
|
|
|
| |
|
|
def objdet_random_translation(
            images, labels,
            width_factor, height_factor,
            fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
            change_rate=1.0):
    """
    Randomly translate a batch of images together with the bounding
    boxes of their groundtruth labels.

    Arguments:
        images:
            Input RGB or grayscale images
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        width_factor:
            A float or a tuple of 2 floats, the range of values the
            horizontal shift factors are sampled from (one per image).
            A scalar value v is equivalent to the tuple (-v, v).
            Factors are fractions of the image width. A negative factor
            means shifting the image left, a positive factor means
            shifting the image right.
            For example, `width_factor`=(-0.2, 0.3) results in an output
            shifted left by up to 20% or shifted right by up to 30%.
        height_factor:
            A float or a tuple of 2 floats, the range of values the
            vertical shift factors are sampled from (one per image).
            A scalar value v is equivalent to the tuple (-v, v).
            Factors are fractions of the image height. A negative factor
            means shifting the image up, a positive factor means
            shifting the image down.
            For example, `height_factor`=(-0.2, 0.3) results in an output
            shifted up by up to 20% or shifted down by up to 30%.
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded to `transform_images` (presumably the value
            used to fill when `fill_mode` is 'constant' - to be confirmed).
        change_rate:
            A float in the interval [0, 1], the average ratio of
            changed images to the total number of input images.
            For example, with 0.25, 25% of the input images get changed
            on average. With 0.0 no images are changed, with 1.0 all
            the images are changed.

    Returns:
        A tuple (images, labels): the translated images and the groundtruth
        labels with translated bounding boxes.
    """

    # Check the arguments and turn the factors into (lower, upper) ranges
    check_dataaug_argument(width_factor, "width_factor", function_name="random_translation", data_type=float)
    width_lower, width_upper = (
        (width_factor[0], width_factor[1]) if isinstance(width_factor, (tuple, list))
        else (-width_factor, width_factor))

    check_dataaug_argument(height_factor, "height_factor", function_name="random_translation", data_type=float)
    height_lower, height_upper = (
        (height_factor[0], height_factor[1]) if isinstance(height_factor, (tuple, list))
        else (-height_factor, height_factor))

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_translation")

    # Axis 1 of the images is taken as the width and axis 2 as the height
    shape = tf.shape(images)
    batch_size, image_width, image_height = shape[0], shape[1], shape[2]

    input_boxes = labels[..., 1:]

    # Sample one horizontal and one vertical shift per image and
    # convert them from fractions of the image size to pixels
    shift_x = tf.random.uniform(
        [batch_size, 1], minval=width_lower, maxval=width_upper, dtype=tf.float32)
    shift_x = shift_x * tf.cast(image_width, tf.float32)

    shift_y = tf.random.uniform(
        [batch_size, 1], minval=height_lower, maxval=height_upper, dtype=tf.float32)
    shift_y = shift_y * tf.cast(image_height, tf.float32)

    shifts = tf.cast(tf.concat([shift_x, shift_y], axis=1), dtype=tf.float32)

    # The same matrix is applied to the images and to the boxes
    shift_matrix = get_translation_matrix(shifts)

    shifted_images = transform_images(
        images,
        shift_matrix,
        interpolation=interpolation,
        fill_mode=fill_mode,
        fill_value=fill_value)

    shifted_boxes = _transform_boxes(
        input_boxes, shift_matrix, image_width, image_height)

    # Mix original and translated items according to the change rate
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, input_boxes, shifted_images, shifted_boxes, change_rate=change_rate)

    # Put the class column back in front of the output boxes
    class_column = tf.expand_dims(labels[..., 0], axis=-1)
    return images_aug, tf.concat([class_column, boxes_aug], axis=-1)
|
|
|
|
| |
|
|
def objdet_random_rotation(
                images, labels, factor=None,
                fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
                change_rate=1.0):
    """
    Randomly rotate a batch of images together with the bounding
    boxes of their groundtruth labels.

    Arguments:
        images:
            Input RGB or grayscale images
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        factor:
            A float or a tuple of 2 floats, the range of values the
            rotation angles are sampled from (one per image). A scalar
            value v is equivalent to the tuple (-v, v).
            Factors are fractions of a full turn: they are multiplied
            by 2*pi to get angles in radians. A positive angle means
            rotating counter clock-wise, a negative angle means
            rotating clock-wise.
            For example, `factor`=(-0.2, 0.3) results in an output rotated
            by a random amount in the range [-20% * 2pi, 30% * 2pi].
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded to `transform_images` (presumably the value
            used to fill when `fill_mode` is 'constant' - to be confirmed).
        change_rate:
            A float in the interval [0, 1], the average ratio of
            changed images to the total number of input images.
            For example, with 0.25, 25% of the input images get changed
            on average. With 0.0 no images are changed, with 1.0 all
            the images are changed.

    Returns:
        A tuple (images, labels): the rotated images and the groundtruth
        labels with rotated bounding boxes.
    """

    check_dataaug_argument(factor, "factor", function_name="random_rotation", data_type=float)
    if isinstance(factor, (tuple, list)):
        lower_factor, upper_factor = factor[0], factor[1]
    else:
        lower_factor, upper_factor = -factor, factor

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_rotation")

    # Axis 1 of the images is taken as the width and axis 2 as the height
    shape = tf.shape(images)
    batch_size, image_width, image_height = shape[0], shape[1], shape[2]

    # Sample one rotation angle per image, in radians
    angles = tf.random.uniform(
        [batch_size],
        minval=lower_factor * 2. * math.pi,
        maxval=upper_factor * 2. * math.pi)

    input_boxes = labels[..., 1:]

    # The same matrix is applied to the images and to the boxes
    matrix = get_rotation_matrix(angles, image_width, image_height)

    rotated_images = transform_images(
        images,
        matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    rotated_boxes = _transform_boxes(
        input_boxes, matrix, image_width, image_height, scale=0.1)

    # Mix original and rotated items according to the change rate
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, input_boxes, rotated_images, rotated_boxes, change_rate=change_rate)

    # Put the class column back in front of the output boxes
    class_column = tf.expand_dims(labels[..., 0], axis=-1)
    return images_aug, tf.concat([class_column, boxes_aug], axis=-1)
|
|
|
|
| |
|
|
def objdet_random_shear(
            images,
            labels,
            factor=None,
            axis='xy',
            fill_mode='reflect',
            interpolation='bilinear',
            fill_value=0.0,
            change_rate=1.0):
    """
    Randomly shear a batch of images together with the bounding
    boxes of their groundtruth labels.

    Arguments:
        images:
            Input RGB or grayscale images
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        factor:
            A float or a tuple of 2 floats, the range of values the
            shear angles are sampled from (one per image). A scalar
            value v is equivalent to the tuple (-v, v).
            Factors are fractions of a full turn: they are multiplied
            by 2*pi to get the angles in radians that are passed to
            `get_shear_matrix`.
            For example, factor=(-0.05, 0.125) results in angles sampled
            in the range [-18 degrees, +45 degrees].
        axis:
            The shear axis:
                'xy': shear along both axis
                'x': shear along the x axis only
                'y': shear along the y axis only
            Any value other than 'x' and 'y' is reported as "random_shear"
            in the argument checks and is passed as is to `get_shear_matrix`.
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded to `transform_images` (presumably the value
            used to fill when `fill_mode` is 'constant' - to be confirmed).
        change_rate:
            A float in the interval [0, 1], the average ratio of
            changed images to the total number of input images.
            For example, with 0.25, 25% of the input images get changed
            on average. With 0.0 no images are changed, with 1.0 all
            the images are changed.

    Returns:
        A tuple (images, labels): the sheared images and the groundtruth
        labels with sheared bounding boxes.
    """

    # Function name reported by the argument checks
    function_name = {'x': "random_shear_x", 'y': "random_shear_y"}.get(axis, "random_shear") \
        if isinstance(axis, str) else "random_shear"

    check_dataaug_argument(factor, "factor", function_name=function_name, data_type=float)
    if isinstance(factor, (tuple, list)):
        lower_factor, upper_factor = factor[0], factor[1]
    else:
        lower_factor, upper_factor = -factor, factor

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name=function_name)

    # Axis 1 of the images is taken as the width and axis 2 as the height
    shape = tf.shape(images)
    batch_size, image_width, image_height = shape[0], shape[1], shape[2]

    # Sample one shear angle per image, in radians
    angles = tf.random.uniform(
        [batch_size],
        minval=lower_factor * 2. * math.pi,
        maxval=upper_factor * 2. * math.pi)

    input_boxes = labels[..., 1:]

    # The same matrix is applied to the images and to the boxes
    matrix = get_shear_matrix(angles, axis=axis)

    sheared_images = transform_images(
        images,
        matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    sheared_boxes = _transform_boxes(
        input_boxes, matrix, image_width, image_height, scale=0.1)

    # Mix original and sheared items according to the change rate
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, input_boxes, sheared_images, sheared_boxes, change_rate=change_rate)

    # Put the class column back in front of the output boxes
    class_column = tf.expand_dims(labels[..., 0], axis=-1)
    return images_aug, tf.concat([class_column, boxes_aug], axis=-1)
|
|
|
|
| |
|
|
def objdet_random_zoom(
            images, labels, width_factor=None, height_factor=None,
            fill_mode='reflect', interpolation='bilinear', fill_value=0.0,
            change_rate=1.0):
    """
    Randomly zoom a batch of images together with the bounding
    boxes of their groundtruth labels.

    If `width_factor` and `height_factor` are both set, the images are zoomed
    in or out on each axis independently, which may result in noticeable
    distortion. If only `width_factor` is set, the vertical zoom factors are
    sampled from the same range as the horizontal ones.

    Arguments:
        images:
            Input RGB or grayscale images
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        width_factor:
            A float or a tuple of 2 floats, the range of values the
            horizontal zoom factors are sampled from (one per image).
            A scalar value v is equivalent to the tuple (-v, v).
            Factors are fractions of the width of the image. A positive
            factor means zooming out, a negative factor means zooming in.
            For example, width_factor=(0.2, 0.3) results in an output zoomed
            out by a random amount in the range [+20%, +30%], and
            width_factor=(-0.3, -0.2) results in an output zoomed in by a
            random amount in the range [+20%, +30%].
        height_factor:
            A float or a tuple of 2 floats, the range of values the
            vertical zoom factors are sampled from (one per image).
            A scalar value v is equivalent to the tuple (-v, v).
            Factors are fractions of the height of the image. A positive
            value means zooming out, a negative value means zooming in.
            Values must be in the interval [-1.0, 1.0].
            If `height_factor` is None (default), the range of
            `width_factor` is used for the vertical zoom as well.
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded to `transform_images` (presumably the value
            used to fill when `fill_mode` is 'constant' - to be confirmed).
        change_rate:
            A float in the interval [0, 1], the average ratio of
            changed images to the total number of input images.
            For example, with 0.25, 25% of the input images get changed
            on average. With 0.0 no images are changed, with 1.0 all
            the images are changed.

    Returns:
        A tuple (images, labels): the zoomed images and the groundtruth
        labels with zoomed bounding boxes.

    Raises:
        ValueError: if a bound of `height_factor` is outside of the
        interval [-1.0, 1.0].
    """

    check_dataaug_argument(width_factor, "width_factor", function_name="random_zoom", data_type=float)
    width_lower, width_upper = (
        (width_factor[0], width_factor[1]) if isinstance(width_factor, (tuple, list))
        else (-width_factor, width_factor))

    if height_factor is None:
        # Same range on both axes
        height_lower, height_upper = width_lower, width_upper
    else:
        check_dataaug_argument(height_factor, "height_factor", function_name="random_zoom", data_type=float)
        height_lower, height_upper = (
            (height_factor[0], height_factor[1]) if isinstance(height_factor, (tuple, list))
            else (-height_factor, height_factor))
        # NOTE(review): only the height bounds are range-checked here,
        # there is no equivalent check on the width bounds
        if any(abs(bound) > 1.0 for bound in (height_lower, height_upper)):
            raise ValueError(
                "Argument `height_factor` of function `random_zoom`: expecting float "
                "values in the interval [-1.0, 1.0]. Received: {}".format(height_factor))

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_zoom")

    # Axis 1 of the images is taken as the width and axis 2 as the height
    shape = tf.shape(images)
    batch_size, image_width, image_height = shape[0], shape[1], shape[2]

    input_boxes = labels[..., 1:]

    # Sample one zoom per axis and per image (vertical zoom first,
    # then horizontal zoom)
    zoom_y = tf.random.uniform(
        [batch_size, 1], minval=1. + height_lower, maxval=1. + height_upper, dtype=tf.float32)
    zoom_x = tf.random.uniform(
        [batch_size, 1], minval=1. + width_lower, maxval=1. + width_upper, dtype=tf.float32)

    zooms = tf.cast(tf.concat([zoom_x, zoom_y], axis=1), dtype=tf.float32)

    # The same matrix is applied to the images and to the boxes
    matrix = get_zoom_matrix(zooms, image_width, image_height)

    zoomed_images = transform_images(
        images,
        matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)

    zoomed_boxes = _transform_boxes(
        input_boxes, matrix, image_width, image_height)

    # Mix original and zoomed items according to the change rate
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, input_boxes, zoomed_images, zoomed_boxes, change_rate=change_rate)

    # Put the class column back in front of the output boxes
    class_column = tf.expand_dims(labels[..., 0], axis=-1)
    return images_aug, tf.concat([class_column, boxes_aug], axis=-1)
|
|
|
|
| |
|
|
def objdet_random_bounded_crop(
            images,
            labels,
            width_factor=None,
            height_factor=None,
            crop_center_x=None,
            crop_center_y=None,
            fill_mode='reflect',
            interpolation='bilinear',
            fill_value=0.0,
            change_rate=1.0):
    """
    Randomly crop or dezoom a batch of images on each axis, together with
    the bounding boxes of their groundtruth labels.

    Images and boxes are first translated, then zoomed. If `width_factor`
    and `height_factor` are both set, the images are zoomed on each axis
    independently, which may result in noticeable distortion. If only
    `width_factor` is set, the same zoom is used on both axes.

    Arguments:
        images:
            Input RGB or grayscale images
            Shape: [batch_size, width, height, channels]
        labels:
            Groundtruth labels associated to the images in
            (class, x1, y1, x2, y2) format. Bounding box coordinates
            must be absolute, opposite corners coordinates.
            Shape: [batch_size, num_labels, 5]
        width_factor:
            A float or a tuple of 2 floats, the range of values the
            horizontal zooms are sampled from (one per image). A scalar
            value v is equivalent to the tuple (v, v). The sampled
            values are passed as is to `get_zoom_matrix`.
        height_factor:
            A float or a tuple of 2 floats, the range of values the
            vertical zooms are sampled from (one per image). A scalar
            value v is equivalent to the tuple (v, v). Values must be
            in the interval [-1.0, 1.0].
            If `height_factor` is None (default), the horizontal zoom
            sampled for an image is also used as its vertical zoom.
        crop_center_x, crop_center_y:
            Control the translation applied before the zoom. They are
            only used if both are set.
            If both are tuples or lists (c_min, c_max), the translation
            factors are sampled in the range [0.5 - c_max, 0.5 - c_min].
            Otherwise they are handled as scalars c and the translation
            factors are sampled in the range [-c/2, c/2].
            The bounds of the ranges must be in the interval [-1.0, 1.0].
            If either one is None, the translation factors are sampled
            in [-1, 1] and multiplied by (1 - zoom)/2, with negative
            values of (1 - zoom) replaced by 0.
            In all cases the translation factors are fractions of the
            image width and height.
        fill_mode:
            Points outside the boundaries of the input are filled according
            to the given mode. One of {'constant', 'reflect', 'wrap', 'nearest'}.
            See Tensorflow documentation at https://tensorflow.org
            for more details.
        interpolation:
            A string, the interpolation method. Supported values: 'nearest', 'bilinear'.
        fill_value:
            A float, forwarded to `transform_images` (presumably the value
            used to fill when `fill_mode` is 'constant' - to be confirmed).
        change_rate:
            A float in the interval [0, 1], the average ratio of
            changed images to the total number of input images.
            For example, with 0.25, 25% of the input images get changed
            on average. With 0.0 no images are changed, with 1.0 all
            the images are changed.

    Returns:
        A tuple (images, labels): the transformed images and the groundtruth
        labels with transformed bounding boxes.

    Raises:
        ValueError: if a bound of `height_factor` or a bound of the
        translation ranges derived from `crop_center_x` and `crop_center_y`
        is outside of the interval [-1.0, 1.0].
    """

    sequence_types = (tuple, list)

    check_dataaug_argument(width_factor, "width_factor", function_name="random_bounded_crop", data_type=float)
    if isinstance(width_factor, sequence_types):
        width_lower, width_upper = width_factor[0], width_factor[1]
    else:
        width_lower = width_upper = width_factor

    independent_height = height_factor is not None
    if independent_height:
        check_dataaug_argument(height_factor, "height_factor", function_name="random_bounded_crop", data_type=float)
        if isinstance(height_factor, sequence_types):
            height_lower, height_upper = height_factor[0], height_factor[1]
        else:
            height_lower = height_upper = height_factor
        if any(abs(bound) > 1.0 for bound in (height_lower, height_upper)):
            raise ValueError(
                "Argument `height_factor` of function `random_bounded_crop`: expecting float "
                "values in the interval [-1.0, 1.0]. Received: {}".format(height_factor))

    # Translation ranges (x_lower, x_upper, y_lower, y_upper), or None
    # if the crop center arguments are not both set
    shift_bounds = None
    if (crop_center_x is not None) and (crop_center_y is not None):
        # NOTE(review): only `crop_center_x` goes through the argument check
        check_dataaug_argument(crop_center_x, "crop_center_x", function_name="random_bounded_crop", data_type=float)
        if isinstance(crop_center_x, sequence_types) and isinstance(crop_center_y, sequence_types):
            shift_bounds = (
                0.5 - crop_center_x[1],
                0.5 - crop_center_x[0],
                0.5 - crop_center_y[1],
                0.5 - crop_center_y[0])
        else:
            shift_bounds = (
                - crop_center_x/2,
                crop_center_x/2,
                -crop_center_y/2,
                crop_center_y/2)
        if any(abs(bound) > 1.0 for bound in shift_bounds):
            raise ValueError(
                "Argument `crop_center_x` or `crop_center_y` of function `random_bounded_crop`: expecting float "
                "values in the interval [-1.0, 1.0]. Received: {}, {}".format(crop_center_x,crop_center_y))

    check_fill_and_interpolation(fill_mode, interpolation, fill_value, function_name="random_bounded_crop")

    # Axis 1 of the images is taken as the width and axis 2 as the height
    shape = tf.shape(images)
    batch_size = shape[0]
    width = tf.cast(shape[1], tf.float32)
    height = tf.cast(shape[2], tf.float32)

    input_boxes = labels[..., 1:]

    # Sample the zooms, one per image and per axis
    zoom_width = tf.random.uniform(
        [batch_size, 1], minval=width_lower, maxval=width_upper, dtype=tf.float32)
    if independent_height:
        zoom_height = tf.random.uniform(
            [batch_size, 1], minval=height_lower, maxval=height_upper, dtype=tf.float32)
    else:
        zoom_height = zoom_width

    # Sample the translation factors, one per image and per axis
    if shift_bounds is None:
        # Room left for the translation: (1 - zoom), with negative
        # values replaced by 0
        room_w = 1 - zoom_width
        room_h = 1 - zoom_height
        room_w = room_w * tf.cast(room_w >= 0, tf.float32)
        room_h = room_h * tf.cast(room_h >= 0, tf.float32)

        shift_w = tf.random.uniform(
            [batch_size, 1], minval=-1, maxval=1, dtype=tf.float32)
        shift_h = tf.random.uniform(
            [batch_size, 1], minval=-1, maxval=1, dtype=tf.float32)

        shift_w = shift_w * (room_w / 2)
        shift_h = shift_h * (room_h / 2)
    else:
        x_lower, x_upper, y_lower, y_upper = shift_bounds
        shift_w = tf.random.uniform(
            [batch_size, 1], minval=x_lower, maxval=x_upper, dtype=tf.float32)
        shift_h = tf.random.uniform(
            [batch_size, 1], minval=y_lower, maxval=y_upper, dtype=tf.float32)

    zooms = tf.cast(tf.concat([zoom_width, zoom_height], axis=1), dtype=tf.float32)

    # Translations are converted from fractions of the image size to pixels
    shifts = tf.cast(
        tf.concat([shift_w * width, shift_h * height], axis=1),
        dtype=tf.float32)

    zoom_matrix = get_zoom_matrix(zooms, width, height)
    shift_matrix = get_translation_matrix(shifts)

    # First step: translate the images and the boxes
    shifted_images = transform_images(
        images,
        shift_matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)
    shifted_boxes = _transform_boxes(input_boxes, shift_matrix, width, height)

    # Second step: zoom the translated images and boxes
    zoomed_images = transform_images(
        shifted_images,
        zoom_matrix,
        fill_mode=fill_mode,
        fill_value=fill_value,
        interpolation=interpolation)
    zoomed_boxes = _transform_boxes(shifted_boxes, zoom_matrix, width, height)

    # Mix original and transformed items according to the change rate
    images_aug, boxes_aug = objdet_apply_change_rate(
        images, input_boxes, zoomed_images, zoomed_boxes, change_rate=change_rate)

    # Put the class column back in front of the output boxes
    class_column = tf.expand_dims(labels[..., 0], axis=-1)
    return images_aug, tf.concat([class_column, boxes_aug], axis=-1)