|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| """Provides utilities to preprocess images for the Inception networks."""
|
|
|
| from __future__ import absolute_import
|
| from __future__ import division
|
| from __future__ import print_function
|
|
|
| import tensorflow.compat.v1 as tf
|
|
|
| from tensorflow.python.ops import control_flow_ops
|
|
|
|
|
def apply_with_random_selector(x, func, num_cases):
  """Computes func(x, sel), with sel sampled from [0...num_cases-1].

  Args:
    x: input Tensor.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a python integer, but sel is sampled dynamically.
  """
  sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Route x through exactly one branch: switch() forwards x only on the
  # branch whose static case index matches the sampled selector, and merge()
  # picks up whichever branch actually ran.
  branch_outputs = []
  for case_idx in range(num_cases):
    taken_input = control_flow_ops.switch(x, tf.equal(sel, case_idx))[1]
    branch_outputs.append(func(taken_input, case_idx))
  merged_output, _ = control_flow_ops.merge(branch_outputs)
  return merged_output
|
|
|
|
|
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Applies a randomly parameterized sequence of color distortions.

  Each color distortion is non-commutative, so the order of the color ops
  matters. Rather than randomly permuting the ops per image, a distinct fixed
  ordering is selected via `color_ordering` (typically varied across
  preprocessing threads).

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3).
    fast_mode: Avoids slower ops (random_hue and random_contrast)
    scope: Optional scope for name_scope.

  Returns:
    3-D Tensor color-distorted image on range [0, 1]

  Raises:
    ValueError: if color_ordering not in [0, 3]
  """

  def _brightness(img):
    return tf.image.random_brightness(img, max_delta=32. / 255.)

  def _saturation(img):
    return tf.image.random_saturation(img, lower=0.5, upper=1.5)

  def _hue(img):
    return tf.image.random_hue(img, max_delta=0.2)

  def _contrast(img):
    return tf.image.random_contrast(img, lower=0.5, upper=1.5)

  with tf.name_scope(scope, 'distort_color', [image]):
    if fast_mode:
      # Fast mode uses only brightness and saturation; ordering 0 applies
      # brightness first, any other ordering applies saturation first.
      ops = ((_brightness, _saturation) if color_ordering == 0 else
             (_saturation, _brightness))
    else:
      orderings = {
          0: (_brightness, _saturation, _hue, _contrast),
          1: (_saturation, _brightness, _contrast, _hue),
          2: (_contrast, _hue, _brightness, _saturation),
          3: (_hue, _saturation, _contrast, _brightness),
      }
      if color_ordering not in orderings:
        raise ValueError('color_ordering must be in [0, 3]')
      ops = orderings[color_ordering]
    for op in ops:
      image = op(image)
    # The random ops can push pixel values slightly out of range.
    return tf.clip_by_value(image, 0.0, 1.0)
|
|
|
|
|
def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
  """Generates cropped_image using a one of the bboxes randomly distorted.

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the
      whole image.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding
      box supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within in this range.
    max_attempts: An optional `int`. Number of attempts at generating a
      cropped region of the image of the specified constraints. After
      `max_attempts` failures, return the entire image.
    scope: Optional scope for name_scope.

  Returns:
    A tuple, a 3-D Tensor cropped_image and the distorted bbox
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Sample a crop window satisfying the coverage/aspect/area constraints;
    # falls back to the full image after max_attempts failures.
    crop_begin, crop_size, distorted_bbox = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=True)
    # Carve the sampled window out of the input image.
    return tf.slice(image, crop_begin, crop_size), distorted_bbox
|
|
|
|
|
def preprocess_for_train(image,
                         height,
                         width,
                         bbox,
                         fast_mode=True,
                         scope=None,
                         add_image_summaries=True,
                         random_crop=True,
                         use_grayscale=False):
  """Distorts one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not effect the label.

  Additionally it would create image_summaries to display the different
  transformations applied to the image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
      be [0, 1], otherwise it would converted to tf.float32 assuming that the
      range is [0, MAX], where MAX is largest positive representable number
      for int(8/16/32) data type (see `tf.image.convert_image_dtype`).
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.
    add_image_summaries: Enable image summaries.
    random_crop: Enable random cropping of images during preprocessing for
      training.
    use_grayscale: Whether to convert the image from RGB to grayscale.

  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
    if bbox is None:
      # No boxes supplied: treat the whole image as the single bounding box.
      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
                         dtype=tf.float32,
                         shape=[1, 1, 4])
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    image_with_box = tf.image.draw_bounding_boxes(
        tf.expand_dims(image, 0), bbox)
    if add_image_summaries:
      tf.summary.image('image_with_bounding_boxes', image_with_box)

    if random_crop:
      distorted, distorted_bbox = distorted_bounding_box_crop(image, bbox)
      # The dynamic slice in the crop loses the static channel dimension;
      # restore it so downstream ops see a 3-channel image.
      distorted.set_shape([None, None, 3])
      image_with_distorted_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), distorted_bbox)
      if add_image_summaries:
        tf.summary.image('images_with_distorted_bounding_box',
                         image_with_distorted_box)
    else:
      distorted = image

    # Resize to the target size using a randomly selected interpolation
    # method, so the network does not overfit to one resizer's artifacts.
    num_resize_cases = 1 if fast_mode else 4
    distorted = apply_with_random_selector(
        distorted,
        lambda x, method: tf.image.resize_images(x, [height, width], method),
        num_cases=num_resize_cases)

    if add_image_summaries:
      tf.summary.image(('cropped_' if random_crop else '') + 'resized_image',
                       tf.expand_dims(distorted, 0))

    # Random horizontal flip.
    distorted = tf.image.random_flip_left_right(distorted)

    # Randomly distort the colors, selecting one of the fixed op orderings.
    num_distort_cases = 1 if fast_mode else 4
    distorted = apply_with_random_selector(
        distorted,
        lambda x, ordering: distort_color(x, ordering, fast_mode),
        num_cases=num_distort_cases)

    if use_grayscale:
      distorted = tf.image.rgb_to_grayscale(distorted)

    if add_image_summaries:
      tf.summary.image('final_distorted_image',
                       tf.expand_dims(distorted, 0))
    # Rescale pixel values from [0, 1] to [-1, 1].
    distorted = tf.subtract(distorted, 0.5)
    distorted = tf.multiply(distorted, 2.0)
    return distorted
|
|
|
|
|
def preprocess_for_eval(image,
                        height,
                        width,
                        central_fraction=0.875,
                        scope=None,
                        central_crop=True,
                        use_grayscale=False):
  """Prepare one image for evaluation.

  If height and width are specified it would output an image with that size by
  applying resize_bilinear.

  If central_fraction is specified it would crop the central fraction of the
  input image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
      be [0, 1], otherwise it would converted to tf.float32 assuming that the
      range is [0, MAX], where MAX is largest positive representable number
      for int(8/16/32) data type (see `tf.image.convert_image_dtype`).
    height: integer
    width: integer
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.
    central_crop: Enable central cropping of images during preprocessing for
      evaluation.
    use_grayscale: Whether to convert the image from RGB to grayscale.

  Returns:
    3-D float Tensor of prepared image.
  """
  with tf.name_scope(scope, 'eval_image', [image, height, width]):
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    if use_grayscale:
      image = tf.image.rgb_to_grayscale(image)
    # Keep only the central fraction of the image (crop away the borders).
    if central_crop and central_fraction:
      image = tf.image.central_crop(image, central_fraction=central_fraction)

    if height and width:
      # Bilinear resize operates on batches, so add and remove a batch dim.
      image = tf.expand_dims(image, 0)
      image = tf.image.resize_bilinear(image, [height, width],
                                       align_corners=False)
      image = tf.squeeze(image, [0])
    # Rescale pixel values from [0, 1] to [-1, 1].
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image
|
|
|
|
|
def preprocess_image(image,
                     height,
                     width,
                     is_training=False,
                     bbox=None,
                     fast_mode=True,
                     add_image_summaries=True,
                     crop_image=True,
                     use_grayscale=False):
  """Pre-process one image for training or evaluation.

  Args:
    image: 3-D Tensor [height, width, channels] with the image. If dtype is
      tf.float32 then the range should be [0, 1], otherwise it would converted
      to tf.float32 assuming that the range is [0, MAX], where MAX is largest
      positive representable number for int(8/16/32) data type (see
      `tf.image.convert_image_dtype` for details).
    height: integer, image expected height.
    width: integer, image expected width.
    is_training: Boolean. If true it would transform an image for train,
      otherwise it would transform it for evaluation.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations.
    add_image_summaries: Enable image summaries.
    crop_image: Whether to enable cropping of images during preprocessing for
      both training and evaluation.
    use_grayscale: Whether to convert the image from RGB to grayscale.

  Returns:
    3-D float Tensor containing an appropriately scaled image

  Raises:
    ValueError: if user does not provide bounding box
  """
  # Evaluation path: deterministic central crop + bilinear resize.
  if not is_training:
    return preprocess_for_eval(
        image,
        height,
        width,
        central_crop=crop_image,
        use_grayscale=use_grayscale)
  # Training path: random crop / flip / color distortion.
  return preprocess_for_train(
      image,
      height,
      width,
      bbox,
      fast_mode,
      add_image_summaries=add_image_summaries,
      random_crop=crop_image,
      use_grayscale=use_grayscale)
|
|
|