ASL-MoViNet-T5-translator

Sleeping

App Files Files Community

ASL-MoViNet-T5-translator / official /vision /utils /object_detection /ops.py

deanna-emery

updates

93528c6 about 2 years ago

raw

history blame contribute delete

7.08 kB

	# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""A module for helper tensorflow ops.

	This was originally implemented in the TensorFlow Object Detection API.
	"""

	import tensorflow as tf, tf_keras

	from official.vision.utils.object_detection import shape_utils


	def indices_to_dense_vector(indices,
	                            size,
	                            indices_value=1.,
	                            default_value=0,
	                            dtype=tf.float32):
	  """Builds a dense 1-D tensor with chosen positions set to a given value.

	  Every position of the output holds `default_value`, except the positions
	  listed in `indices`, which hold `indices_value`. The result is assembled
	  with `tf.dynamic_stitch`, so `indices` does not need to be sorted and
	  `size` may be a dynamic scalar (e.g. `tf.shape(tensor)[0]`). This helper
	  exists because it is unclear whether
	  `tf.sparse_to_dense(indices, [size], 1, validate_indices=False)` is safe
	  for unordered indices.

	  Args:
	    indices: 1-D integer Tensor of positions to set to `indices_value`.
	    size: scalar (integer) length of the output Tensor.
	    indices_value: value written at the positions given by `indices`.
	    default_value: value written at every other position.
	    dtype: data type of the output.

	  Returns:
	    Dense 1-D Tensor of shape [size] holding `indices_value` at `indices`
	    and `default_value` elsewhere.
	  """
	  length = tf.cast(size, dtype=tf.int32)
	  # Full-length background vector, followed by one entry per requested index.
	  background = tf.ones([length], dtype=dtype) * default_value
	  overrides = tf.ones_like(indices, dtype=dtype) * indices_value

	  # The override entries are listed after the background entries so that, per
	  # the tf.dynamic_stitch merge rule, they win at the positions they share.
	  positions = [tf.range(length), tf.cast(indices, dtype=tf.int32)]
	  payloads = [background, overrides]
	  return tf.dynamic_stitch(positions, payloads)


	def matmul_gather_on_zeroth_axis(params, indices, scope=None):
	  """Gathers rows of `params` along axis 0 using a one-hot matrix product.

	  `params` is flattened to 2-D, multiplied on the left by the one-hot
	  encoding of `indices`, and the product is reshaped back so the trailing
	  dimensions of `params` are restored.

	  TODO(rathodv, jonathanhuang): enable sparse matmul option.

	  Args:
	    params: A float32 Tensor. The tensor from which to gather values. Must be
	      at least rank 1.
	    indices: A Tensor. Must be one of the following types: int32, int64. Must
	      be in range [0, params.shape[0])
	    scope: A name for the operation (optional). Defaults to 'MatMulGather'.

	  Returns:
	    A Tensor. Has the same type as params. Values from params gathered
	    from indices given by indices, with shape
	    indices.shape + params.shape[1:].
	  """
	  with tf.name_scope(scope or 'MatMulGather'):
	    params_dims = shape_utils.combined_static_and_dynamic_shape(params)
	    indices_dims = shape_utils.combined_static_and_dynamic_shape(indices)
	    num_rows = params_dims[0]

	    # Collapse everything after axis 0 so each gatherable item is one row.
	    flat_params = tf.reshape(params, [num_rows, -1])
	    # Each one-hot row selects exactly one row of `flat_params`.
	    selector = tf.one_hot(indices, num_rows)
	    flat_gathered = tf.matmul(selector, flat_params)

	    # Restore the trailing dimensions of `params` behind the index dimensions.
	    output_dims = indices_dims + params_dims[1:]
	    return tf.reshape(flat_gathered, tf.stack(output_dims))


	def merge_boxes_with_multiple_labels(
	    boxes, classes, confidences, num_classes, quantization_bins=10000
	):
	  """Collapses boxes that share coordinates and K-hot encodes their classes.

	  Box coordinates are quantized and combined into one integer key per box;
	  boxes with equal keys are treated as the same box. For each distinct box
	  the labels and confidences of all its members are scattered into vectors
	  of length `num_classes`.

	  Args:
	    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
	      normalized coordinates are allowed.
	    classes: A tf.int32 tensor with shape [N] holding class indices. The
	      class index starts at 0.
	    confidences: A tf.float32 tensor with shape [N] holding class
	      confidences.
	    num_classes: total number of classes to use for K-hot encoding.
	    quantization_bins: the number of bins used to quantize the box
	      coordinate.

	  Returns:
	    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
	      where N' <= N.
	    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
	      K-hot encodings for the merged boxes.
	    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
	      holding encodings of confidences for the merged boxes.
	    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
	      indices of the boxes.
	  """
	  # Snap coordinates onto an integer grid and pack the four values into one
	  # int64 key (base `quantization_bins`), so equal boxes get equal keys.
	  grid_boxes = tf.cast(boxes * (quantization_bins - 1), dtype=tf.int64)
	  q_ymin, q_xmin, q_ymax, q_xmax = tf.unstack(grid_boxes, axis=1)
	  box_keys = (
	      q_ymin
	      + q_xmin * quantization_bins
	      + q_ymax * quantization_bins * quantization_bins
	      + q_xmax * quantization_bins * quantization_bins * quantization_bins
	  )

	  # `group_ids[i]` is the position of box i's key within `distinct_keys`.
	  distinct_keys, group_ids = tf.unique(box_keys)
	  num_boxes = tf.shape(boxes)[0]
	  num_groups = tf.shape(distinct_keys)[0]

	  # Represent every group by the smallest original index among its members.
	  merged_box_indices = tf.math.unsorted_segment_min(
	      tf.range(num_boxes), group_ids, num_groups
	  )
	  merged_boxes = tf.gather(boxes, merged_box_indices)
	  group_ids = tf.cast(group_ids, dtype=tf.int64)
	  classes = tf.cast(classes, dtype=tf.int64)

	  def encode_group(group_id):
	    """Returns the K-hot and confidence vectors for one merged box."""
	    in_group = tf.equal(
	        group_ids, group_id * tf.ones(num_boxes, dtype=tf.int64)
	    )
	    in_group = tf.reshape(in_group, [-1])
	    group_classes = tf.boolean_mask(classes, in_group)
	    group_confidences = tf.boolean_mask(confidences, in_group)
	    group_classes = tf.cast(group_classes, dtype=tf.int64)

	    # SparseTensor indices must be 2-D; add the trailing axis when the class
	    # ids arrive as a flat vector.
	    # NOTE(review): presumably this lets `classes` be either [N] or [N, 1] --
	    # confirm against callers.
	    if tf.rank(group_classes) == 1:
	      group_classes = tf.expand_dims(group_classes, axis=-1)

	    k_hot = tf.SparseTensor(
	        group_classes,
	        tf.squeeze(tf.ones_like(group_classes, dtype=tf.int64), axis=-1),
	        [num_classes],
	    )
	    k_hot = tf.sparse.to_dense(tf.sparse.reorder(k_hot))

	    # SparseTensor values must be 1-D; drop a trailing axis if one is present.
	    if tf.rank(group_confidences) > 1:
	      group_confidences = tf.squeeze(group_confidences, axis=-1)

	    scores = tf.SparseTensor(
	        group_classes,
	        group_confidences,
	        [num_classes],
	    )
	    scores = tf.sparse.to_dense(tf.sparse.reorder(scores))

	    return k_hot, scores

	  # Important to avoid int32 here since there is no GPU kernel for int32.
	  # int64 and float32 are fine.
	  group_range = tf.range(tf.cast(num_groups, dtype=tf.int64))
	  raw_encodings = tf.map_fn(
	      encode_group,
	      group_range,
	      dtype=(tf.int64, tf.float32),
	  )
	  class_encodings, confidence_encodings = tf.nest.map_structure(
	      tf.stop_gradient, raw_encodings
	  )

	  # Pin the documented output shapes and convert the K-hot result to int32.
	  merged_boxes = tf.reshape(merged_boxes, [-1, 4])
	  class_encodings = tf.cast(class_encodings, dtype=tf.int32)
	  class_encodings = tf.reshape(class_encodings, [-1, num_classes])
	  confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
	  merged_box_indices = tf.reshape(merged_box_indices, [-1])
	  return (
	      merged_boxes,
	      class_encodings,
	      confidence_encodings,
	      merged_box_indices,
	  )