Spaces:

NCTCMumbai
/

NCTC

Runtime error

App Files Files Community

NCTC / models /research /object_detection /box_coders /keypoint_box_coder.py

NCTCMumbai

Upload 2571 files

0b8359d over 2 years ago

raw

history blame contribute delete

6.41 kB

	# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Keypoint box coder.

	The keypoint box coder follows the coding schema described below (this is
	similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
	to box coordinates):
	ty = (y - ya) / ha
	tx = (x - xa) / wa
	th = log(h / ha)
	tw = log(w / wa)
	tky0 = (ky0 - ya) / ha
	tkx0 = (kx0 - xa) / wa
	tky1 = (ky1 - ya) / ha
	tkx1 = (kx1 - xa) / wa
	...
	where x, y, w, h denote the box's center coordinates, width and height
	respectively. Similarly, xa, ya, wa, ha denote the anchor's center
	coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
	center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
	keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
	anchor-encoded keypoint coordinates.
	"""

	import tensorflow.compat.v1 as tf

	from object_detection.core import box_coder
	from object_detection.core import box_list
	from object_detection.core import standard_fields as fields

	# Small positive constant added to the box and anchor heights/widths in
	# KeypointBoxCoder._encode before they are used in divisions and logs, so
	# that zero-sized boxes or anchors do not produce NaN values.
	EPSILON = 1e-8


	class KeypointBoxCoder(box_coder.BoxCoder):
	  """Box coder that encodes keypoints in addition to box coordinates.

	  Boxes are encoded relative to anchors as [ty, tx, th, tw], followed by the
	  anchor-relative keypoint coordinates [tky0, tkx0, tky1, tkx1, ...].
	  """

	  def __init__(self, num_keypoints, scale_factors=None):
	    """Constructor for KeypointBoxCoder.

	    Args:
	      num_keypoints: Number of keypoints to encode/decode.
	      scale_factors: Sequence of 4 positive scalars to scale ty, tx, th and
	        tw. In addition to scaling ty and tx, the first 2 scalars are used to
	        scale the y and x coordinates of the keypoints as well. If set to
	        None, does not perform scaling.

	    Raises:
	      AssertionError: if scale_factors is not None and does not consist of
	        exactly 4 positive scalars.
	    """
	    self._num_keypoints = num_keypoints

	    keypoint_scale_factors = None
	    # Test against None (not truthiness) so that an empty sequence is rejected
	    # by the length check instead of failing later with an IndexError, and so
	    # that array-like inputs whose truth value is ambiguous are accepted.
	    if scale_factors is not None:
	      assert len(scale_factors) == 4, (
	          'scale_factors must contain exactly 4 scalars.')
	      for scalar in scale_factors:
	        assert scalar > 0, 'scale_factors must all be positive.'
	      # Column vector [sy, sx, sy, sx, ...] with 2 * num_keypoints rows; it is
	      # tiled across boxes in _encode/_decode.
	      # NOTE(review): this tensor is created at construction time. In graph
	      # mode it presumably belongs to whichever graph is the default here --
	      # confirm the coder is always built in the graph that later uses it.
	      keypoint_scale_factors = tf.expand_dims(
	          tf.tile([
	              tf.cast(scale_factors[0], dtype=tf.float32),
	              tf.cast(scale_factors[1], dtype=tf.float32)
	          ], [num_keypoints]), 1)
	    self._scale_factors = scale_factors
	    self._keypoint_scale_factors = keypoint_scale_factors

	  @property
	  def code_size(self):
	    """Length of one encoded row: 4 box values plus 2 per keypoint."""
	    return 4 + self._num_keypoints * 2

	  def _encode(self, boxes, anchors):
	    """Encode a box and keypoint collection with respect to anchor collection.

	    Args:
	      boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
	        tensors with the shape [N, 4], and keypoints are tensors with the
	        shape [N, num_keypoints, 2].
	      anchors: BoxList of anchors.

	    Returns:
	      a tensor representing N anchor-encoded boxes of the format
	      [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
	      represent the y and x coordinates of the first keypoint, tky1 and tkx1
	      represent the y and x coordinates of the second keypoint, and so on.
	    """
	    # Convert anchors and boxes to the center coordinate representation.
	    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
	    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()

	    # Flatten each box's keypoints into one row of 2 * num_keypoints values,
	    # then transpose so that rows index coordinates and columns index boxes.
	    keypoints = boxes.get_field(fields.BoxListFields.keypoints)
	    keypoints = tf.transpose(
	        tf.reshape(keypoints, [-1, self._num_keypoints * 2]))

	    # Avoid NaN in division and log below.
	    ha += EPSILON
	    wa += EPSILON
	    h += EPSILON
	    w += EPSILON

	    tx = (xcenter - xcenter_a) / wa
	    ty = (ycenter - ycenter_a) / ha
	    tw = tf.log(w / wa)
	    th = tf.log(h / ha)

	    # Repeat the stacked (y, x) anchor rows once per keypoint so they line up
	    # with the alternating y/x rows of `keypoints`.
	    tiled_anchor_centers = tf.tile(
	        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
	    tiled_anchor_sizes = tf.tile(
	        tf.stack([ha, wa]), [self._num_keypoints, 1])
	    tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes

	    # Scales location targets as used in paper for joint training.
	    if self._scale_factors is not None:
	      ty *= self._scale_factors[0]
	      tx *= self._scale_factors[1]
	      th *= self._scale_factors[2]
	      tw *= self._scale_factors[3]
	      # The box count is only needed to tile the per-keypoint scale column.
	      num_boxes = boxes.num_boxes()
	      tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])

	    # Concatenate box rows and keypoint rows, then transpose back so that each
	    # row of the result corresponds to one box.
	    tboxes = tf.stack([ty, tx, th, tw])
	    return tf.transpose(tf.concat([tboxes, tkeypoints], 0))

	  def _decode(self, rel_codes, anchors):
	    """Decode relative codes to boxes and keypoints.

	    Args:
	      rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
	        anchor-encoded boxes and keypoints
	      anchors: BoxList of anchors.

	    Returns:
	      boxes: BoxList holding N bounding boxes and keypoints.
	    """
	    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

	    # Split the transposed codes into one row per code element: the first four
	    # rows are the box terms, the remaining rows are keypoint coordinates.
	    code_rows = tf.unstack(tf.transpose(rel_codes))
	    ty, tx, th, tw = code_rows[:4]
	    # Stack the keypoint rows into a single tensor explicitly rather than
	    # relying on implicit list-to-tensor conversion in the arithmetic below.
	    tkeypoints = tf.stack(code_rows[4:])

	    # Undo the scaling applied in _encode.
	    if self._scale_factors is not None:
	      ty /= self._scale_factors[0]
	      tx /= self._scale_factors[1]
	      th /= self._scale_factors[2]
	      tw /= self._scale_factors[3]
	      # The code count is only needed to tile the per-keypoint scale column.
	      num_codes = tf.shape(rel_codes)[0]
	      tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])

	    # Invert the box encoding and convert from center/size to corner form.
	    w = tf.exp(tw) * wa
	    h = tf.exp(th) * ha
	    ycenter = ty * ha + ycenter_a
	    xcenter = tx * wa + xcenter_a
	    ymin = ycenter - h / 2.
	    xmin = xcenter - w / 2.
	    ymax = ycenter + h / 2.
	    xmax = xcenter + w / 2.
	    decoded_boxes_keypoints = box_list.BoxList(
	        tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))

	    # Invert the keypoint encoding using the same tiled anchor layout as in
	    # _encode, then restore the [N, num_keypoints, 2] shape.
	    tiled_anchor_centers = tf.tile(
	        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
	    tiled_anchor_sizes = tf.tile(
	        tf.stack([ha, wa]), [self._num_keypoints, 1])
	    keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
	    keypoints = tf.reshape(tf.transpose(keypoints),
	                           [-1, self._num_keypoints, 2])
	    decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
	    return decoded_boxes_keypoints