Spaces:

Pinwheel
/

GLIP-BLIP-Object-Detection-VQA

Runtime error

App Files Files Community

GLIP-BLIP-Object-Detection-VQA / maskrcnn_benchmark /modeling /roi_heads /keypoint_head /inference.py

Pinwheel

HF Demo

128757a about 3 years ago

raw

history blame contribute delete

4.54 kB

	import cv2
	import numpy as np
	import torch
	from torch import nn

	from maskrcnn_benchmark.structures.bounding_box import BoxList
	from maskrcnn_benchmark.structures.keypoint import PersonKeypoints


	class KeypointPostProcessor(nn.Module):
	    """Attach keypoint predictions to the detected boxes of an image.

	    The optional ``keypointer`` callable is invoked as
	    ``keypointer(x, boxes)`` and must return a ``(keypoints, scores)`` pair.
	    When no keypointer is configured the input ``x`` is used as-is and no
	    scores are available.
	    """

	    def __init__(self, keypointer=None):
	        super(KeypointPostProcessor, self).__init__()
	        self.keypointer = keypointer

	    def forward(self, x, boxes):
	        """Build one result ``BoxList`` per image with a "keypoints" field.

	        Arguments:
	            x: keypoint head output; passed to the keypointer when one is
	                configured, otherwise wrapped directly.
	            boxes: sequence of ``BoxList`` objects; exactly one is supported.

	        Returns:
	            list of ``BoxList``: copies of the input boxes (same fields)
	            with an extra "keypoints" field holding a ``PersonKeypoints``.
	            The keypoints carry a "logits" field only when scores were
	            produced by the keypointer.
	        """
	        mask_prob = x

	        scores = None
	        if self.keypointer is not None:
	            mask_prob, scores = self.keypointer(x, boxes)

	        assert len(boxes) == 1, "Only non-batched inference supported for now"
	        boxes_per_image = [box.bbox.size(0) for box in boxes]
	        mask_prob = mask_prob.split(boxes_per_image, dim=0)
	        if scores is None:
	            # Without a keypointer there is nothing to split; previously this
	            # path crashed with AttributeError on ``None.split``.
	            # NOTE(review): confirm PersonKeypoints accepts the raw input
	            # ``x`` in this configuration.
	            scores = [None] * len(boxes)
	        else:
	            scores = scores.split(boxes_per_image, dim=0)

	        results = []
	        for prob, box, score in zip(mask_prob, boxes, scores):
	            # Rebuild the box list so the input object is not mutated.
	            bbox = BoxList(box.bbox, box.size, mode="xyxy")
	            for field in box.fields():
	                bbox.add_field(field, box.get_field(field))
	            keypoints = PersonKeypoints(prob, box.size)
	            if score is not None:
	                keypoints.add_field("logits", score)
	            bbox.add_field("keypoints", keypoints)
	            results.append(bbox)

	        return results


	def heatmaps_to_keypoints(maps, rois, min_size=0):
	    """Extract predicted keypoint locations from per-ROI heatmaps.

	    Each ROI's heatmaps are resized with bicubic interpolation to the ROI's
	    (ceiled) width and height, the arg-max of every keypoint channel is
	    located, and that position is shifted by the ROI's top-left corner.

	    Arguments:
	        maps: array indexed as (roi, keypoint, y, x); it is transposed from
	            NCHW to NHWC before being handed to OpenCV.
	        rois: array with one row per ROI; columns 0-3 are read as
	            x1, y1, x2, y2.
	        min_size: lower bound for the side lengths of the resized heatmap.
	            The default 0 disables the bound, which is the behaviour this
	            function had when the value was hard-coded.

	    Returns:
	        tuple ``(xy_preds, end_scores)`` where ``xy_preds`` is a float32
	        array of shape (#rois, #keypoints, 3) holding (x, y, 1) per keypoint
	        and ``end_scores`` is a float32 array of shape (#rois, #keypoints)
	        holding the resized heatmap value at each keypoint's arg-max.
	    """
	    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
	    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
	    # consistency with keypoints_to_heatmap_labels by using the conversion from
	    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
	    # continuous coordinate.
	    offset_x = rois[:, 0]
	    offset_y = rois[:, 1]

	    widths = rois[:, 2] - rois[:, 0]
	    heights = rois[:, 3] - rois[:, 1]
	    # Clamp so degenerate boxes still produce a heatmap of at least 1x1.
	    widths = np.maximum(widths, 1)
	    heights = np.maximum(heights, 1)
	    widths_ceil = np.ceil(widths)
	    heights_ceil = np.ceil(heights)

	    # NCHW to NHWC for use with OpenCV
	    maps = np.transpose(maps, [0, 2, 3, 1])
	    num_keypoints = maps.shape[3]
	    xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32)
	    end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32)
	    for i in range(len(rois)):
	        if min_size > 0:
	            roi_map_width = int(np.maximum(widths_ceil[i], min_size))
	            roi_map_height = int(np.maximum(heights_ceil[i], min_size))
	        else:
	            roi_map_width = widths_ceil[i]
	            roi_map_height = heights_ceil[i]
	        width_correction = widths[i] / roi_map_width
	        height_correction = heights[i] / roi_map_height
	        # cv2.resize requires an integer (width, height); np.ceil returns
	        # floating-point scalars, which OpenCV rejects as dsize.
	        roi_map = cv2.resize(
	            maps[i],
	            (int(roi_map_width), int(roi_map_height)),
	            interpolation=cv2.INTER_CUBIC,
	        )
	        # cv2.resize returns a 2-D array for single-channel input; restore
	        # the channel axis so the transpose below is valid.
	        if roi_map.ndim == 2:
	            roi_map = roi_map[:, :, np.newaxis]
	        # Bring back to CHW
	        roi_map = np.transpose(roi_map, [2, 0, 1])
	        w = roi_map.shape[2]
	        # Flat arg-max per keypoint channel, then unravel to (y, x).
	        pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1)
	        x_int = pos % w
	        y_int = (pos - x_int) // w
	        x = (x_int + 0.5) * width_correction
	        y = (y_int + 0.5) * height_correction
	        xy_preds[i, 0, :] = x + offset_x[i]
	        xy_preds[i, 1, :] = y + offset_y[i]
	        xy_preds[i, 2, :] = 1
	        end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int]

	    return np.transpose(xy_preds, [0, 2, 1]), end_scores


	class Keypointer(object):
	    """
	    Callable that turns keypoint heatmaps into keypoint coordinates and
	    scores for the boxes of a single image, by delegating to
	    heatmaps_to_keypoints.
	    """

	    def __init__(self, padding=0):
	        # Stored on the instance; __call__ does not read it.
	        self.padding = padding

	    def __call__(self, masks, boxes):
	        # TODO do this properly
	        box_lists = [boxes] if isinstance(boxes, BoxList) else boxes
	        assert len(box_lists) == 1

	        # The conversion runs in NumPy, so both inputs are moved to the CPU.
	        heatmaps_np = masks.detach().cpu().numpy()
	        rois_np = box_lists[0].bbox.cpu().numpy()
	        keypoints_np, scores_np = heatmaps_to_keypoints(heatmaps_np, rois_np)

	        # Hand the results back on the device the heatmaps came from.
	        target_device = masks.device
	        keypoints = torch.from_numpy(keypoints_np).to(target_device)
	        keypoint_scores = torch.as_tensor(scores_np, device=target_device)
	        return keypoints, keypoint_scores


	def make_roi_keypoint_post_processor(cfg):
	    """Build a KeypointPostProcessor backed by a default Keypointer.

	    ``cfg`` is accepted for interface compatibility; it is not read here.
	    """
	    return KeypointPostProcessor(Keypointer())