Spaces:

Pinwheel
/

GLIP-BLIP-Object-Detection-VQA

Runtime error

App Files Files Community

GLIP-BLIP-Object-Detection-VQA / maskrcnn_benchmark /modeling /roi_heads /keypoint_head /loss.py

Pinwheel

HF Demo

128757a about 3 years ago

raw

history blame contribute delete

7.28 kB

	import torch
	from torch.nn import functional as F

	from maskrcnn_benchmark.modeling.matcher import Matcher

	from maskrcnn_benchmark.modeling.balanced_positive_negative_sampler import (
	BalancedPositiveNegativeSampler,
	)
	from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou
	from maskrcnn_benchmark.modeling.utils import cat
	from maskrcnn_benchmark.layers import smooth_l1_loss
	from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist

	from maskrcnn_benchmark.structures.keypoint import keypoints_to_heat_map


	def project_keypoints_to_heatmap(keypoints, proposals, discretization_size):
	proposals = proposals.convert("xyxy")
	return keypoints_to_heat_map(
	keypoints.keypoints, proposals.bbox, discretization_size
	)


	def cat_boxlist_with_keypoints(boxlists):
	assert all(boxlist.has_field("keypoints") for boxlist in boxlists)

	kp = [boxlist.get_field("keypoints").keypoints for boxlist in boxlists]
	kp = cat(kp, 0)

	fields = boxlists[0].get_fields()
	fields = [field for field in fields if field != "keypoints"]

	boxlists = [boxlist.copy_with_fields(fields) for boxlist in boxlists]
	boxlists = cat_boxlist(boxlists)
	boxlists.add_field("keypoints", kp)
	return boxlists


	def _within_box(points, boxes):
	"""Validate which keypoints are contained inside a given box.
	points: NxKx2
	boxes: Nx4
	output: NxK
	"""
	x_within = (points[..., 0] >= boxes[:, 0, None]) & (
	points[..., 0] <= boxes[:, 2, None]
	)
	y_within = (points[..., 1] >= boxes[:, 1, None]) & (
	points[..., 1] <= boxes[:, 3, None]
	)
	return x_within & y_within


	class KeypointRCNNLossComputation(object):
	def __init__(self, proposal_matcher, fg_bg_sampler, discretization_size):
	"""
	Arguments:
	proposal_matcher (Matcher)
	fg_bg_sampler (BalancedPositiveNegativeSampler)
	discretization_size (int)
	"""
	self.proposal_matcher = proposal_matcher
	self.fg_bg_sampler = fg_bg_sampler
	self.discretization_size = discretization_size

	def match_targets_to_proposals(self, proposal, target):
	match_quality_matrix = boxlist_iou(target, proposal)
	matched_idxs = self.proposal_matcher(match_quality_matrix)
	# Keypoint RCNN needs "labels" and "keypoints "fields for creating the targets
	target = target.copy_with_fields(["labels", "keypoints"])
	# get the targets corresponding GT for each proposal
	# NB: need to clamp the indices because we can have a single
	# GT in the image, and matched_idxs can be -2, which goes
	# out of bounds
	matched_targets = target[matched_idxs.clamp(min=0)]
	matched_targets.add_field("matched_idxs", matched_idxs)
	return matched_targets

	def prepare_targets(self, proposals, targets):
	labels = []
	keypoints = []
	for proposals_per_image, targets_per_image in zip(proposals, targets):
	matched_targets = self.match_targets_to_proposals(
	proposals_per_image, targets_per_image
	)
	matched_idxs = matched_targets.get_field("matched_idxs")

	labels_per_image = matched_targets.get_field("labels")
	labels_per_image = labels_per_image.to(dtype=torch.int64)

	# this can probably be removed, but is left here for clarity
	# and completeness
	# TODO check if this is the right one, as BELOW_THRESHOLD
	neg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
	labels_per_image[neg_inds] = 0

	keypoints_per_image = matched_targets.get_field("keypoints")
	within_box = _within_box(
	keypoints_per_image.keypoints, matched_targets.bbox
	)
	vis_kp = keypoints_per_image.keypoints[..., 2] > 0
	is_visible = (within_box & vis_kp).sum(1) > 0

	labels_per_image[~is_visible] = -1

	labels.append(labels_per_image)
	keypoints.append(keypoints_per_image)

	return labels, keypoints

	def subsample(self, proposals, targets):
	"""
	This method performs the positive/negative sampling, and return
	the sampled proposals.
	Note: this function keeps a state.

	Arguments:
	proposals (list[BoxList])
	targets (list[BoxList])
	"""

	labels, keypoints = self.prepare_targets(proposals, targets)
	sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

	proposals = list(proposals)
	# add corresponding label and regression_targets information to the bounding boxes
	for labels_per_image, keypoints_per_image, proposals_per_image in zip(
	labels, keypoints, proposals
	):
	proposals_per_image.add_field("labels", labels_per_image)
	proposals_per_image.add_field("keypoints", keypoints_per_image)

	# distributed sampled proposals, that were obtained on all feature maps
	# concatenated via the fg_bg_sampler, into individual feature map levels
	for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
	zip(sampled_pos_inds, sampled_neg_inds)
	):
	img_sampled_inds = torch.nonzero(pos_inds_img).squeeze(1)
	proposals_per_image = proposals[img_idx][img_sampled_inds]
	proposals[img_idx] = proposals_per_image

	self._proposals = proposals
	return proposals

	def __call__(self, proposals, keypoint_logits):
	heatmaps = []
	valid = []
	for proposals_per_image in proposals:
	kp = proposals_per_image.get_field("keypoints")
	heatmaps_per_image, valid_per_image = project_keypoints_to_heatmap(
	kp, proposals_per_image, self.discretization_size
	)
	heatmaps.append(heatmaps_per_image.view(-1))
	valid.append(valid_per_image.view(-1))

	keypoint_targets = cat(heatmaps, dim=0)
	valid = cat(valid, dim=0).to(dtype=torch.bool)
	valid = torch.nonzero(valid).squeeze(1)

	# torch.mean (in binary_cross_entropy_with_logits) does'nt
	# accept empty tensors, so handle it sepaartely
	if keypoint_targets.numel() == 0 or len(valid) == 0:
	return keypoint_logits.sum() * 0

	N, K, H, W = keypoint_logits.shape
	keypoint_logits = keypoint_logits.view(N * K, H * W)

	keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
	return keypoint_loss


	def make_roi_keypoint_loss_evaluator(cfg):
	matcher = Matcher(
	cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
	cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
	allow_low_quality_matches=False,
	)
	fg_bg_sampler = BalancedPositiveNegativeSampler(
	cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
	)
	resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.RESOLUTION
	loss_evaluator = KeypointRCNNLossComputation(matcher, fg_bg_sampler, resolution)
	return loss_evaluator