Spaces:

ppeyret
/

nbm_v1

Build error

nbm_v1 / faster_utils.py

unknown

add app files

82b8273 about 2 years ago

10.9 kB

	import numpy as np
	import torch
	import torch.nn as nn

	# Utility functions for Faster R-CNN layers

	class Config:
	    """
	    Namespace of class-level settings for the Faster R-CNN utilities.

	    Every value is a plain class attribute. ``scales`` and ``n_anchors`` are
	    derived from the attributes declared just above them when the class body
	    is executed, so the declaration order in the anchor section matters.
	    """

	    # Runtime, checkpoint and backbone settings
	    verbose = False
	    save_dir = './models'
	    conv_net_path = 'checkpoints/vgg16_bn-6c64b313.pth'
	    backbone = 'vgg'
	    pretrain_conv_net = True
	    device = 'cuda'
	    classification = False

	    # General params
	    num_classes = 1
	    input_channels = 1
	    img_width = 1024
	    img_height = 256
	    use_biophonia = True
	    fpn = True
	    fpn_rpn = False
	    fpn_p_channels = 256
	    fpn_o_channels = 256
	    normalize_input = False
	    noise_strength = 0
	    self_attention = False
	    encode_frequency = False
	    position_encoding = False
	    transform = False

	    # Anchors
	    anchor_stride = 16
	    base_size = 16
	    ratios = [0.5, 1, 2]
	    scales_factor_low = 0
	    scales_factor_high = 4
	    # Powers of two 2**low ... 2**(high - 1)
	    scales = 2 ** np.arange(scales_factor_low, scales_factor_high)
	    # One anchor per (ratio, scale) pair
	    n_anchors = len(ratios) * len(scales)

	    # Anchor Target Layer
	    rpn_neg_label = 0.3
	    rpn_pos_label = 0.7  # maybe lower this a bit
	    rpn_batchsize = 16  # tune this; try reducing it drastically?
	    rpn_fg_fraction = 0.5

	    # Proposal Layer
	    pre_nms_topN = 3000  # tune this
	    min_threshold = 5  # minimum proposal size in px
	    nms_thresh = 0.7
	    post_nms_topN = 1000  # tune this
	    post_nms_topN_eval = 50
	    pre_nms_topN_eval = 500

	    # Proposal Target Layer
	    rcnn_batch_size = 16  # tune this
	    rcnn_fg_prop = 0.4  # 0.25 in the original paper; try different values
	    fg_threshold = 0.5
	    bg_threshold_lo = 0.1
	    bg_threshold_hi = 0.5

	    # ROI Pooling
	    roi_pool_h = 2  # to be changed (3? 4?)
	    roi_pool_w = 2  # to be changed (3? 4?)
	    hidden_size = 4096
	    top_pyramid_roi_size = 128
	    rcnn_attention = False

	    # Inference
	    proposal_number = 50  # number of proposals per class after last nms

	    # Training
	    lambda_reg_rpn_loss = 1.0  # try a few other values
	    lambda_reg_rcnn_loss = 1.0  # try a few other values
	    batch_size = 2
	    val_size = 20
	    learning_rate = 1e-4  # tune this
	    validation_prop = 0.01
	    n_epochs = 10
	    save_every = 10
	    scheduler_gamma = 0.1
	    scheduler_milestones = [15, 25]
	    cv_idx = -1


	def generate_anchors(base_size, ratios, scales):
	    """
	    Build the reference anchors for one feature-map cell.

	    Args:
	        base_size: side length of the square base anchor.
	        ratios: sequence or array of aspect ratios. As implemented, the width
	            is multiplied by sqrt(ratio) and the height by 1 / sqrt(ratio).
	        scales: sequence, array or scalar of multiplicative scale factors.
	            Plain lists are accepted as well as ndarrays.

	    Returns:
	        Integer array of shape (len(scales) * len(ratios), 4) holding
	        (x1, y1, x2, y2) rows, ordered scale-major (all ratios for the first
	        scale, then all ratios for the second scale, ...). Coordinates are
	        truncated toward zero by the final integer cast.
	    """
	    # Coerce so that list inputs behave like the ndarray the indexing below needs
	    scales = np.array(scales, ndmin=1)

	    base_anchor_wh = np.array([base_size, base_size])

	    # Deform base anchor dimensions to the given ratios
	    sqrt_ratios = np.sqrt(ratios)
	    coeffs = np.hstack([sqrt_ratios[:, np.newaxis], (1 / sqrt_ratios)[:, np.newaxis]])
	    ratios_anchors_wh = coeffs * np.sqrt(np.prod(base_anchor_wh))

	    # Expand the resulting anchor dimensions to the given sizes
	    all_anchor_whs = (ratios_anchors_wh.flatten() * scales[:, np.newaxis]).reshape(-1, 2)

	    # Convert from w h to x1 y1 x2 y2 representation, given center coordinates at int(base_size / 2)
	    center = int(base_size / 2)
	    half_extents = all_anchor_whs / 2
	    all_anchor = (np.hstack([-half_extents, half_extents]) + center).astype(int)

	    return all_anchor


	def get_anchor_shifts(width, height, anchor_stride):
	    """
	    Enumerate the (x, y, x, y) offsets of every cell of a width x height grid.

	    Offsets are multiples of ``anchor_stride``; x varies fastest. The result
	    has shape (width * height, 1, 4).
	    """
	    xs = anchor_stride * np.arange(0, width)
	    ys = anchor_stride * np.arange(0, height)

	    # Row-major grid: grid_x repeats xs on every row, grid_y is constant per row
	    grid_x, grid_y = np.meshgrid(xs, ys)
	    flat_x = grid_x.ravel()
	    flat_y = grid_y.ravel()

	    # The same offset is applied to both corners of a box, hence x, y twice
	    offsets = np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)

	    return offsets.reshape(-1, 1, 4)


	def bbox_overlap(anchors, bbox):
	    """
	    Pairwise intersection over union between K anchors and N boxes.

	    Both inputs are indexed as rows of (x1, y1, x2, y2); extents are computed
	    with a ``+ 1``, i.e. coordinates are treated as inclusive. Returns a
	    K x N matrix whose entry [k, n] is the IoU of anchors[k] and bbox[n].
	    """
	    # Arrange pairs as (N, K) via broadcasting: boxes on axis 0, anchors on axis 1
	    anc = anchors[None, :, :]
	    box = bbox[:, None, :]

	    # Overlap extent along x, floored at zero for disjoint pairs
	    x_hi = torch.min(anc[..., 2], box[..., 2])
	    x_lo = torch.max(anc[..., 0], box[..., 0])
	    overlap_w = (x_hi - x_lo + 1).clamp(min=0)

	    # Overlap extent along y
	    y_hi = torch.min(anc[..., 3], box[..., 3])
	    y_lo = torch.max(anc[..., 1], box[..., 1])
	    overlap_h = (y_hi - y_lo + 1).clamp(min=0)

	    inter = overlap_w * overlap_h

	    anc_area = (anc[..., 2] - anc[..., 0] + 1) * (anc[..., 3] - anc[..., 1] + 1)
	    box_area = (box[..., 2] - box[..., 0] + 1) * (box[..., 3] - box[..., 1] + 1)
	    union = anc_area + box_area - inter

	    # Computed as (N, K); hand back the transposed K x N view
	    return (inter / union).transpose(1, 0)


	def bbox_transform(anchors, bbox):
	    """
	    Encode boxes as regression offsets relative to anchors.

	    Both inputs are indexed as rows of (x1, y1, x2, y2) and are paired
	    row by row. Returns one (t_x, t_y, t_w, t_h) row per pair: centre shifts
	    divided by the anchor size, and log ratios of the widths and heights.
	    """

	    def _center_form(boxes):
	        # Inclusive-pixel extents (hence the + 1) and the matching centre
	        width = (boxes[:, 2] - boxes[:, 0]) + 1
	        height = (boxes[:, 3] - boxes[:, 1]) + 1
	        center_x = boxes[:, 0] + 0.5 * width
	        center_y = boxes[:, 1] + 0.5 * height
	        return center_x, center_y, width, height

	    xa, ya, wa, ha = _center_form(anchors)
	    x, y, w, h = _center_form(bbox)

	    offsets = [
	        (x - xa) / wa,
	        (y - ya) / ha,
	        torch.log(w / wa),
	        torch.log(h / ha),
	    ]

	    # Stacked as (4, n); returned as the transposed (n, 4) view
	    return torch.stack(offsets).transpose(1, 0)


	def weight_init(m):
	    """
	    Initialise the weights of module ``m`` according to its class name.

	    - names containing 'BatchNorm': weight drawn from N(0.0, 0.02)
	    - names containing 'Linear' but not 'LinearLayer': Kaiming normal
	    - names containing 'Conv2d': Kaiming normal

	    The three checks are independent, so a name matching several of them is
	    initialised several times, in the order listed above.
	    """
	    name = m.__class__.__name__

	    if 'BatchNorm' in name:
	        m.weight.data.normal_(0.0, 0.02)

	    is_plain_linear = ('Linear' in name) and ('LinearLayer' not in name)
	    if is_plain_linear:
	        nn.init.kaiming_normal_(m.weight)

	    if 'Conv2d' in name:
	        nn.init.kaiming_normal_(m.weight)


	def collate_fn(list_batch):
	    """
	    Merge a list of (img, bb_coord, bird_id, img_info) samples into one batch.

	    Returns a list ``[img_batch, bb_coord_batch, lengths, bird_ids, img_infos]``:
	    images are stacked along a new leading axis, box coordinates and ids are
	    concatenated along axis 0, ``lengths`` records how many box rows each
	    sample contributed, and ``img_infos`` is the untouched per-sample info list.
	    """
	    images, boxes, ids, img_infos, lengths = [], [], [], [], []

	    for img, bb_cord, bird_id, img_info in list_batch:
	        images.append(img)
	        boxes.append(bb_cord)
	        ids.append(bird_id)
	        img_infos.append(img_info)
	        # Needed later to split the concatenated boxes back per sample
	        lengths.append(bb_cord.size(0))

	    img_batch = torch.stack(images)
	    bb_coord_batch = torch.cat(boxes, dim=0)
	    bird_ids = torch.cat(ids)

	    return [img_batch, bb_coord_batch, lengths, bird_ids, img_infos]



	def bbox_reg_to_coord(bbox_pred, anchors):
	    """
	    Decode regression offsets back into rounded (x1, y1, x2, y2) boxes.

	    ``anchors`` is indexed as rows of (x1, y1, x2, y2); the last axis of
	    ``bbox_pred`` holds (t_x, t_y, t_w, t_h). The four corner coordinates are
	    stacked on axis 2 of the result.
	    NOTE(review): presumably bbox_pred is (batch, n_boxes, 4) and anchors is
	    (n_boxes, 4) - confirm against callers.
	    """
	    # Anchor size (inclusive pixels, hence + 1) and centre
	    anchor_w = (anchors[:, 2] - anchors[:, 0]) + 1
	    anchor_h = (anchors[:, 3] - anchors[:, 1]) + 1
	    anchor_cx = anchors[:, 0] + 0.5 * anchor_w
	    anchor_cy = anchors[:, 1] + 0.5 * anchor_h

	    # Predicted centre: anchor centre shifted by a fraction of the anchor size
	    cx = (bbox_pred[..., 0] * anchor_w) + anchor_cx
	    cy = (bbox_pred[..., 1] * anchor_h) + anchor_cy

	    # Predicted size: anchor size scaled by the exponential of the offset
	    half_w = 0.5 * (torch.exp(bbox_pred[..., 2]) * anchor_w)
	    half_h = 0.5 * (torch.exp(bbox_pred[..., 3]) * anchor_h)

	    corners = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]

	    return torch.stack([corner.round() for corner in corners], dim=2)


	def batch_self_overlap(bbox_pred):
	    """
	    Intersection over union of every box with every box of the same batch item.

	    The last axis of ``bbox_pred`` holds (x1, y1, x2, y2), treated as
	    inclusive coordinates (``+ 1`` on each extent); axis 1 indexes the boxes.
	    Returns a tensor viewed as (-1, n_boxes, n_boxes).
	    """
	    n_boxes = bbox_pred.shape[1]

	    # Two broadcastable views of the same boxes: one varying along the last
	    # box axis, the other along the first box axis
	    inner = bbox_pred[:, None, :, :]
	    outer = bbox_pred[:, :, None, :]

	    x_hi = torch.min(inner[..., 2], outer[..., 2])
	    x_lo = torch.max(inner[..., 0], outer[..., 0])
	    overlap_w = (x_hi - x_lo + 1).clamp(min=0)

	    y_hi = torch.min(inner[..., 3], outer[..., 3])
	    y_lo = torch.max(inner[..., 1], outer[..., 1])
	    overlap_h = (y_hi - y_lo + 1).clamp(min=0)

	    inter = overlap_w * overlap_h

	    box_area = (bbox_pred[..., 2] - bbox_pred[..., 0] + 1) * (bbox_pred[..., 3] - bbox_pred[..., 1] + 1)
	    union = box_area[:, None, :] + box_area[:, :, None] - inter

	    return (inter / union).view(-1, n_boxes, n_boxes)


	def nms(bbox_pred, scores, nms_thresh=0.7, post_nms_topN=300, return_idx=False):
	    """
	    Applies non maximum suppression to the predicted bbox coordinates bbox_pred (shape batch_size * n_boxes * 4)
	    scores are sorted in decreasing order, and bbox_pred coordinates are sorted accordingly for each batch idx

	    Boxes are visited in order; a visited box that has not been suppressed is
	    kept and suppresses every later box whose IoU with it is >= nms_thresh.

	    Args:
	        bbox_pred: box coordinates, one set of boxes per batch item.
	        scores: per-box scores, indexed as scores[batch_idx, box_idx].
	        nms_thresh: IoU at or above which a later box is suppressed.
	        post_nms_topN: maximum number of boxes kept per batch item.
	        return_idx: when True, also return the kept indices.

	    Returns:
	        (bbox_pred, scores), both truncated to the same number of boxes for
	        every batch item (the smallest kept count, capped by post_nms_topN).
	        With return_idx=True a third element is appended: the list of kept
	        indices per batch item, NOT truncated.
	    """
	    iou = batch_self_overlap(bbox_pred)

	    batch_keep = []
	    batch_size = len(bbox_pred)

	    for b_idx in range(batch_size):

	        # A set gives O(1) membership tests; a list made this loop quadratic
	        suppressed = set()
	        keep_idx = []
	        b_iou = iou[b_idx]

	        for idx in range(len(b_iou)):
	            if idx in suppressed:
	                continue
	            keep_idx.append(idx)
	            # Only later boxes can be suppressed; shift the offsets back to absolute indices
	            overlapping = torch.nonzero(b_iou[idx, idx + 1:] >= nms_thresh)[:, 0] + idx + 1
	            suppressed.update(overlapping.tolist())

	        batch_keep.append(keep_idx)

	    # Truncate idx vectors if one has length < post nms topN
	    post_nms_topN = min(min(len(b_keep) for b_keep in batch_keep), post_nms_topN)
	    scores = torch.stack([scores[i, batch_keep[i][:post_nms_topN]] for i in range(batch_size)])
	    bbox_pred = torch.stack([bbox_pred[i, batch_keep[i][:post_nms_topN], :] for i in range(batch_size)])

	    out = bbox_pred, scores

	    if return_idx:
	        out += (batch_keep,)

	    return out


	def get_bbox_regression_targets(bbox_targets, b_labels, num_classes):
	    """
	    One regression objective per object class

	    Scatters each 4-value target row into the column slot of its class.

	    Args:
	        bbox_targets: tensor with one row of 4 regression targets per box.
	        b_labels: 1-D tensor of class indices, one per box. Rows labelled 0
	            (or any value outside 1..num_classes) stay all-zero.
	        num_classes: number of object classes (background excluded).

	    Returns:
	        Tensor of shape (len(bbox_targets), 4 * (1 + num_classes)), allocated
	        on the same device as ``bbox_targets``. Row i is zero except for
	        columns [4 * label_i, 4 * label_i + 4), which hold bbox_targets[i].
	    """
	    # Follow the input's device instead of forcing CUDA, so CPU-only hosts work
	    expanded_bbox_targets = torch.zeros(
	        len(bbox_targets), 4 * (1 + num_classes), device=bbox_targets.device
	    )

	    for i in range(1, num_classes + 1):
	        class_idx = torch.nonzero(b_labels == i)[:, 0]
	        col_idx = 4 * i
	        expanded_bbox_targets[class_idx, col_idx:col_idx + 4] = bbox_targets[class_idx]

	    return expanded_bbox_targets


	def cross_entropy_loss(bbox_classes, labels):
	    """
	    Summed negative log-probability of the ground-truth class of each row.

	    labels must be a flatten (numpy) array of class indices (0 for background)
	    ``bbox_classes`` is indexed as [row, class]; its values are passed straight
	    to ``log``, so they are expected to already be probabilities.
	    """
	    n_rows = len(bbox_classes)

	    # Pick, for every row, the probability assigned to its labelled class
	    picked = bbox_classes[range(n_rows), labels]

	    return torch.log(picked).neg().sum()


	def smooth_l1_loss(bbox_reg, bbox_targets):
	    """
	    Element-wise smooth L1 (Huber, threshold 1) loss, without reduction.

	    For each element with d = |bbox_reg - bbox_targets|:
	        0.5 * d ** 2   if d < 1
	        d - 0.5        otherwise

	    Args:
	        bbox_reg: predicted regression values.
	        bbox_targets: target values, broadcastable against ``bbox_reg``.

	    Returns:
	        Tensor of per-element losses with the broadcast shape of the inputs.
	    """
	    deltas = torch.abs(bbox_reg - bbox_targets)

	    # True where the linear branch applies
	    mask_smoothing = (deltas >= 1)

	    quadratic = (~mask_smoothing).float() * 0.5 * (deltas ** 2)
	    linear = mask_smoothing.float() * (deltas - 0.5)
	    smoothed_l1 = quadratic + linear

	    return smoothed_l1


	def bool_parser(string):
	    """
	    Parse a textual flag: False only for 'false' (any letter case).

	    Every other string, including the empty string and '0', yields True.
	    """
	    return string.lower() != 'false'


	def train_test_split(length, val_prop):
	    """
	    Randomly split the indices 0..length-1 into train and validation sets.

	    The indices are shuffled in place with NumPy's global random state; the
	    first int(val_prop * length) shuffled indices form the validation set and
	    the rest the training set. Returns (train_indices, val_indices).
	    """
	    order = np.arange(length)
	    np.random.shuffle(order)

	    n_val = int(val_prop * length)
	    val_indices = order[:n_val]
	    train_indices = order[n_val:]

	    return train_indices, val_indices

	def position_encodings(x, device):
	    """
	    Build sinusoidal 2-D position planes matching the spatial size of ``x``.

	    ``x`` must have a 4-element shape (bs, channels, height, width). With
	    n = int(channels / 4) frequencies, the output channels are, in order:
	    n sine planes varying along the width, n cosine planes along the width,
	    n sine planes along the height and n cosine planes along the height.

	    Returns a float tensor of shape (bs, 4 * n, height, width) on ``device``;
	    the same planes are repeated for every batch item.
	    """
	    bs, channels, height, width = x.shape
	    col_idx = np.arange(width)
	    row_idx = np.arange(height)

	    # One divisor per frequency index k
	    freq_scales = [1e4 ** (2 * k / channels) for k in range(int(channels / 4))]

	    def _along_width(fn, scale):
	        # Values depend on the column only; copy the row down every line
	        return np.tile(fn(col_idx * 128 / (width * scale)), (height, 1))

	    def _along_height(fn, scale):
	        # Values depend on the row only; copy the column across every column
	        return np.tile(fn(row_idx * 128 / (height * scale))[:, np.newaxis], (1, width))

	    layout = (
	        (_along_width, np.sin),
	        (_along_width, np.cos),
	        (_along_height, np.sin),
	        (_along_height, np.cos),
	    )

	    planes = []
	    for make_plane, fn in layout:
	        planes += [make_plane(fn, scale) for scale in freq_scales]

	    encoding = torch.from_numpy(np.stack(planes)).to(device)

	    return encoding.unsqueeze(0).repeat(bs, 1, 1, 1).float()