File size: 5,388 Bytes
d670799 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
def temporal_iou(proposal_min, proposal_max, gt_min, gt_max):
"""Compute IoU score between a groundtruth bbox and the proposals.
Args:
proposal_min (list[float]): List of temporal anchor min.
proposal_max (list[float]): List of temporal anchor max.
gt_min (float): Groundtruth temporal box min.
gt_max (float): Groundtruth temporal box max.
Returns:
list[float]: List of iou scores.
"""
len_anchors = proposal_max - proposal_min
int_tmin = np.maximum(proposal_min, gt_min)
int_tmax = np.minimum(proposal_max, gt_max)
inter_len = np.maximum(int_tmax - int_tmin, 0.)
union_len = len_anchors - inter_len + gt_max - gt_min
jaccard = np.divide(inter_len, union_len)
return jaccard
def temporal_iop(proposal_min, proposal_max, gt_min, gt_max):
"""Compute IoP score between a groundtruth bbox and the proposals.
Compute the IoP which is defined as the overlap ratio with
groundtruth proportional to the duration of this proposal.
Args:
proposal_min (list[float]): List of temporal anchor min.
proposal_max (list[float]): List of temporal anchor max.
gt_min (float): Groundtruth temporal box min.
gt_max (float): Groundtruth temporal box max.
Returns:
list[float]: List of intersection over anchor scores.
"""
len_anchors = np.array(proposal_max - proposal_min)
int_tmin = np.maximum(proposal_min, gt_min)
int_tmax = np.minimum(proposal_max, gt_max)
inter_len = np.maximum(int_tmax - int_tmin, 0.)
scores = np.divide(inter_len, len_anchors)
return scores
def soft_nms(proposals, alpha, low_threshold, high_threshold, top_k):
"""Soft NMS for temporal proposals.
Args:
proposals (np.ndarray): Proposals generated by network.
alpha (float): Alpha value of Gaussian decaying function.
low_threshold (float): Low threshold for soft nms.
high_threshold (float): High threshold for soft nms.
top_k (int): Top k values to be considered.
Returns:
np.ndarray: The updated proposals.
"""
proposals = proposals[proposals[:, -1].argsort()[::-1]]
tstart = list(proposals[:, 0])
tend = list(proposals[:, 1])
tscore = list(proposals[:, -1])
rstart = []
rend = []
rscore = []
while len(tscore) > 0 and len(rscore) <= top_k:
max_index = np.argmax(tscore)
max_width = tend[max_index] - tstart[max_index]
iou_list = temporal_iou(tstart[max_index], tend[max_index],
np.array(tstart), np.array(tend))
iou_exp_list = np.exp(-np.square(iou_list) / alpha)
for idx, _ in enumerate(tscore):
if idx != max_index:
current_iou = iou_list[idx]
if current_iou > low_threshold + (high_threshold -
low_threshold) * max_width:
tscore[idx] = tscore[idx] * iou_exp_list[idx]
rstart.append(tstart[max_index])
rend.append(tend[max_index])
rscore.append(tscore[max_index])
tstart.pop(max_index)
tend.pop(max_index)
tscore.pop(max_index)
rstart = np.array(rstart).reshape(-1, 1)
rend = np.array(rend).reshape(-1, 1)
rscore = np.array(rscore).reshape(-1, 1)
new_proposals = np.concatenate((rstart, rend, rscore), axis=1)
return new_proposals
def post_processing(result, video_info, soft_nms_alpha, soft_nms_low_threshold,
soft_nms_high_threshold, post_process_top_k,
feature_extraction_interval):
"""Post process for temporal proposals generation.
Args:
result (np.ndarray): Proposals generated by network.
video_info (dict): Meta data of video. Required keys are
'duration_frame', 'duration_second'.
soft_nms_alpha (float): Alpha value of Gaussian decaying function.
soft_nms_low_threshold (float): Low threshold for soft nms.
soft_nms_high_threshold (float): High threshold for soft nms.
post_process_top_k (int): Top k values to be considered.
feature_extraction_interval (int): Interval used in feature extraction.
Returns:
list[dict]: The updated proposals, e.g.
[{'score': 0.9, 'segment': [0, 1]},
{'score': 0.8, 'segment': [0, 2]},
...].
"""
if len(result) > 1:
result = soft_nms(result, soft_nms_alpha, soft_nms_low_threshold,
soft_nms_high_threshold, post_process_top_k)
result = result[result[:, -1].argsort()[::-1]]
video_duration = float(
video_info['duration_frame'] // feature_extraction_interval *
feature_extraction_interval
) / video_info['duration_frame'] * video_info['duration_second']
proposal_list = []
for j in range(min(post_process_top_k, len(result))):
proposal = {}
proposal['score'] = float(result[j, -1])
proposal['segment'] = [
max(0, result[j, 0]) * video_duration,
min(1, result[j, 1]) * video_duration
]
proposal_list.append(proposal)
return proposal_list
|