# Copyright (c) OpenMMLab. All rights reserved. import numpy as np def temporal_iou(proposal_min, proposal_max, gt_min, gt_max): """Compute IoU score between a groundtruth bbox and the proposals. Args: proposal_min (list[float]): List of temporal anchor min. proposal_max (list[float]): List of temporal anchor max. gt_min (float): Groundtruth temporal box min. gt_max (float): Groundtruth temporal box max. Returns: list[float]: List of iou scores. """ len_anchors = proposal_max - proposal_min int_tmin = np.maximum(proposal_min, gt_min) int_tmax = np.minimum(proposal_max, gt_max) inter_len = np.maximum(int_tmax - int_tmin, 0.) union_len = len_anchors - inter_len + gt_max - gt_min jaccard = np.divide(inter_len, union_len) return jaccard def temporal_iop(proposal_min, proposal_max, gt_min, gt_max): """Compute IoP score between a groundtruth bbox and the proposals. Compute the IoP which is defined as the overlap ratio with groundtruth proportional to the duration of this proposal. Args: proposal_min (list[float]): List of temporal anchor min. proposal_max (list[float]): List of temporal anchor max. gt_min (float): Groundtruth temporal box min. gt_max (float): Groundtruth temporal box max. Returns: list[float]: List of intersection over anchor scores. """ len_anchors = np.array(proposal_max - proposal_min) int_tmin = np.maximum(proposal_min, gt_min) int_tmax = np.minimum(proposal_max, gt_max) inter_len = np.maximum(int_tmax - int_tmin, 0.) scores = np.divide(inter_len, len_anchors) return scores def soft_nms(proposals, alpha, low_threshold, high_threshold, top_k): """Soft NMS for temporal proposals. Args: proposals (np.ndarray): Proposals generated by network. alpha (float): Alpha value of Gaussian decaying function. low_threshold (float): Low threshold for soft nms. high_threshold (float): High threshold for soft nms. top_k (int): Top k values to be considered. Returns: np.ndarray: The updated proposals. """ proposals = proposals[proposals[:, -1].argsort()[::-1]] tstart = list(proposals[:, 0]) tend = list(proposals[:, 1]) tscore = list(proposals[:, -1]) rstart = [] rend = [] rscore = [] while len(tscore) > 0 and len(rscore) <= top_k: max_index = np.argmax(tscore) max_width = tend[max_index] - tstart[max_index] iou_list = temporal_iou(tstart[max_index], tend[max_index], np.array(tstart), np.array(tend)) iou_exp_list = np.exp(-np.square(iou_list) / alpha) for idx, _ in enumerate(tscore): if idx != max_index: current_iou = iou_list[idx] if current_iou > low_threshold + (high_threshold - low_threshold) * max_width: tscore[idx] = tscore[idx] * iou_exp_list[idx] rstart.append(tstart[max_index]) rend.append(tend[max_index]) rscore.append(tscore[max_index]) tstart.pop(max_index) tend.pop(max_index) tscore.pop(max_index) rstart = np.array(rstart).reshape(-1, 1) rend = np.array(rend).reshape(-1, 1) rscore = np.array(rscore).reshape(-1, 1) new_proposals = np.concatenate((rstart, rend, rscore), axis=1) return new_proposals def post_processing(result, video_info, soft_nms_alpha, soft_nms_low_threshold, soft_nms_high_threshold, post_process_top_k, feature_extraction_interval): """Post process for temporal proposals generation. Args: result (np.ndarray): Proposals generated by network. video_info (dict): Meta data of video. Required keys are 'duration_frame', 'duration_second'. soft_nms_alpha (float): Alpha value of Gaussian decaying function. soft_nms_low_threshold (float): Low threshold for soft nms. soft_nms_high_threshold (float): High threshold for soft nms. post_process_top_k (int): Top k values to be considered. feature_extraction_interval (int): Interval used in feature extraction. Returns: list[dict]: The updated proposals, e.g. [{'score': 0.9, 'segment': [0, 1]}, {'score': 0.8, 'segment': [0, 2]}, ...]. """ if len(result) > 1: result = soft_nms(result, soft_nms_alpha, soft_nms_low_threshold, soft_nms_high_threshold, post_process_top_k) result = result[result[:, -1].argsort()[::-1]] video_duration = float( video_info['duration_frame'] // feature_extraction_interval * feature_extraction_interval ) / video_info['duration_frame'] * video_info['duration_second'] proposal_list = [] for j in range(min(post_process_top_k, len(result))): proposal = {} proposal['score'] = float(result[j, -1]) proposal['segment'] = [ max(0, result[j, 0]) * video_duration, min(1, result[j, 1]) * video_duration ] proposal_list.append(proposal) return proposal_list