File size: 5,388 Bytes
d670799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np


def temporal_iou(proposal_min, proposal_max, gt_min, gt_max):
    """Compute IoU score between a groundtruth bbox and the proposals.

    The four arguments are combined element-wise, so any mix of scalars and
    arrays that NumPy can broadcast together is accepted. Plain Python lists
    or tuples are converted to arrays first.

    Args:
        proposal_min (list[float] | np.ndarray | float): Temporal anchor min.
        proposal_max (list[float] | np.ndarray | float): Temporal anchor max.
        gt_min (float | list[float] | np.ndarray): Groundtruth temporal box
            min.
        gt_max (float | list[float] | np.ndarray): Groundtruth temporal box
            max.

    Returns:
        np.ndarray | np.floating: IoU scores, one per broadcast element.
        A zero-length union is not guarded against; the division is left to
        ``np.divide``.
    """
    # Lists/tuples do not support element-wise subtraction, so promote them
    # to arrays. Scalars and ndarrays pass through untouched so that their
    # dtype handling stays exactly as before.
    proposal_min, proposal_max, gt_min, gt_max = (
        np.asarray(value) if isinstance(value, (list, tuple)) else value
        for value in (proposal_min, proposal_max, gt_min, gt_max))

    len_anchors = proposal_max - proposal_min
    int_tmin = np.maximum(proposal_min, gt_min)
    int_tmax = np.minimum(proposal_max, gt_max)
    # Disjoint intervals give a negative difference; clamp it to 0.
    inter_len = np.maximum(int_tmax - int_tmin, 0.)
    union_len = len_anchors - inter_len + gt_max - gt_min
    jaccard = np.divide(inter_len, union_len)
    return jaccard


def temporal_iop(proposal_min, proposal_max, gt_min, gt_max):
    """Compute IoP score between a groundtruth bbox and the proposals.

    Compute the IoP which is defined as the overlap ratio with
    groundtruth proportional to the duration of this proposal.

    Plain Python lists or tuples given for the proposals are converted to
    arrays first, so they are accepted alongside scalars and ``np.ndarray``.

    Args:
        proposal_min (list[float] | np.ndarray | float): Temporal anchor min.
        proposal_max (list[float] | np.ndarray | float): Temporal anchor max.
        gt_min (float): Groundtruth temporal box min.
        gt_max (float): Groundtruth temporal box max.

    Returns:
        np.ndarray | np.floating: Intersection over anchor scores. A
        zero-length proposal is not guarded against; the division is left
        to ``np.divide``.
    """
    # Lists/tuples do not support element-wise subtraction, so promote them
    # to arrays. Scalars and ndarrays pass through untouched so that their
    # dtype handling stays exactly as before.
    proposal_min, proposal_max = (
        np.asarray(value) if isinstance(value, (list, tuple)) else value
        for value in (proposal_min, proposal_max))

    len_anchors = np.array(proposal_max - proposal_min)
    int_tmin = np.maximum(proposal_min, gt_min)
    int_tmax = np.minimum(proposal_max, gt_max)
    # Disjoint intervals give a negative difference; clamp it to 0.
    inter_len = np.maximum(int_tmax - int_tmin, 0.)
    scores = np.divide(inter_len, len_anchors)
    return scores


def soft_nms(proposals, alpha, low_threshold, high_threshold, top_k):
    """Soft NMS for temporal proposals.

    Repeatedly picks the highest-scoring remaining proposal, decays the
    scores of the other remaining proposals that overlap it strongly, and
    moves the picked proposal to the output.

    Args:
        proposals (np.ndarray): Proposals generated by network. Column 0 is
            read as the start, column 1 as the end and the last column as
            the score.
        alpha (float): Alpha value of Gaussian decaying function.
        low_threshold (float): Low threshold for soft nms.
        high_threshold (float): High threshold for soft nms.
        top_k (int): Top k values to be considered. Selection continues
            while the number of kept proposals is ``<= top_k``, so up to
            ``top_k + 1`` rows can be returned.

    Returns:
        np.ndarray: The updated proposals, one ``(start, end, score)`` row
        per kept proposal, in the order they were selected.
    """
    # Work on a copy ordered by descending score.
    ranked = proposals[np.argsort(proposals[:, -1])[::-1]]
    pending_start = list(ranked[:, 0])
    pending_end = list(ranked[:, 1])
    pending_score = list(ranked[:, -1])
    kept = []

    while pending_score and len(kept) <= top_k:
        best = np.argmax(pending_score)
        best_start = pending_start[best]
        best_end = pending_end[best]
        best_width = best_end - best_start

        overlaps = temporal_iou(best_start, best_end,
                                np.array(pending_start),
                                np.array(pending_end))
        decay = np.exp(-np.square(overlaps) / alpha)
        # The IoU cut-off is interpolated between the two thresholds by the
        # width of the selected proposal.
        cutoff = low_threshold + (high_threshold -
                                  low_threshold) * best_width

        for idx in range(len(pending_score)):
            if idx == best:
                continue
            if overlaps[idx] > cutoff:
                pending_score[idx] = pending_score[idx] * decay[idx]

        # Move the selected proposal from the pending pool to the output.
        kept.append((pending_start.pop(best), pending_end.pop(best),
                     pending_score.pop(best)))

    new_proposals = np.array(kept).reshape(-1, 3)
    return new_proposals


def post_processing(result, video_info, soft_nms_alpha, soft_nms_low_threshold,
                    soft_nms_high_threshold, post_process_top_k,
                    feature_extraction_interval):
    """Post process for temporal proposals generation.

    Runs soft NMS when there is more than one proposal, sorts the proposals
    by descending score, keeps at most ``post_process_top_k`` of them and
    scales their start/end (clipped to ``[0, 1]``) by the video duration.

    Args:
        result (np.ndarray): Proposals generated by network. Column 0 is
            read as the start, column 1 as the end and the last column as
            the score.
        video_info (dict): Meta data of video. Required keys are
            'duration_frame', 'duration_second'.
        soft_nms_alpha (float): Alpha value of Gaussian decaying function.
        soft_nms_low_threshold (float): Low threshold for soft nms.
        soft_nms_high_threshold (float): High threshold for soft nms.
        post_process_top_k (int): Top k values to be considered.
        feature_extraction_interval (int): Interval used in feature extraction.

    Returns:
        list[dict]: The updated proposals, e.g.
            [{'score': 0.9, 'segment': [0, 1]},
             {'score': 0.8, 'segment': [0, 2]},
            ...].
        Both the score and the segment bounds are built-in ``float`` values.
    """
    if len(result) > 1:
        result = soft_nms(result, soft_nms_alpha, soft_nms_low_threshold,
                          soft_nms_high_threshold, post_process_top_k)

    result = result[result[:, -1].argsort()[::-1]]
    # Duration covered by whole feature-extraction intervals: the frame count
    # is rounded down to a multiple of the interval before being converted
    # to seconds.
    video_duration = float(
        video_info['duration_frame'] // feature_extraction_interval *
        feature_extraction_interval
    ) / video_info['duration_frame'] * video_info['duration_second']

    # Clamp at 0 so a negative top_k yields no proposals (as range() did).
    num_kept = max(min(post_process_top_k, len(result)), 0)
    proposal_list = []
    for row in result[:num_kept]:
        # Cast after the multiplication so the values are unchanged while the
        # segment bounds become built-in floats, matching 'score', instead of
        # NumPy scalars.
        segment = [
            float(max(0, row[0]) * video_duration),
            float(min(1, row[1]) * video_duration),
        ]
        proposal_list.append({'score': float(row[-1]), 'segment': segment})
    return proposal_list