| import json |
| import numpy as np |
| import pandas as pd |
| import os |
| from libs.utils.Evaluation.utils import get_blocked_videos |
|
|
|
|
|
|
def interpolated_prec_rec(prec, rec):
    """Return the interpolated average precision (VOC 2011 devkit style).

    Parameters
    ----------
    prec : 1d array-like
        Precision values, ordered consistently with ``rec``.
    rec : 1d array-like
        Recall values matching ``prec``.

    Outputs
    -------
    ap : float
        Area under the precision envelope, summed over the points where
        recall changes.
    """
    # Pad with sentinels so the curve spans recall 0 -> 1.
    padded_prec = np.hstack([[0], prec, [0]])
    padded_rec = np.hstack([[0], rec, [1]])

    # Sweep right-to-left so every precision becomes the maximum of itself
    # and everything to its right (monotone non-increasing envelope).
    for pos in reversed(range(len(padded_prec) - 1)):
        right_value = padded_prec[pos + 1]
        if right_value > padded_prec[pos]:
            padded_prec[pos] = right_value

    # Only positions where recall actually changes contribute area.
    change_points = np.flatnonzero(padded_rec[1:] != padded_rec[:-1]) + 1
    recall_steps = padded_rec[change_points] - padded_rec[change_points - 1]
    return np.sum(recall_steps * padded_prec[change_points])
|
|
|
|
def segment_iou(target_segment, candidate_segments):
    """Compute the temporal intersection over union between a
    target segment and all the test segments.

    Parameters
    ----------
    target_segment : 1d array-like
        Temporal target segment containing [starting, ending] times.
    candidate_segments : 2d array-like
        Temporal candidate segments containing N x [starting, ending] times.

    Outputs
    -------
    tiou : 1d array
        Temporal intersection over union score of the N's candidate segments.
        Pairs whose union has zero length (both segments are degenerate)
        score 0.0 instead of producing NaN.
    """
    # Accept lists/tuples as well as ndarrays.
    target_segment = np.asarray(target_segment)
    candidate_segments = np.asarray(candidate_segments)

    # Overlap boundaries: latest start and earliest end.
    tt1 = np.maximum(target_segment[0], candidate_segments[:, 0])
    tt2 = np.minimum(target_segment[1], candidate_segments[:, 1])

    # Non-overlapping pairs yield a negative difference; clip it to zero.
    segments_intersection = (tt2 - tt1).clip(0).astype(float)

    # |A| + |B| - |A intersect B|
    segments_union = ((candidate_segments[:, 1] - candidate_segments[:, 0])
                      + (target_segment[1] - target_segment[0])
                      - segments_intersection)

    # Divide only where the union is non-zero; the remaining entries keep
    # the 0.0 they were initialised with (avoids 0/0 -> NaN + warning).
    tIoU = np.zeros_like(segments_intersection)
    np.divide(segments_intersection, segments_union, out=tIoU,
              where=segments_union != 0)
    return tIoU
|
|
|
|
def wrapper_segment_iou(target_segments, candidate_segments):
    """Compute the pairwise temporal IoU between two sets of segments.

    Parameters
    ----------
    target_segments : ndarray
        2-dim array in format [m x 2:=[init, end]]
    candidate_segments : ndarray
        2-dim array in format [n x 2:=[init, end]]

    Outputs
    -------
    tiou : ndarray
        2-dim array [n x m] with IOU ratio; ``tiou[j, i]`` relates candidate
        ``j`` to target ``i``.

    Raises
    ------
    ValueError
        If either argument is not 2-dimensional.

    Note: the Python-level loop runs over the target segments, one
    vectorised ``segment_iou`` call per target.
    """
    if not (candidate_segments.ndim == 2 and target_segments.ndim == 2):
        raise ValueError('Dimension of arguments is incorrect')

    num_candidates = candidate_segments.shape[0]
    num_targets = target_segments.shape[0]
    tiou = np.empty((num_candidates, num_targets))

    # Fill one column per target segment.
    for column, target in enumerate(target_segments):
        tiou[:, column] = segment_iou(target, candidate_segments)

    return tiou
|
|
|
|
class ANETproposal(object):
    """Evaluate temporal proposals via the average-recall vs. average-number
    of proposals (AR-AN) curve.

    On construction the ground truth and proposal JSON files are loaded into
    pandas data frames; ``evaluate`` then fills ``recall``, ``avg_recall``
    and ``proposals_per_video``.
    """

    GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version']
    PROPOSAL_FIELDS = ['results', 'version', 'external_data']
    # Trailing extensions removed from ground-truth file names to obtain ids.
    _MEDIA_EXTENSIONS = ('.mp4', '.wav')

    def __init__(self, ground_truth_filename=None, proposal_filename=None, dataset_name='',
                 ground_truth_fields=GROUND_TRUTH_FIELDS,
                 proposal_fields=PROPOSAL_FIELDS,
                 tiou_thresholds=np.linspace(0.5, 0.95, 10),
                 max_avg_nr_proposals=None,
                 subset='validation', verbose=False,
                 check_status=False):
        """Load ground truth and proposals.

        Parameters
        ----------
        ground_truth_filename : str
            Path to the ground truth json file.
        proposal_filename : str
            Path to the proposal json file.
        dataset_name : str, optional
            Name used only in the verbose result banner.
        ground_truth_fields : list of str, optional
            Stored as ``gt_fields``; not checked by the loader.
        proposal_fields : list of str, optional
            Top-level keys the proposal file must contain.
        tiou_thresholds : 1darray, optional
            Temporal IoU thresholds to average recall over.
        max_avg_nr_proposals : int or None, optional
            Cap on the average number of proposals per video.
        subset : str, optional
            Only ground-truth entries whose 'split' equals this are kept.
        verbose : bool, optional
            Print loading / result summaries.
        check_status : bool, optional
            When true, videos returned by ``get_blocked_videos()`` are skipped.

        Raises
        ------
        IOError
            If a filename is missing or the proposal file lacks a required
            field.
        """
        if not ground_truth_filename:
            raise IOError('Please input a valid ground truth file.')
        if not proposal_filename:
            raise IOError('Please input a valid proposal file.')

        self.subset = subset
        self.tiou_thresholds = tiou_thresholds
        self.max_avg_nr_proposals = max_avg_nr_proposals
        self.verbose = verbose
        self.gt_fields = ground_truth_fields
        self.pred_fields = proposal_fields
        self.recall = None
        self.avg_recall = None
        self.proposals_per_video = None
        self.check_status = check_status
        self.dataset_name = dataset_name

        # The block list must be known before the files are imported because
        # both loaders filter on it.
        if self.check_status:
            self.blocked_videos = get_blocked_videos()
        else:
            self.blocked_videos = list()

        self.ground_truth, self.activity_index = self._import_ground_truth(
            ground_truth_filename)
        self.proposal = self._import_proposal(proposal_filename)

        if self.verbose:
            print('[INIT] Loaded annotations from {} subset.'.format(subset))
            nr_gt = len(self.ground_truth)
            print('\tNumber of ground truth instances: {}'.format(nr_gt))
            nr_pred = len(self.proposal)
            print('\tNumber of proposals: {}'.format(nr_pred))
            print('\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds))

    @staticmethod
    def _video_id_from_path(path):
        """Return the base name of *path* without a trailing .mp4/.wav."""
        name = os.path.basename(path)
        for ext in ANETproposal._MEDIA_EXTENSIONS:
            if name.endswith(ext):
                # Strip the suffix only; str.replace would also remove the
                # same text appearing in the middle of the name.
                return name[:-len(ext)]
        return name

    def _import_ground_truth(self, ground_truth_filename):
        """Reads the ground truth file and returns the ground truth instances
        and the activity classes.

        The file may hold either a list of entries or a dict whose values are
        the entries. Each entry is read through its 'file', 'split',
        'n_fakes' and 'fake_periods' keys. Entries from another split,
        blocked videos and entries with ``n_fakes == 0`` are skipped. No
        schema validation is performed.

        Parameters
        ----------
        ground_truth_filename : str
            Full path to the ground truth json file.

        Outputs
        -------
        ground_truth : df
            Data frame with columns 'video-id', 't-start', 't-end', 'label';
            one row per fake period.
        activity_index : dict
            Dictionary containing class index (always ``{'Fake': 0}``).
        """
        with open(ground_truth_filename, 'r', encoding='utf-8') as fobj:
            data = json.load(fobj)

        activity_index = {'Fake': 0}
        entries = data.values() if isinstance(data, dict) else data

        video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], []
        for v in entries:
            videoid = self._video_id_from_path(v['file'])
            if self.subset != v['split']:
                continue
            if videoid in self.blocked_videos:
                continue
            if v['n_fakes'] == 0:
                continue
            for ann in v['fake_periods']:
                video_lst.append(videoid)
                t_start_lst.append(float(ann[0]))
                t_end_lst.append(float(ann[1]))
                label_lst.append(0)

        ground_truth = pd.DataFrame({'video-id': video_lst,
                                     't-start': t_start_lst,
                                     't-end': t_end_lst,
                                     'label': label_lst})
        if self.verbose:
            print(activity_index)
        return ground_truth, activity_index

    def _import_proposal(self, proposal_filename):
        """Reads proposal file, checks that the required top-level fields are
        present, and returns the proposal instances.

        Parameters
        ----------
        proposal_filename : str
            Full path to the proposal json file.

        Outputs
        -------
        proposal : df
            Data frame with columns 'video-id', 't-start', 't-end', 'score'.

        Raises
        ------
        IOError
            If any field listed in ``self.pred_fields`` is missing.
        """
        with open(proposal_filename, 'r', encoding='utf-8') as fobj:
            data = json.load(fobj)

        if not all(field in data for field in self.pred_fields):
            raise IOError('Please input a valid proposal file.')

        video_lst, t_start_lst, t_end_lst = [], [], []
        score_lst = []
        for videoid, v in data['results'].items():
            if videoid in self.blocked_videos:
                continue
            for result in v:
                video_lst.append(videoid)
                t_start_lst.append(result['segment'][0])
                t_end_lst.append(result['segment'][1])
                score_lst.append(result['score'])
        proposal = pd.DataFrame({'video-id': video_lst,
                                 't-start': t_start_lst,
                                 't-end': t_end_lst,
                                 'score': score_lst})
        return proposal

    def evaluate(self):
        """Evaluates a proposal file. To measure the performance of a
        method for the proposal task, we compute the area under the
        average recall vs average number of proposals per video curve.

        The results are stored in ``self.recall``, ``self.avg_recall`` and
        ``self.proposals_per_video``; nothing is returned.
        """
        recall, avg_recall, proposals_per_video = average_recall_vs_avg_nr_proposals(
            self.ground_truth, self.proposal,
            max_avg_nr_proposals=self.max_avg_nr_proposals,
            tiou_thresholds=self.tiou_thresholds)

        # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
        # fall back to the old name on older NumPy releases.
        trapezoid = getattr(np, 'trapezoid', None) or np.trapz
        area_under_curve = trapezoid(avg_recall, proposals_per_video)

        # Store results before printing so they survive a failed report.
        self.recall = recall
        self.avg_recall = avg_recall
        self.proposals_per_video = proposals_per_video

        if self.verbose:
            print(f'[RESULTS] Performance on {self.dataset_name} proposal task.')
            print(
                '\tArea Under the AR vs AN curve: {}%'.format(100. * float(area_under_curve) / proposals_per_video[-1]))
|
|
|
|
def average_recall_vs_avg_nr_proposals(ground_truth, proposals,
                                       max_avg_nr_proposals=None,
                                       tiou_thresholds=np.linspace(0.5, 0.95, 11)):
    """ Computes the average recall given an average number
        of proposals per video.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end']
    proposals : df
        Data frame containing the proposal instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    max_avg_nr_proposals : int or None, optional
        Maximum average number of proposals per video. When falsy it defaults
        to ``len(proposals) / number of ground-truth videos``.
    tiou_thresholds : 1d array-like, optional
        Array with tiou thresholds.
        NOTE(review): this default is an 11-point grid while ANETproposal
        passes a 10-point grid over the same range -- confirm which is
        intended before relying on the default.

    Outputs
    -------
    recall : 2darray
        recall[i,j] is recall at ith tiou threshold at the jth average number
        of proposals per video.
    average_recall : 1darray
        recall averaged over a list of tiou threshold. This is equivalent to
        recall.mean(axis=0).
    proposals_per_video : 1darray
        average number of proposals per video.

    Raises
    ------
    ZeroDivisionError
        If ``ground_truth`` or ``proposals`` has no rows, or if no proposal
        is retained for any ground-truth video.
    """
    tiou_thresholds = np.asarray(tiou_thresholds)

    # Get list of videos.
    video_lst = ground_truth['video-id'].unique()
    n_videos = video_lst.shape[0]

    if not max_avg_nr_proposals:
        max_avg_nr_proposals = float(proposals.shape[0]) / n_videos
    # Fraction of each video's proposals kept to honour the requested average.
    ratio = max_avg_nr_proposals * float(n_videos) / proposals.shape[0]

    # Adaptation to query faster.
    ground_truth_gbvn = ground_truth.groupby('video-id')
    proposals_gbvn = proposals.groupby('video-id')
    # get_group raises KeyError for unknown keys, so remember which videos
    # actually have proposals.
    videos_with_proposals = set(proposals['video-id'].unique())

    # For each video, compute tiou scores among the retrieved proposals.
    score_lst = []
    total_nr_proposals = 0
    for videoid in video_lst:
        ground_truth_videoid = ground_truth_gbvn.get_group(videoid)
        this_video_ground_truth = ground_truth_videoid.loc[:, ['t-start', 't-end']].values

        if videoid not in videos_with_proposals:
            # No proposals: a single all-zero column so nothing can match.
            n = this_video_ground_truth.shape[0]
            score_lst.append(np.zeros((n, 1)))
            continue

        proposals_videoid = proposals_gbvn.get_group(videoid)
        this_video_proposals = proposals_videoid.loc[:, ['t-start', 't-end']].values

        # Sort proposals by descending score.
        sort_idx = proposals_videoid['score'].values.argsort()[::-1]
        this_video_proposals = this_video_proposals[sort_idx, :]

        # Keep only the top share of proposals for this video.
        nr_proposals = min(int(this_video_proposals.shape[0] * ratio),
                           this_video_proposals.shape[0])
        total_nr_proposals += nr_proposals
        this_video_proposals = this_video_proposals[:nr_proposals, :]

        # Rows index ground-truth segments, columns index ranked proposals.
        tiou = wrapper_segment_iou(this_video_proposals, this_video_ground_truth)
        score_lst.append(tiou)

    # Given that the length of the videos is really varied, we compute the
    # number of proposals in terms of a ratio of the total proposals
    # retrieved, i.e. average recall at a percentage of proposals retrieved
    # per video.
    pcn_lst = np.arange(1, max_avg_nr_proposals + 1) / float(max_avg_nr_proposals) * (
        max_avg_nr_proposals * float(n_videos) / total_nr_proposals)

    # Neither the positives nor the per-video proposal budgets depend on the
    # tiou threshold, so compute them once.
    positives = np.array([score.shape[0] for score in score_lst], dtype=float)
    total_positives = positives.sum()
    budgets = [
        np.minimum((score.shape[1] * pcn_lst).astype(np.int64), score.shape[1])
        for score in score_lst
    ]

    matches = np.empty((n_videos, pcn_lst.shape[0]))
    recall = np.empty((tiou_thresholds.shape[0], pcn_lst.shape[0]))

    # Iterate over each tiou threshold.
    for ridx, tiou in enumerate(tiou_thresholds):
        for i, score in enumerate(score_lst):
            n_kept = score.shape[1]
            true_positives_tiou = score >= tiou

            # Rank of the first proposal matching each ground-truth segment
            # (n_kept when none matches). A segment is recalled by the top-k
            # proposals exactly when that rank is below k.
            ranks = np.where(true_positives_tiou, np.arange(n_kept), n_kept)
            first_hit = ranks.min(axis=1, initial=n_kept)
            matches[i, :] = (first_hit[:, None] < budgets[i][None, :]).sum(axis=0)

        # Recall over all videos for this threshold.
        recall[ridx, :] = matches.sum(axis=0) / total_positives

    # Recall is averaged across thresholds.
    avg_recall = recall.mean(axis=0)

    # Get the average number of proposals per video.
    proposals_per_video = pcn_lst * (float(total_nr_proposals) / n_videos)

    return recall, avg_recall, proposals_per_video