# File size: 4,810 Bytes
import os
import shutil
import time
import json
import pickle
from typing import Dict
import numpy as np
import torch
from .metrics import ANETdetection
def load_results_from_pkl(filename):
    """Return the object stored in the pickle file at ``filename``.

    An ``AssertionError`` is raised when ``filename`` is not an existing
    regular file.

    NOTE: unpickling can execute arbitrary code, so this must only be used
    on files from a trusted source.
    """
    assert os.path.isfile(filename)
    with open(filename, "rb") as handle:
        return pickle.load(handle)
def load_results_from_json(filename):
    """Return the content of the JSON file at ``filename``.

    When the decoded content contains a 'results' entry (the layout used by
    the ActivityNet external classification score files), only that entry is
    returned; otherwise the decoded content is returned unchanged.

    An ``AssertionError`` is raised when ``filename`` is not an existing
    regular file.
    """
    assert os.path.isfile(filename)
    with open(filename, "r") as handle:
        payload = json.load(handle)
    # unwrap the payload for ActivityNet-style external classification scores
    return payload['results'] if 'results' in payload else payload
def results_to_dict(results):
    """Group flat detection columns by video id, in the layout used by json files.

    ``results`` holds parallel sequences under the keys 'video-id',
    't-start', 't-end', 'label' and 'score'. The returned dict maps every
    video id (inserted in sorted order) to a list of
    ``{"label": int, "score": float, "segment": [start, end]}`` entries, kept
    in the order in which they appear in the input.
    """
    # one empty bucket per distinct video id
    grouped = {vid: [] for vid in sorted(set(results['video-id']))}

    columns = (
        results['video-id'],
        results['t-start'],
        results['t-end'],
        results['label'],
        results['score'],
    )
    for vid, t_start, t_end, cls, conf in zip(*columns):
        entry = {
            "label": int(cls),
            "score": float(conf),
            "segment": [float(t_start), float(t_end)],
        }
        grouped[vid].append(entry)
    return grouped
def results_to_array(results, num_pred):
    """Group flat detection columns by video id as numpy arrays.

    ``results`` holds parallel sequences under the keys 'video-id',
    't-start', 't-end', 'label' and 'score'. The returned dict maps every
    video id (inserted in sorted order) to a dict with the arrays 'label',
    'score' and 'segment' (rows of ``[start, end]``), ordered by descending
    score and truncated to at most ``num_pred`` entries.
    """
    per_video = {
        vid: {'label': [], 'score': [], 'segment': []}
        for vid in sorted(set(results['video-id']))
    }

    # distribute the flat rows over their videos
    rows = zip(
        results['video-id'],
        results['t-start'],
        results['t-end'],
        results['label'],
        results['score'],
    )
    for vid, t_start, t_end, cls, conf in rows:
        bucket = per_video[vid]
        bucket['label'].append(int(cls))
        bucket['score'].append(float(conf))
        bucket['segment'].append([float(t_start), float(t_end)])

    # convert to arrays and keep the best-scoring num_pred entries;
    # scores are expected to arrive sorted already, re-sorting is a safeguard
    for bucket in per_video.values():
        labels = np.asarray(bucket['label'])
        scores = np.asarray(bucket['score'])
        segments = np.asarray(bucket['segment'])
        keep = np.argsort(scores)[::-1][:num_pred]
        bucket['label'] = labels[keep]
        bucket['score'] = scores[keep]
        bucket['segment'] = segments[keep]
    return per_video
def _concat_chunks(chunks, dtype=float):
    """Concatenate per-video 1-D arrays; return an empty array when there are none."""
    if not chunks:
        # np.concatenate raises ValueError on an empty list
        return np.empty((0,), dtype=dtype)
    return np.concatenate(chunks, axis=0)


def postprocess_results(results, cls_score_file, num_pred=200, topk=2):
    """Re-label detections using external video-level classification scores.

    For every video, the (at most ``num_pred``) best-scoring segments are
    duplicated once per selected class, where the selected classes are the
    ``topk`` highest entries of that video's external classification scores.
    Each copy is labelled with the class index and scored with
    ``sqrt(class_score * segment_score)``.

    Args:
        results: dict with the parallel sequences 'video-id', 't-start',
            't-end', 'label' and 'score', or the path (``str``) of a pickle
            file containing such a dict.
        cls_score_file: path of the external classification scores. It is
            read as JSON when the path contains '.json', otherwise as a
            pickle file. The loaded object is indexed by video id; each entry
            is presumably a 1-D sequence of per-class scores (confirm against
            the score files in use).
        num_pred: maximum number of segments kept per video.
        topk: maximum number of classes assigned to every segment. When a
            video has fewer class scores than ``topk``, all of them are used.

    Returns:
        dict with the keys 'video-id' (list), 't-start', 't-end', 'label' and
        'score' (1-D numpy arrays), all of equal length. The arrays are empty
        when ``results`` contains no detections.

    Raises:
        KeyError: if a video id in ``results`` has no entry in the
            classification scores.
    """
    # load results and convert to per-video arrays
    if isinstance(results, str):
        results = load_results_from_pkl(results)
    results = results_to_array(results, num_pred)

    # load external classification scores
    if '.json' in cls_score_file:
        cls_scores = load_results_from_json(cls_score_file)
    else:
        cls_scores = load_results_from_pkl(cls_score_file)

    video_ids = []
    starts, ends, labels, scores = [], [], [], []

    for vid, result in results.items():
        # pick the top k classification scores and their class indices
        curr_cls_scores = np.asarray(cls_scores[vid])
        topk_cls_idx = np.argsort(curr_cls_scores)[::-1][:topk]
        topk_cls_score = curr_cls_scores[topk_cls_idx]
        # may be smaller than topk when fewer class scores are available;
        # every tiling below must use this count to keep the columns aligned
        num_cls = len(topk_cls_idx)

        # model outputs (already truncated to num_pred by results_to_array)
        pred_score, pred_segment = result['score'], result['segment']
        num_segs = len(pred_score)

        # duplicate all segments and assign the selected labels:
        # (K x 1) @ (1 x N) -> K x N -> flattened to K*N, class-major
        new_pred_score = np.sqrt(
            topk_cls_score[:, None] @ pred_score[None, :]
        ).flatten()
        new_pred_segment = np.tile(pred_segment, (num_cls, 1))
        new_pred_label = np.tile(
            topk_cls_idx[:, None], (1, num_segs)
        ).flatten()

        video_ids.extend([vid] * (num_segs * num_cls))
        starts.append(new_pred_segment[:, 0])
        ends.append(new_pred_segment[:, 1])
        labels.append(new_pred_label)
        scores.append(new_pred_score)

    return {
        'video-id': video_ids,
        't-start': _concat_chunks(starts),
        't-end': _concat_chunks(ends),
        'label': _concat_chunks(labels, dtype=np.int64),
        'score': _concat_chunks(scores),
    }
|