DIBS / anet_clip /backup /eval_utils.py
Exclibur's picture
Upload folder using huggingface_hub
f1c1609 verified
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import collections
import torch
import numpy as np
import json
from collections import OrderedDict
from tqdm import tqdm
from os.path import dirname, abspath
# Absolute path of the directory that contains this file.
pdvc_dir = dirname(abspath(__file__))
# Put this directory and its `densevid_eval3` / `densevid_eval3/SODA`
# subdirectories at the front of sys.path -- presumably so the
# `densevid_eval3.*` imports below (and whatever those modules import in turn)
# resolve regardless of the current working directory.
sys.path.insert(0, pdvc_dir)
sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3'))
sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA'))
from densevid_eval3.eval_soda import eval_soda
from densevid_eval3.eval_para import eval_para
from densevid_eval3.eval_dvc import eval_dvc
def calculate_avg_proposal_num(json_path):
    """Return the mean number of proposals per video stored in a JSON file.

    Args:
        json_path: Path to a JSON file whose top-level ``'results'`` entry
            maps each video name to a list of proposals.

    Returns:
        The mean list length over all videos, as a NumPy floating scalar.
    """
    # A context manager closes the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(json_path) as f:
        data = json.load(f)
    return np.array([len(v) for v in data['results'].values()]).mean()
def convert_tapjson_to_dvcjson(tap_json, dvc_json):
    """Rewrite a TAP-style proposal file into the DVC result layout.

    For every proposal, ``'segment'`` is renamed to ``'timestamp'`` and
    ``'score'`` to ``'proposal_score'``; ``'sentence_score'`` is kept when
    present and defaults to 0 otherwise. Each video key gets a ``"v_"``
    prefix. ``'version'`` and ``'external_data'`` are overwritten.

    Args:
        tap_json: Path of the input JSON file (must contain ``'results'``).
        dvc_json: Path the converted JSON is written to.

    Raises:
        KeyError: If a proposal lacks ``'segment'`` or ``'score'``.
    """
    with open(tap_json, 'r') as f:
        data = json.load(f)
    data['version'] = "VERSION 1.0"
    data['external_data'] = {'used:': True, 'details': "C3D pretrained on Sports-1M"}
    results = data['results']
    # Iterate over a snapshot of the keys: entries are renamed inside the loop.
    for video_name in list(results.keys()):
        for p_info in results[video_name]:
            p_info['timestamp'] = p_info.pop('segment')
            p_info['proposal_score'] = p_info.pop('score')
            p_info['sentence_score'] = p_info.pop('sentence_score', 0)
        results["v_" + video_name] = results.pop(video_name)
    # Close the output explicitly so the data is flushed before returning.
    with open(dvc_json, 'w') as f:
        json.dump(data, f)
def convert_dvcjson_to_tapjson(dvc_json, tap_json):
    """Rewrite a DVC result file into the TAP-style proposal layout.

    Each event's ``'timestamp'`` becomes ``'segment'`` and its
    ``'proposal_score'`` becomes ``'score'`` (1.0 when absent);
    ``'sentence'`` is copied and ``'sentence_score'`` defaults to 0.

    Args:
        dvc_json: Path of the input JSON file; only its ``'results'`` entry
            is read.
        tap_json: Path the converted JSON is written to.

    Raises:
        KeyError: If an event lacks ``'timestamp'`` or ``'sentence'``.
    """
    with open(dvc_json, 'r') as f:
        data = json.load(f)['results']
    out = {
        'version': "VERSION 1.0",
        'external_data': {'used:': True, 'details': "GT proposals"},
        'results': {},
    }
    for video_name, events in data.items():
        # The first two characters of the key are dropped unconditionally;
        # this undoes the "v_" prefix that convert_tapjson_to_dvcjson adds.
        out['results'][video_name[2:]] = [
            {
                'segment': event['timestamp'],
                'score': event.get('proposal_score', 1.0),
                'sentence': event['sentence'],
                'sentence_score': event.get('sentence_score', 0),
            }
            for event in events
        ]
    with open(tap_json, 'w') as f:
        json.dump(out, f)
def convert_gtjson_to_tapjson(gt_json, tap_json):
    """Turn a ground-truth annotation file into a TAP-style proposal file.

    Every ground-truth timestamp becomes a proposal with score 1.0, paired
    with the sentence at the same index.

    Args:
        gt_json: Path of a JSON file mapping each video name to a dict with
            ``'timestamps'`` and ``'sentences'`` lists.
        tap_json: Path the converted JSON is written to.

    Raises:
        KeyError: If a video entry lacks ``'timestamps'`` or ``'sentences'``.
        IndexError: If a video has fewer sentences than timestamps.
    """
    # Close the input handle deterministically rather than leaking it.
    with open(gt_json, 'r') as f:
        data = json.load(f)
    out = {
        'version': "VERSION 1.0",
        'external_data': {'used:': True, 'details': "GT proposals"},
        'results': {},
    }
    for video_name, annotation in data.items():
        timestamps = annotation['timestamps']
        sentences = annotation['sentences']
        # Index into `sentences` (instead of zip) so a length mismatch still
        # raises rather than being silently truncated.
        out['results'][video_name[2:]] = [
            {'segment': timestamp, 'score': 1., 'sentence': sentences[i]}
            for i, timestamp in enumerate(timestamps)
        ]
    with open(tap_json, 'w') as f:
        json.dump(out, f)
def get_topn_from_dvcjson(dvc_json, out_json, top_n=3, ranking_key='proposal_score', score_thres=-1e8):
    """Keep only the best ``top_n`` proposals of each video and save them.

    Proposals are ranked by ``ranking_key`` in descending order, those whose
    value is not strictly greater than ``score_thres`` are discarded, and at
    most ``top_n`` survivors are kept. Videos left without any proposal are
    omitted from the output. Summary statistics are printed to stdout.

    Args:
        dvc_json: Path of the input JSON file; only ``'results'`` is read.
        out_json: Path the filtered JSON is written to.
        top_n: Maximum number of proposals kept per video.
        ranking_key: Proposal field used for ranking and thresholding.
        score_thres: Exclusive lower bound on ``ranking_key``.

    Raises:
        KeyError: If a proposal lacks ``ranking_key``.
    """
    with open(dvc_json, 'r') as f:
        data = json.load(f)['results']
    out = {
        'version': "VERSION 1.0",
        'external_data': {'used:': True, 'details': "GT proposals"},
        'results': {},
    }
    num = 0
    bad_vid = 0
    for video_name, info in data.items():
        ranked = sorted(info, key=lambda x: x[ranking_key], reverse=True)
        kept = [p for p in ranked if p[ranking_key] > score_thres][:top_n]
        if not kept:
            # Videos with no surviving proposal are counted but not written.
            bad_vid += 1
            continue
        out['results'][video_name] = kept
        num += len(kept)
    # The average is over all input videos (including dropped ones); guard
    # against an empty input, which used to raise ZeroDivisionError.
    avg_num = num / len(data) if data else 0.0
    print('average proosal number: {}'.format(avg_num))
    print('bad videos number: {}'.format(bad_vid))
    print('good videos number: {}'.format(len(out['results'])))
    with open(out_json, 'w') as f:
        json.dump(out, f)
def eval_metrics(dvc_filename, gt_filenames, para_gt_filenames, alpha=0.3, ranking_key='proposal_score', rerank=False, dvc_eval_version='2018', transformer_input_type='queries'):
    """Score a DVC prediction file and collect the metrics in one mapping.

    When ``transformer_input_type`` is ``'prior_proposals'`` only the
    paragraph evaluation (``eval_para``) is run. Otherwise the predictions
    are optionally reranked first (temperature fixed at 2.0), then scored
    with ``eval_dvc`` (each metric averaged over its list of values),
    ``eval_soda`` and ``eval_para``.

    Args:
        dvc_filename: Path of the prediction JSON file.
        gt_filenames: Reference file(s) handed to ``eval_dvc``/``eval_soda``.
        para_gt_filenames: Reference file(s) handed to ``eval_para``.
        alpha: Weight forwarded to ``reranking``.
        ranking_key: Accepted for interface compatibility; not used here.
        rerank: Whether to rerank the predictions before scoring.
        dvc_eval_version: Forwarded to ``eval_dvc`` as ``version``.
        transformer_input_type: Selects which evaluation path is taken.

    Returns:
        A ``defaultdict`` of metric name -> value whose missing keys read
        back as -1.
    """
    score = collections.defaultdict(lambda: -1)

    # Paragraph-only path: nothing else is evaluated for prior proposals.
    if transformer_input_type == 'prior_proposals':
        # NOTE: `referneces` (sic) is the keyword this module passes to
        # eval_para; it has to stay in sync with that function's signature.
        para_only = eval_para(dvc_filename, referneces=para_gt_filenames)
        score.update(para_only)
        return score

    if rerank:
        # reranking() writes a new file and returns its path.
        dvc_filename = reranking(dvc_filename, alpha=alpha, temperature=2.0)

    per_metric_values = eval_dvc(json_path=dvc_filename, reference=gt_filenames, version=dvc_eval_version)
    metrics = {name: sum(values) / len(values) for name, values in per_metric_values.items()}
    metrics.update(eval_soda(dvc_filename, ref_list=gt_filenames))
    metrics.update(eval_para(dvc_filename, referneces=para_gt_filenames))
    score.update(metrics)
    return score
def save_dvc_json(out_json, path):
    """Annotate a result dict with summary statistics and write it as JSON.

    Two keys are added to ``out_json`` in place before it is saved:
    ``'valid_video_num'`` (number of entries in ``'results'``) and
    ``'avg_proposal_num'`` (mean number of proposals per entry, 0.0 when
    there are no entries).

    Args:
        out_json: Dict with a ``'results'`` mapping of video name -> list of
            proposals. Modified in place.
        path: Destination file path.

    Raises:
        KeyError: If ``out_json`` has no ``'results'`` entry.
    """
    # Compute everything before opening the file so that a failure here does
    # not leave a truncated/empty file behind.
    results = out_json['results']
    out_json['valid_video_num'] = len(results)
    if results:
        avg_proposal_num = np.array([len(v) for v in results.values()]).mean().item()
    else:
        # The mean of an empty array is NaN, which is not valid JSON.
        avg_proposal_num = 0.0
    out_json['avg_proposal_num'] = avg_proposal_num
    with open(path, 'w') as f:
        json.dump(out_json, f)
def reranking(p_src, alpha, temperature):
    """Rerank the proposals of every video by a joint score and save them.

    For each proposal the sentence score is divided by
    ``num_words ** temperature + 1e-5`` and combined with the proposal score
    as ``alpha * normalised_sentence_score + proposal_score``; the result is
    stored under ``'joint_score'``. Per video, proposals are sorted by that
    score, truncated to the ``'pred_event_count'`` of the top-ranked
    proposal, and finally ordered by ``'timestamp'``. Videos without any
    proposal are left untouched.

    Args:
        p_src: Path of the input DVC JSON file.
        alpha: Weight of the length-normalised sentence score.
        temperature: Exponent applied to the sentence length (in words).

    Returns:
        Path of the newly written file, derived from ``p_src``, ``alpha``
        and ``temperature``.
    """
    print('alpha: {}, temp: {}'.format(alpha, temperature))
    with open(p_src) as f:
        d = json.load(f)
    for k, v in list(d['results'].items()):
        if not v:
            # Nothing to rank; indexing v[0] below would raise IndexError.
            continue
        sent_scores = [p['sentence_score'] / (float(len(p['sentence'].split()))**(temperature) + 1e-5) for p in v]
        prop_score = [p['proposal_score'] for p in v]
        joint_score = alpha * (np.array(sent_scores)) + (np.array(prop_score))
        for i, p in enumerate(v):
            p['joint_score'] = joint_score[i]
        v = sorted(v, key=lambda x: x['joint_score'], reverse=True)
        # The number of events to keep is read from the best-ranked proposal.
        topN = v[0]['pred_event_count']
        v = v[:topN]
        v = sorted(v, key=lambda x: x['timestamp'])
        d['results'][k] = v
    save_path = p_src+'_rerank_alpha{}_temp{}.json'.format(alpha, temperature)
    save_dvc_json(d, save_path)
    return save_path
def evaluate(model, criterion, postprocessors, loader, dvc_json_path, logger=None, args=None, score_threshold=0,
             alpha=0.3, dvc_eval_version='2018', device='cuda', debug=False, skip_lang_eval=False):
    """Run the model over ``loader``, dump its predictions and score them.

    Gradients are disabled while iterating. Each batch is moved to
    ``device``, passed through ``model`` and ``postprocessors['bbox']``, and
    every proposal whose score is strictly above ``score_threshold`` is
    recorded under its video key. The predictions are written to
    ``dvc_json_path``; unless ``skip_lang_eval`` is set they are then scored
    with ``eval_metrics`` and the file is rewritten with the scores merged
    in. Loss reporting is disabled in this version of the function.

    Args:
        model: Called as ``model(dt, criterion, contrastive_criterion=None,
            eval_mode=True)``; only the first returned value is used.
        criterion: Forwarded to ``model``; its ``weight_dict`` is read.
        postprocessors: Mapping whose ``'bbox'`` entry converts model output
            into per-video results.
        loader: Iterable of batch dicts; ``loader.dataset.opt`` supplies the
            evaluation options.
        dvc_json_path: Output path of the prediction JSON file.
        logger: Unused.
        args: Unused.
        score_threshold: Exclusive lower bound on a proposal's score.
        alpha: Reranking weight forwarded to ``eval_metrics``.
        dvc_eval_version: Forwarded to ``eval_metrics``.
        device: Device tensors are moved to.
        debug: If true, stop once more than 5 videos have been collected.
        skip_lang_eval: If true, return right after saving the predictions.

    Returns:
        ``(None, None)`` when ``skip_lang_eval`` is true, otherwise
        ``(scores, [])`` where ``scores`` comes from ``eval_metrics``.
    """

    def _tensors_to_device(mapping):
        # Move tensor values to `device`; pass every other value through.
        return {name: value.to(device) if isinstance(value, torch.Tensor) else value
                for name, value in mapping.items()}

    out_json = {
        'results': {},
        'version': "VERSION 1.0",
        'external_data': {'used:': True, 'details': None},
    }
    opt = loader.dataset.opt

    with torch.set_grad_enabled(False):
        for dt in tqdm(loader, disable=opt.disable_tqdm):
            # Missing batch keys read back as None instead of raising.
            dt = collections.defaultdict(lambda: None, _tensors_to_device(dt))
            dt['video_target'] = [_tensors_to_device(vid_info) for vid_info in dt['video_target']]

            # The second return value (the loss) is deliberately discarded.
            output, _ = model(dt, criterion, contrastive_criterion=None, eval_mode=True)
            orig_target_sizes = dt['video_length'][:, 1]
            # Only needed by the disabled loss reporting; the attribute read
            # is kept so the function behaves exactly as before.
            weight_dict = criterion.weight_dict

            results = postprocessors['bbox'](output, orig_target_sizes, loader)

            batch_json = {}
            for idx, video_name in enumerate(dt['video_key']):
                video_result = results[idx]
                segment = video_result['boxes'].cpu().numpy()
                raw_boxes = video_result['raw_boxes'].cpu().numpy()
                proposals = []
                for pid in range(len(segment)):
                    if video_result['scores'][pid].item() > score_threshold:
                        proposals.append({
                            "timestamp": segment[pid].tolist(),
                            "raw_box": raw_boxes[pid].tolist(),
                            "proposal_score": video_result['scores'][pid].item(),
                            "sentence": video_result['captions'][pid],
                            "sentence_score": video_result['caption_scores'][pid],
                            'query_id': video_result['query_id'][pid].item(),
                            'vid_duration': video_result['vid_duration'].item(),
                            'pred_event_count': video_result['pred_seq_len'].item(),
                        })
                batch_json[video_name] = proposals

            out_json['results'].update(batch_json)
            if debug and len(out_json['results']) > 5:
                break

    save_dvc_json(out_json, dvc_json_path)
    if skip_lang_eval:
        return None, None

    scores = eval_metrics(
        dvc_json_path,
        gt_filenames=opt.gt_file_for_eval,
        para_gt_filenames=opt.gt_file_for_para_eval,
        alpha=alpha,
        rerank=(opt.count_loss_coef > 0),
        dvc_eval_version=dvc_eval_version,
        transformer_input_type=opt.transformer_input_type,
    )

    # Persist the metrics next to the predictions.
    out_json.update(scores)
    save_dvc_json(out_json, dvc_json_path)
    return scores, []