TencentARC
/

QA-CLIP

Zero-Shot Classification

Model card Files Files and versions

QA-CLIP / eval /make_topk_predictions_tr.py

kunyi

Upload 30 files

f76d30f almost 3 years ago

3.2 kB

	# -*- coding: utf-8 -*-
	'''
	This script performs a kNN search over precomputed image and text features (on a single GPU) and outputs an image-to-text retrieval prediction file for evaluation.
	'''

	import argparse
	import numpy
	from tqdm import tqdm
	import json

	import numpy as np
	import torch

	def parse_args(argv=None):
	    """Parse the command-line options of the image-to-text top-k script.

	    Args:
	        argv: Optional list of argument strings to parse. When ``None``
	            (the default) argparse falls back to ``sys.argv[1:]``, which is
	            what the script entry point relies on.

	    Returns:
	        An ``argparse.Namespace`` with the attributes ``image_feats``,
	        ``text_feats``, ``top_k``, ``eval_batch_size`` and ``output``.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        '--image-feats',
	        type=str,
	        required=True,
	        help="Specify the path of image features."
	    )
	    parser.add_argument(
	        '--text-feats',
	        type=str,
	        required=True,
	        help="Specify the path of text features."
	    )
	    parser.add_argument(
	        '--top-k',
	        type=int,
	        default=10,
	        help="Specify the k value of top-k predictions."
	    )
	    parser.add_argument(
	        '--eval-batch-size',
	        type=int,
	        default=32768,
	        # The batching in this script runs over the text features, and the
	        # default is rendered by argparse so the help text cannot drift away
	        # from the actual value again.
	        help="Specify the text-side batch size when computing the inner products (default: %(default)s)"
	    )
	    parser.add_argument(
	        '--output',
	        type=str,
	        required=True,
	        help="Specify the output jsonl prediction filepath."
	    )
	    return parser.parse_args(argv)

	if __name__ == "__main__":
	    # Script entry point: load every text feature, then stream the image
	    # features and write, for each image, the ids of the top-k texts ranked
	    # by inner product as one JSON line.

	    # Imported locally so this block does not depend on a change to the
	    # file's top-level imports.
	    import heapq

	    args = parse_args()

	    # Log params.
	    print("Params:")
	    for name in sorted(vars(args)):
	        val = getattr(args, name)
	        print(f" {name}: {val}")

	    # A non-positive batch size would never advance through the text
	    # features, and a non-positive k cannot produce usable predictions.
	    if args.eval_batch_size <= 0:
	        raise ValueError("--eval-batch-size must be a positive integer, got {}".format(args.eval_batch_size))
	    if args.top_k <= 0:
	        raise ValueError("--top-k must be a positive integer, got {}".format(args.top_k))

	    print("Begin to load text features...")
	    text_ids = []
	    text_feats = []
	    with open(args.text_feats, "r", encoding="utf-8") as fin:
	        for line in tqdm(fin):
	            line = line.strip()
	            if not line:
	                # Tolerate blank lines (e.g. a trailing newline) instead of
	                # failing inside json.loads.
	                continue
	            obj = json.loads(line)
	            text_ids.append(obj['text_id'])
	            text_feats.append(obj['feature'])
	    text_feats_array = np.array(text_feats, dtype=np.float32)
	    print("Finished loading text features.")

	    # Split the texts into batches once: neither the id slices nor the CPU
	    # tensor views depend on the image, so rebuilding them for every image
	    # is wasted work. Slicing clamps at the end of the sequence, so the last
	    # batch may simply be shorter.
	    batch_size = args.eval_batch_size
	    text_batches = [
	        (
	            text_ids[start:start + batch_size],
	            torch.from_numpy(text_feats_array[start:start + batch_size]),
	        )
	        for start in range(0, len(text_ids), batch_size)
	    ]

	    print("Begin to compute top-{} predictions for images...".format(args.top_k))
	    with open(args.output, "w", encoding="utf-8") as fout:
	        with open(args.image_feats, "r", encoding="utf-8") as fin:
	            for line in tqdm(fin):
	                line = line.strip()
	                if not line:
	                    continue
	                obj = json.loads(line)
	                image_id = obj['image_id']
	                image_feat_tensor = torch.tensor([obj['feature']], dtype=torch.float).cuda()  # [1, feature_dim]
	                score_tuples = []
	                for batch_ids, batch_feats in text_batches:
	                    # Each text batch is still moved to the GPU on demand, so
	                    # --eval-batch-size keeps bounding the GPU memory used
	                    # for text features.
	                    batch_scores = image_feat_tensor @ batch_feats.cuda().t()  # [1, batch_size]
	                    score_tuples.extend(zip(batch_ids, batch_scores.squeeze(0).tolist()))
	                # Same result as sorted(..., reverse=True)[:top_k] without
	                # fully sorting the scores of every text.
	                top_k_predictions = heapq.nlargest(args.top_k, score_tuples, key=lambda pair: pair[1])
	                prediction = {"image_id": image_id, "text_ids": [text_id for text_id, _ in top_k_predictions]}
	                fout.write("{}\n".format(json.dumps(prediction)))

	    print("Top-{} predictions are saved in {}".format(args.top_k, args.output))
	    print("Done!")