| | |
| | ''' |
| | This script performs kNN search on previously inferred image and text features (on a single GPU) and outputs an image-to-text retrieval prediction file for evaluation. |
| | ''' |
| |
|
| | import argparse |
| | import numpy |
| | from tqdm import tqdm |
| | import json |
| |
|
| | import numpy as np |
| | import torch |
| |
|
def parse_args(argv=None):
    """Parse the command-line options of the retrieval script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``image_feats``, ``text_feats``,
        ``top_k``, ``eval_batch_size`` and ``output``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--image-feats',
        type=str,
        required=True,
        help="Specify the path of image features."
    )
    parser.add_argument(
        '--text-feats',
        type=str,
        required=True,
        help="Specify the path of text features."
    )
    parser.add_argument(
        '--top-k',
        type=int,
        default=10,
        help="Specify the k value of top-k predictions."
    )
    parser.add_argument(
        '--eval-batch-size',
        type=int,
        default=32768,
        # The default is interpolated by argparse so the help text cannot
        # drift away from the real value again (it used to claim 8192).
        help="Specify the text-side batch size when computing the inner products, default to %(default)s"
    )
    parser.add_argument(
        '--output',
        type=str,
        required=True,
        help="Specify the output jsonl prediction filepath."
    )
    return parser.parse_args(argv)
| |
|
def _iter_jsonl(path):
    """Yield the decoded JSON object of every non-blank line of the file at ``path``.

    The file is read as UTF-8 and progress is reported through ``tqdm``.
    """
    with open(path, "r", encoding="utf-8") as fin:
        for line in tqdm(fin):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. an extra newline at the end of
                # the file) instead of crashing inside json.loads.
                continue
            yield json.loads(line)


def main():
    """Rank all texts for every image and write the top-k text ids as JSONL.

    Reads the text features file (one JSON object per line with the keys
    ``text_id`` and ``feature``) fully into host memory, then streams the
    image features file (keys ``image_id`` and ``feature``). For each image,
    the inner product with every text feature is computed on the GPU in
    text-side batches of ``--eval-batch-size`` rows, and one line
    ``{"image_id": ..., "text_ids": [...]}`` holding the ``--top-k`` highest
    scoring text ids is written to the output file.

    Raises:
        ValueError: if ``--eval-batch-size`` is not a positive integer.
    """
    args = parse_args()
    if args.eval_batch_size <= 0:
        # A non-positive step would never advance through the text features.
        raise ValueError(
            "--eval-batch-size must be a positive integer, got {}".format(args.eval_batch_size)
        )

    print("Params:")
    for name in sorted(vars(args)):
        val = getattr(args, name)
        print(f" {name}: {val}")

    print("Begin to load text features...")
    text_ids = []
    text_feats = []
    for obj in _iter_jsonl(args.text_feats):
        text_ids.append(obj['text_id'])
        text_feats.append(obj['feature'])
    text_feats_array = np.array(text_feats, dtype=np.float32)
    print("Finished loading text features.")

    print("Begin to compute top-{} predictions for images...".format(args.top_k))
    num_texts = len(text_ids)
    batch_size = args.eval_batch_size
    with open(args.output, "w", encoding="utf-8") as fout:
        for obj in _iter_jsonl(args.image_feats):
            image_id = obj['image_id']
            # Shape (1, dim), so the matmul below yields one row of scores.
            image_feat_tensor = torch.tensor([obj['feature']], dtype=torch.float).cuda()
            score_tuples = []
            # NOTE(review): every text batch is copied to the GPU again for
            # each image; caching the batches on the device would avoid that
            # at the cost of GPU memory -- confirm the memory budget first.
            for start in range(0, num_texts, batch_size):
                end = start + batch_size  # slicing clamps at the end of the data
                text_feats_tensor = torch.from_numpy(text_feats_array[start:end]).cuda()
                batch_scores = image_feat_tensor @ text_feats_tensor.t()
                score_tuples.extend(
                    zip(text_ids[start:end], batch_scores.squeeze(0).tolist())
                )
            # sorted() is stable, so equally scored texts keep their file order.
            top_k_predictions = sorted(score_tuples, key=lambda x: x[1], reverse=True)[:args.top_k]
            fout.write("{}\n".format(json.dumps({"image_id": image_id, "text_ids": [entry[0] for entry in top_k_predictions]})))

    print("Top-{} predictions are saved in {}".format(args.top_k, args.output))
    print("Done!")


if __name__ == "__main__":
    main()
| |
|