import argparse
import json
from collections import defaultdict

from tqdm import tqdm


def parse_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is
            what the script entry point relies on.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its ``input``
        attribute holds the value given to the required ``--input`` option.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input',
        type=str,
        required=True,
        help="Input path of text-to-image Jsonl annotation file."
    )
    return parser.parse_args(argv)


def output_path_for(input_path):
    """Return the path of the transposed file written for *input_path*.

    A trailing ``.jsonl`` extension is replaced by ``.tr.jsonl``; a path
    without that extension simply gets ``.tr.jsonl`` appended.
    """
    suffix = ".jsonl"
    # Only strip the extension at the very end of the path. Removing every
    # occurrence (as str.replace does) would also rewrite directory names
    # that happen to contain ".jsonl" and send the output elsewhere.
    if input_path.endswith(suffix):
        input_path = input_path[:-len(suffix)]
    return input_path + ".tr" + suffix


def group_texts_by_image(lines):
    """Invert text-to-image annotations into an image-to-texts mapping.

    Args:
        lines: Iterable of JSON Lines strings. Each non-blank line must
            decode to an object with a ``text_id`` entry and an iterable
            ``image_ids`` entry.

    Returns:
        A dict mapping every image id to the list of text ids that
        referenced it, in the order the lines were read.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``text_id`` or ``image_ids``.
    """
    image_to_texts = defaultdict(list)
    for line in lines:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing inside json.loads on an empty string.
            continue
        obj = json.loads(line)
        text_id = obj['text_id']
        for image_id in obj['image_ids']:
            image_to_texts[image_id].append(text_id)
    # Hand back a plain dict so later lookups cannot silently create keys.
    return dict(image_to_texts)


if __name__ == "__main__":
    args = parse_args()

    # tqdm wraps the file handle, so progress is reported per line read.
    with open(args.input, "r", encoding="utf-8") as fin:
        t2i_record = group_texts_by_image(tqdm(fin))

    # Write one JSON object per image id, holding the text ids that cite it.
    with open(output_path_for(args.input), "w", encoding="utf-8") as fout:
        for image_id, text_ids in t2i_record.items():
            out_obj = {"image_id": image_id, "text_ids": text_ids}
            fout.write("{}\n".format(json.dumps(out_obj)))

    print("Done!")