import json
import string
from typing import Any, Dict, List

from pycocotools.coco import COCO
from tqdm import tqdm
# Input: JSON list with one record per image frame.
# Output: the same records with instruction / text fields added.
text_pth = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/HANDAL/handal_test_all_text.json"
save_path = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/HANDAL/handal_test_all_instruct_correct.json"

# Plural endings formed by appending "es" to the singular (glasses, brushes,
# wrenches, boxes, buzzes). Every other plural is treated as a bare "+s".
_ES_PLURAL_SUFFIXES = ("sses", "shes", "ches", "xes", "zzes")


def singularize(word: str) -> str:
    """Return a best-effort singular form of an English plural noun.

    Only the trailing "es" / "s" is handled. Stripping "es" unconditionally
    would turn "ladles" into "ladl", so "es" is removed only after a sibilant
    ending; all other plurals lose just the final "s". Words ending in "ss"
    (e.g. "glass") and words without a plural ending are returned unchanged.

    This is a heuristic: irregular plurals and "-ies" plurals are not handled.
    """
    if word.endswith(_ES_PLURAL_SUFFIXES):
        return word[:-2]
    if word.endswith("s") and not word.endswith("ss"):
        return word[:-1]
    return word


def object_name_from_image(image_path: str) -> str:
    """Derive the object noun from the first component of an image path.

    The first "/"-separated component is split on "_" and its last piece is
    singularized, e.g. "foo_hammers/x.jpg" -> "hammer".
    """
    top_dir = image_path.split("/")[0]
    return singularize(top_dir.split("_")[-1])


def annotate_frame(frame: Dict[str, Any], next_sent_id: int) -> int:
    """Add instruction entries and per-annotation text to one frame, in place.

    For every entry of ``frame["first_frame_anns"]`` one instruction dict
    (keys ``tokens``, ``raw``, ``sent_id``, ``sent``) is appended to the new
    ``frame["instruction"]`` list and the annotation receives a ``"text"``
    sentence naming the object.

    Args:
        frame: One frame record; must contain ``"image"`` and
            ``"first_frame_anns"``.
        next_sent_id: First sentence id to assign within this frame.

    Returns:
        The next unused sentence id (input id plus the number of annotations).
    """
    # The name depends only on the frame's image path, so compute it once
    # rather than once per annotation.
    obj_name = object_name_from_image(frame["image"])
    # NOTE: the article is always "a", even before a vowel sound.
    sent = f"a {obj_name}"
    raw = f"a {obj_name}."

    instructions: List[Dict[str, Any]] = []
    for anno in frame["first_frame_anns"]:
        instructions.append(
            {
                "tokens": sent.split(),
                "raw": raw,
                "sent_id": next_sent_id,
                "sent": sent,
            }
        )
        next_sent_id += 1
        anno["text"] = f'The object covered by the green mask is a {obj_name}.'

    frame["instruction"] = instructions
    return next_sent_id


def main(src: str = text_pth, dst: str = save_path) -> None:
    """Read the frame records from ``src``, annotate them, and write to ``dst``.

    Prints the total number of sentences generated and the number of frames.
    """
    with open(src, "r", encoding="utf-8") as fp:
        frames = json.load(fp)

    sent_id = 0
    # Each entry is a single image frame.
    for frame in frames:
        sent_id = annotate_frame(frame, sent_id)

    print(sent_id)
    print(len(frames))

    with open(dst, "w", encoding="utf-8") as fp:
        json.dump(frames, fp)


if __name__ == "__main__":
    main()