"""Attach referring-expression text to every first-frame annotation of HANDAL.

Reads the JSON list at ``text_pth``, derives an object name for each entry
from the first path component of its ``"image"`` field, and for every
annotation in ``"first_frame_anns"``:

* appends a ``{"tokens", "raw", "sent_id", "sent"}`` record to the entry's
  ``"instruction"`` list, and
* sets the annotation's ``"text"`` to a sentence naming the object.

The augmented list is written to ``save_path``.
"""
import json
import string

from pycocotools.coco import COCO
from tqdm import tqdm

text_pth = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/HANDAL/handal_test_all_text.json"
save_path = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/HANDAL/handal_test_all_instruct_correct.json"

# Plural endings whose singular is obtained by dropping "es" (sibilant stems:
# "wrenches" -> "wrench").  Every other "-s" plural only drops the "s", so
# "ladles" becomes "ladle" rather than "ladl".
_ES_PLURAL_ENDINGS = ("ses", "xes", "zes", "ches", "shes")


def _singularize(noun):
    """Return a heuristic singular form of the plural English *noun*.

    Only the regular "-es" (after a sibilant) and "-s" plurals are handled;
    a word that does not end in "s" is returned unchanged.
    """
    if noun.endswith(_ES_PLURAL_ENDINGS):
        return noun[:-2]
    if noun.endswith("s"):
        return noun[:-1]
    return noun


new_data = []
sent_id = 0  # running id, unique across every sentence in the output file

with open(text_pth, "r", encoding="utf-8") as fp:
    datas = json.load(fp)

# Each element of `datas` describes one image frame.
for data in datas:
    # The object name is the last "_"-separated token of the first path
    # component of the image path; it is the same for every annotation of
    # this frame, so it is computed once per frame.
    obj_name = _singularize(data["image"].split("/")[0].split("_")[-1])
    raw = f"a {obj_name}."
    sent = f"a {obj_name}"

    instruct_list = []
    for anno in data["first_frame_anns"]:
        instruct_list.append(
            {
                "tokens": sent.split(),
                "raw": raw,
                "sent_id": sent_id,
                "sent": sent,
            }
        )
        sent_id += 1
        anno["text"] = f'The object covered by the green mask is a {obj_name}.'

    data["instruction"] = instruct_list
    new_data.append(data)

# Report the number of sentences generated and the number of frames processed.
print(sent_id)
print(len(new_data))

with open(save_path, "w", encoding="utf-8") as fp:
    json.dump(new_data, fp)