"""Attach a placeholder ``text`` field to first-frame annotations.

The script loads the list of records stored at ``JSON_PATH`` (a DAVIS 2017
split file, judging by the path), sets ``anno["text"]`` to ``"obj"`` for every
annotation found under each record's ``"first_frame_anns"`` key, and writes
the resulting list to ``SAVE_PATH``.

Several earlier, disabled experiments are kept as comments at the end of the
file for reference.
"""

import json
import os  # noqa: F401 - referenced only by the disabled snippets below
import re  # noqa: F401 - referenced only by the disabled snippets below

try:
    # natsort is needed only by the disabled take-level snippet at the end of
    # the file; the active code path must stay runnable without it.
    from natsort import natsorted  # noqa: F401
except ImportError:
    natsorted = None

# Previously used input file:
# JSON_PATH = "/home/yuqian_fu/Projects/PSALM/check_text_byname_600_objname_llavaname.json"
JSON_PATH = "/work/yuqian_fu/Data/datasets/DAVIS/2017/trainval_test_psalm_20gap.json"
SAVE_PATH = "/home/yuqian_fu/Projects/DAVIS_test_gap20.json"

# Text written into every annotation.
PLACEHOLDER_TEXT = "obj"


def label_annotations(datas, text=PLACEHOLDER_TEXT):
    """Set ``anno["text"] = text`` on every first-frame annotation.

    Args:
        datas: Iterable of record dicts. Each record must have a
            ``"first_frame_anns"`` key holding an iterable of annotation
            dicts.
        text: Value stored under each annotation's ``"text"`` key.

    Returns:
        A new list containing the same record objects, in input order. The
        records and their annotation dicts are modified in place.

    Raises:
        KeyError: If a record has no ``"first_frame_anns"`` key.
    """
    labelled = []
    for record in datas:
        # Annotations are updated in place; no other field is read, so an
        # annotation without "category_id" is accepted as well.
        for anno in record["first_frame_anns"]:
            anno["text"] = text
        labelled.append(record)
    return labelled


def main(json_path=JSON_PATH, save_path=SAVE_PATH):
    """Load records from ``json_path``, label them and dump to ``save_path``.

    Args:
        json_path: Path of the input JSON file (a list of records).
        save_path: Path the labelled list is written to as JSON.
    """
    with open(json_path, "r", encoding="utf-8") as fp:
        datas = json.load(fp)

    datas_final_save = label_annotations(datas)
    print("datas_final_save:", len(datas_final_save))

    with open(save_path, "w", encoding="utf-8") as fp:
        json.dump(datas_final_save, fp)


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Disabled experiments, kept for reference only.
# ---------------------------------------------------------------------------

# data_path = "/work/yuqian_fu/Ego/data_segswap_test"

# split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
# with open(split_path, "r") as fp:
#     takes_all = json.load(fp)
# takes = takes_all["test"]

# names = ["basketball", "bike", "cooking", "health", "music", "soccer"]
# names = ["basketball", "bike", "music", "soccer"]

# Collect the take ids present in this batch of data.
# takes = []
# for name in names:
#     data = datas[name]
#     for d in data:
#         video_name = d["video_name"]
#         takes.append(video_name)
# takes = list(set(takes))
# print("takes:", len(takes))

# Reformat the data: add a new key "scene" to every record.
# datas_new = []
# for name in names:
#     data_this_name = datas[name]
#     for data in data_this_name:
#         data["scene"] = name
#         datas_new.append(data)
# print("datas_new:", len(datas_new))

# Add a new key "obj_name" to every record in datas_new.
# NOTE(review): the active loop used to compute int(anno["category_id"]),
# presumably to index coco_id_to_cont_id below - TODO confirm before reviving.
# for take in takes:
#     # datas_tmp = []
#     gt_path = f"{data_path}/{take}/annotation.json"
#     with open(gt_path, 'r') as fp:
#         gt = json.load(fp)
#     # Build the inverse dictionary.
#     objs = natsorted(list(gt["masks"].keys()))
#     coco_id_to_cont_id = {cont_id + 1: coco_id for cont_id, coco_id in enumerate(objs)}
#     # for data in datas_new:
#     #     if data["video_name"] == take:
#     #         datas_tmp.append(data)
#     for data in datas:
#         # Only process data belonging to the current take.
#         if data.get("video_name") != take:
#             continue
#         ...

# Strip the numeric suffix from obj_name and the article from llava_text.
# datas_final_save = []
# for data in datas:
#     annos = data["first_frame_anns"]
#     annos_new = []
#     for anno in annos:
#         obj_name = anno["obj_name"]
#         result = re.sub(r'_\d+$', '', obj_name)
#         anno["obj_name"] = result
#         llava_text = anno["llava_text"]
#         # If llava_text contains an article ("a"/"an"), e.g. "a basketball",
#         # drop it so the text becomes "basketball".
#         if "a " in llava_text:
#             llava_text = llava_text.replace("a ", "")
#         elif "an " in llava_text:
#             llava_text = llava_text.replace("an ", "")
#         anno["llava_text"] = llava_text
#         annos_new.append(anno)
#     data["first_frame_anns"] = annos_new
#     datas_final_save.append(data)

# Compare the test split against the takes present on disk.
# path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
# with open(path, "r") as fp:
#     data = json.load(fp)
# takes = data["test"]
# print("takes:", len(takes))
# takes_new = os.listdir("/work/yuqian_fu/Ego/data_segswap_test_20250508_new888_v2")
# print("takes_new:", len(takes_new))
# # Elements present in takes but missing from takes_new.
# diff_in_takes = set(takes) - set(takes_new)
# # Elements present in takes_new but missing from takes.
# diff_in_takes_new = set(takes_new) - set(takes)
# # Print the results.
# print("in takes but not in takes_new:", diff_in_takes)
# print("in takes_new but not in takes:", diff_in_takes_new)