File size: 3,876 Bytes
625a17f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import json
from natsort import natsorted
import os
import re
#json_path = "/home/yuqian_fu/Projects/PSALM/check_text_byname_600_objname_llavaname.json"
# Input JSON file. The code further down iterates over its top-level value
# and reads the "first_frame_anns" entry of every record.
json_path = "/work/yuqian_fu/Data/datasets/DAVIS/2017/trainval_test_psalm_20gap.json"
# data_path = "/work/yuqian_fu/Ego/data_segswap_test"
# Decode explicitly as UTF-8 so that parsing does not depend on the locale's
# default text encoding of the machine running the script.
with open(json_path, "r", encoding="utf-8") as fp:
    datas = json.load(fp)
# split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
# with open(split_path, "r") as fp:
# takes_all = json.load(fp)
# takes = takes_all["test"]
#names = ["basketball", "bike", "cooking", "health", "music", "soccer"]
#names = ["basketball", "bike", "music", "soccer"]
# Collect the take_ids present in this batch of data
# takes = []
# for name in names:
# data = datas[name]
# for d in data:
# video_name = d["video_name"]
# takes.append(video_name)
# takes = list(set(takes))
# print("takes:", len(takes))
# Reformat the data: add a new key, "scene", to every record
# datas_new = []
# for name in names:
# data_this_name = datas[name]
# for data in data_this_name:
# data["scene"] = name
# datas_new.append(data)
# print("datas_new:", len(datas_new))
# Records that will be written to the output file; filled by the loop below.
datas_final_save = []
# # Add a new key, obj_name, to every record in datas_new
# for take in takes:
# # datas_tmp = []
# gt_path = f"{data_path}/{take}/annotation.json"
# with open(gt_path, 'r') as fp:
# gt = json.load(fp)
# # Build the inverse mapping
# objs = natsorted(list(gt["masks"].keys()))
# coco_id_to_cont_id = {cont_id + 1: coco_id for cont_id, coco_id in enumerate(objs)}
# # for data in datas_new:
# # if data["video_name"] == take:
# # datas_tmp.append(data)

# Every first-frame annotation receives the same fixed "text" value; the
# annotation's category_id is not consulted when choosing it.
obj_name = "obj"
for data in datas:
    # Only process records that belong to the current take
    # if data.get("video_name") != take:
    # continue
    # The annotation dicts are updated in place, so the record's
    # "first_frame_anns" list does not need to be rebuilt.
    for anno in data["first_frame_anns"]:
        anno["text"] = obj_name
    datas_final_save.append(data)
# datas_final_save = []
# for data in datas:
# annos = data["first_frame_anns"]
# annos_new = []
# for anno in annos:
# obj_name = anno["obj_name"]
# result = re.sub(r'_\d+$', '', obj_name)
# anno["obj_name"] = result
# llava_text = anno["llava_text"]
# # If llava_text contains an article (a/an), e.g. "a basketball", drop the article so it becomes "basketball"
# if "a " in llava_text:
# llava_text = llava_text.replace("a ", "")
# elif "an " in llava_text:
# llava_text = llava_text.replace("an ", "")
# anno["llava_text"] = llava_text
# annos_new.append(anno)
# data["first_frame_anns"] = annos_new
# datas_final_save.append(data)
# Report how many records were collected, then serialise them to save_path.
record_count = len(datas_final_save)
print("datas_final_save:", record_count)
save_path = "/home/yuqian_fu/Projects/DAVIS_test_gap20.json"
with open(save_path, mode="w") as out_file:
    json.dump(datas_final_save, out_file)
# path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
# with open(path, "r") as fp:
# data = json.load(fp)
# takes = data["test"]
# print("takes:", len(takes))
# takes_new = os.listdir("/work/yuqian_fu/Ego/data_segswap_test_20250508_new888_v2")
# print("takes_new:", len(takes_new))
# # Find the elements that are in takes but not in takes_new
# diff_in_takes = set(takes) - set(takes_new)
# # Find the elements that are in takes_new but not in takes
# diff_in_takes_new = set(takes_new) - set(takes)
# # Print the results
# print("takes 中有但 takes_new 中没有的元素:", diff_in_takes)
# print("takes_new 中有但 takes 中没有的元素:", diff_in_takes_new)
|