# ObjectRelator-Original / datasets / build_ref_ego_objname.py
# Uploaded by YuqianFu (upload folder using huggingface_hub)
# Commit 625a17f (verified)
import json
from natsort import natsorted
import os
import re
#json_path = "/home/yuqian_fu/Projects/PSALM/check_text_byname_600_objname_llavaname.json"
# Input annotation file. The processing loop later in this script iterates the
# decoded value and reads a "first_frame_anns" entry from every record.
json_path = "/work/yuqian_fu/Data/datasets/DAVIS/2017/trainval_test_psalm_20gap.json"
# data_path = "/work/yuqian_fu/Ego/data_segswap_test"
# JSON text is UTF-8, so decode it explicitly instead of relying on whatever
# encoding the host locale happens to select.
with open(json_path, "r", encoding="utf-8") as fp:
    datas = json.load(fp)
# split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
# with open(split_path, "r") as fp:
# takes_all = json.load(fp)
# takes = takes_all["test"]
#names = ["basketball", "bike", "cooking", "health", "music", "soccer"]
#names = ["basketball", "bike", "music", "soccer"]
# Collect the take_ids that appear in this batch of data
# takes = []
# for name in names:
# data = datas[name]
# for d in data:
# video_name = d["video_name"]
# takes.append(video_name)
# takes = list(set(takes))
# print("takes:", len(takes))
# Reformat the data, adding a new key to every record: scene
# datas_new = []
# for name in names:
# data_this_name = datas[name]
# for data in data_this_name:
# data["scene"] = name
# datas_new.append(data)
# print("datas_new:", len(datas_new))
# Records that have been tagged, kept in input order; this list is what gets
# written to disk at the end of the script.
datas_final_save = []
# # Add a new key to every record in datas_new: obj_name
# for take in takes:
# # datas_tmp = []
# gt_path = f"{data_path}/{take}/annotation.json"
# with open(gt_path, 'r') as fp:
# gt = json.load(fp)
# # Build the inverse dictionary
# objs = natsorted(list(gt["masks"].keys()))
# coco_id_to_cont_id = {cont_id + 1: coco_id for cont_id, coco_id in enumerate(objs)}
# # for data in datas_new:
# # if data["video_name"] == take:
# # datas_tmp.append(data)
for data in datas:
    # Only process records that belong to the current take
    # if data.get("video_name") != take:
    # continue
    for anno in data["first_frame_anns"]:
        # Every annotation receives the same generic placeholder name. The
        # annotation dicts are updated in place, so the record's
        # "first_frame_anns" list needs no rebuilding, and "category_id" is
        # no longer read because the name does not depend on it.
        anno["text"] = "obj"
    datas_final_save.append(data)
# datas_final_save = []
# for data in datas:
# annos = data["first_frame_anns"]
# annos_new = []
# for anno in annos:
# obj_name = anno["obj_name"]
# result = re.sub(r'_\d+$', '', obj_name)
# anno["obj_name"] = result
# llava_text = anno["llava_text"]
# # If llava_text contains an article (a / an), e.g. "a basketball", remove the article so it becomes "basketball"
# if "a " in llava_text:
# llava_text = llava_text.replace("a ", "")
# elif "an " in llava_text:
# llava_text = llava_text.replace("an ", "")
# anno["llava_text"] = llava_text
# annos_new.append(anno)
# data["first_frame_anns"] = annos_new
# datas_final_save.append(data)
# Destination of the processed annotation file.
save_path = "/home/yuqian_fu/Projects/DAVIS_test_gap20.json"
# Report how many records are about to be written.
print("datas_final_save:", len(datas_final_save))
# Serialise the full record list as a single JSON document.
with open(save_path, "w") as out_file:
    json.dump(datas_final_save, out_file)
# path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
# with open(path, "r") as fp:
# data = json.load(fp)
# takes = data["test"]
# print("takes:", len(takes))
# takes_new = os.listdir("/work/yuqian_fu/Ego/data_segswap_test_20250508_new888_v2")
# print("takes_new:", len(takes_new))
# # Find the elements that are in takes but not in takes_new
# diff_in_takes = set(takes) - set(takes_new)
# # Find the elements that are in takes_new but not in takes
# diff_in_takes_new = set(takes_new) - set(takes)
# # Print the results
# print("takes 中有但 takes_new 中没有的元素:", diff_in_takes)
# print("takes_new 中有但 takes 中没有的元素:", diff_in_takes_new)