"""Build first-frame-referenced tracking samples for the validation videos.

The script reads a JSON list of per-frame entries and a split file, groups the
entries by ``video_name`` and treats the first entry of every validation video
as the reference frame.  Every later entry of the same video whose annotated
categories are covered by the reference frame is re-emitted together with the
matching reference annotations and a tokenised instruction per annotation.
The resulting list is written to a single JSON file.
"""
import copy
import json
import os
import re
import string
from collections import defaultdict
from itertools import count

import numpy as np
from natsort import natsorted
from PIL import Image
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm

DATA_ROOT = "/data/work-gcp-europe-west4-a/yuqian_fu/Ego/data_segswap"
JSON_PATH = os.path.join(DATA_ROOT, "ExoQuery_val_newprompt_all_instruction.json")
SPLIT_PATH = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/SegSwap/data/split.json"
SAVE_PATH = os.path.join(DATA_ROOT, "Exo_firstframe_VideoTracking.json")

# Matches "is" only as a standalone word, so names such as "scissors" or
# "dish" are not cut in the middle.
_IS_WORD = re.compile(r"\bis\b")

# Translation table that deletes every ASCII punctuation character.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def extract_object_name(text):
    """Return the part of *text* that follows the first standalone word "is".

    The match is case-sensitive and limited to whole words, and only the first
    occurrence is used, so the complete remainder of the sentence is kept.

    Args:
        text: The annotation sentence to parse.

    Returns:
        The stripped text after the first "is", or ``None`` when *text*
        contains no standalone "is".

    >>> extract_object_name("The object is scissors")
    'scissors'
    >>> extract_object_name("a dish towel") is None
    True
    """
    parts = _IS_WORD.split(text, maxsplit=1)
    if len(parts) > 1:
        return parts[1].strip()
    return None


def _build_instructions(reference_anns, sent_ids):
    """Turn the ``text`` of each reference annotation into an instruction dict.

    Args:
        reference_anns: Annotation dicts that each carry a ``"text"`` entry.
        sent_ids: Iterator yielding the next sentence id; one id is consumed
            per instruction that is built.

    Returns:
        A list with one instruction dict per annotation, or ``None`` as soon
        as one annotation text has no extractable object name.  Ids consumed
        before that point are not returned to the iterator.
    """
    instructions = []
    for ann in reference_anns:
        raw = extract_object_name(ann["text"])
        if raw is None:
            return None
        # Lower-case, remove the substring "green", trim the ends, then drop
        # all punctuation characters.
        cleaned = raw.lower().replace("green", "").strip()
        sent = cleaned.translate(_PUNCT_TABLE)
        instructions.append({
            "tokens": sent.split(),
            "raw": raw,
            "sent_id": next(sent_ids),
            "sent": sent,
        })
    return instructions


def _match_reference_anns(reference_anns, target_anns):
    """Select the reference annotations that correspond to a target frame.

    Args:
        reference_anns: Annotations of the reference (first) frame.
        target_anns: Annotations of the frame used as target.

    Returns:
        A deep copy of the reference annotations whose ``category_id`` occurs
        in the target frame (all of them when both frames hold the same number
        of annotations), or ``None`` when the target has more annotations than
        the reference or contains a category the reference lacks.
    """
    if len(reference_anns) < len(target_anns):
        return None
    reference_ids = {ann['category_id'] for ann in reference_anns}
    target_ids = {ann['category_id'] for ann in target_anns}
    # The reference frame must contain every category of the target frame.
    if not target_ids <= reference_ids:
        return None
    if len(target_anns) < len(reference_anns):
        matched = [ann for ann in reference_anns if ann['category_id'] in target_ids]
    else:
        matched = reference_anns
    # Copy so the emitted sample never aliases the reference frame's data.
    return copy.deepcopy(matched)


def build_tracking_samples(datas, val_set):
    """Create the tracking samples for every video listed in *val_set*.

    Args:
        datas: List of frame entries; each must provide ``"video_name"``.
        val_set: Iterable of video names to process, in output order.

    Returns:
        A list holding, per video, the unmodified first entry followed by one
        new dict for every later entry that passed the category and text
        checks.

    Raises:
        ValueError: If the selected reference annotations and the target
            annotations end up with different lengths.
    """
    # Group once instead of rescanning the whole list for every video.
    by_video = defaultdict(list)
    for data in datas:
        by_video[data["video_name"]].append(data)

    sent_ids = count(1)
    samples = []
    for video_name in tqdm(val_set):
        video_frames = by_video.get(video_name)
        if not video_frames:
            continue

        first_frame = video_frames[0]
        samples.append(first_frame)

        # Every later entry of the video acts as a target frame.
        for sample in video_frames[1:]:
            first_frame_anns = _match_reference_anns(
                first_frame['first_frame_anns'], sample['anns']
            )
            if first_frame_anns is None:
                continue
            if len(sample['anns']) != len(first_frame_anns):
                raise ValueError(
                    f"Annotation count mismatch for {sample['image']!r} in video "
                    f"{video_name!r}: {len(sample['anns'])} target vs "
                    f"{len(first_frame_anns)} reference annotations"
                )

            instruct_list = _build_instructions(first_frame_anns, sent_ids)
            if instruct_list is None:
                # At least one annotation text had no parsable object name.
                continue

            samples.append({
                'image': sample['image'],
                'image_info': sample['image_info'],
                'anns': sample['anns'],
                'first_frame_image': first_frame['first_frame_image'],
                'first_frame_anns': first_frame_anns,
                'new_img_id': sample['new_img_id'],
                'video_name': sample['video_name'],
                "instruction": instruct_list,
            })
    return samples


def main(json_path=JSON_PATH, split_path=SPLIT_PATH, save_path=SAVE_PATH):
    """Load the inputs, build the samples and write them to *save_path*.

    Args:
        json_path: JSON file containing the list of frame entries.
        split_path: JSON file whose ``"val"`` entry lists the video names.
        save_path: Destination of the generated JSON list.
    """
    with open(json_path, 'r', encoding="utf-8") as f:
        datas = json.load(f)
    with open(split_path, "r", encoding="utf-8") as fp:
        val_set = json.load(fp)["val"]

    data_new_list = build_tracking_samples(datas, val_set)

    with open(save_path, 'w', encoding="utf-8") as f:
        json.dump(data_new_list, f)
    print(f'Save at {save_path}. Total sample: {len(data_new_list)}')


if __name__ == '__main__':
    main()