|
|
import copy
import json
import os
import re
import string
from collections import defaultdict

import numpy as np
from natsort import natsorted
from PIL import Image
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
|
|
|
|
|
def extract_object_name(text):
    """Return the text that follows the first standalone word "is" in *text*.

    Splits on "is" as a whole word (``\\bis\\b``) so that words merely
    containing the substring — "this", "instruction", "visible" — do not
    trigger a false split, and uses ``maxsplit=1`` so a second "is" later
    in the sentence does not truncate the extracted name.

    Args:
        text: A description such as ``"the object is a cup"``.

    Returns:
        The stripped substring after the first standalone "is", or
        ``None`` when no such word occurs in *text*.
    """
    parts = re.split(r"\bis\b", text, maxsplit=1)
    if len(parts) > 1:
        return parts[1].strip()
    return None
|
|
|
|
|
if __name__ == '__main__':
    # Build a video-tracking dataset for the validation split: for each
    # video, keep its first frame plus every later frame whose annotated
    # instances are a subset of the first frame's, attaching one cleaned
    # text instruction per matched annotation.

    sent_id = 1          # globally unique id assigned to each instruction
    data_new_list = []   # accumulated output samples

    json_path = "/data/work-gcp-europe-west4-a/yuqian_fu/Ego/data_segswap/ExoQuery_val_newprompt_all_instruction.json"
    with open(json_path, 'r') as f:
        datas = json.load(f)

    # NOTE(review): root_path is currently unused — kept for reference.
    root_path = "/data/work-gcp-europe-west4-a/yuqian_fu/Ego/data_segswap"
    split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/SegSwap/data/split.json"
    with open(split_path, "r") as fp:
        data_split = json.load(fp)
    val_set = data_split["val"]

    # Group samples by video once (O(N)) instead of rescanning the whole
    # dataset for every video (O(videos * N)); per-video order is kept.
    samples_by_video = defaultdict(list)
    for data in datas:
        samples_by_video[data["video_name"]].append(data)

    for video_name in tqdm(val_set):
        data_thisvideo = samples_by_video.get(video_name, [])
        if not data_thisvideo:
            continue

        first_frame = data_thisvideo[0]
        # Category ids present in the reference (first) frame.
        sample_unique_instances = [ann['category_id'] for ann in first_frame['first_frame_anns']]

        data_new_list.append(first_frame)

        for sample in data_thisvideo[1:]:
            # The first frame must annotate at least as many instances as
            # this frame, otherwise they cannot all be matched.
            if len(first_frame['first_frame_anns']) < len(sample['anns']):
                continue

            unique_instances = [ann['category_id'] for ann in sample['anns']]

            # Skip frames that introduce instances absent from frame 1.
            if any(cat_id not in sample_unique_instances for cat_id in unique_instances):
                continue

            # Align the two annotation lists one-to-one: keep only the
            # first-frame annotations whose category appears in this frame.
            first_frame_anns = copy.deepcopy(first_frame['first_frame_anns'])
            if len(sample['anns']) < len(first_frame_anns):
                first_frame_anns = [ann for ann in first_frame_anns if ann['category_id'] in unique_instances]
            assert len(sample['anns']) == len(first_frame_anns)

            # Build one cleaned instruction per matched annotation.
            skip_text = False
            instruct_list = []
            for anno in first_frame_anns:
                raw = extract_object_name(anno["text"])
                if raw is None:
                    # Text lacks the "... is <object>" pattern; drop frame.
                    skip_text = True
                    break

                # Normalise: lowercase, drop the colour word "green",
                # strip whitespace, then remove punctuation and tokenise.
                result = raw.lower().replace("green", "").strip()
                sent = result.translate(str.maketrans('', '', string.punctuation))
                instruct_list.append({
                    "tokens": sent.split(),
                    "raw": raw,
                    "sent_id": sent_id,
                    "sent": sent
                })
                sent_id += 1
            if skip_text:
                continue

            data_new_list.append({
                'image': sample['image'],
                'image_info': sample['image_info'],
                'anns': sample['anns'],
                'first_frame_image': first_frame['first_frame_image'],
                'first_frame_anns': first_frame_anns,
                'new_img_id': sample['new_img_id'],
                'video_name': sample['video_name'],
                "instruction": instruct_list
            })

    save_path = "/data/work-gcp-europe-west4-a/yuqian_fu/Ego/data_segswap/Exo_firstframe_VideoTracking.json"
    with open(save_path, 'w') as f:
        json.dump(data_new_list, f)
    print(f'Save at {save_path}. Total sample: {len(data_new_list)}')
|
|
|
|
|
|