|
|
import argparse
import copy
import json
import os
import random
import re
import string

import cv2
import numpy as np
import tqdm
from PIL import Image
from pycocotools import mask as mask_utils
|
|
|
|
|
|
|
|
def extract_object_name(text):
    """Return the object description that follows the first word "is" in *text*.

    Example: "the object is a brown camel" -> "a brown camel".

    The split matches "is" as a whole word (``\\bis\\b``) so that words merely
    containing the letters "is" (e.g. "this", "island") do not trigger a
    split, and ``maxsplit=1`` keeps everything after the first "is" intact
    (the old ``text.split("is")`` + ``parts[1]`` truncated at the second
    occurrence).

    Returns:
        The stripped text after the first standalone "is", or ``None`` when
        the text contains no such word.
    """
    parts = re.split(r'\bis\b', text, maxsplit=1)
    if len(parts) > 1:
        return parts[1].strip()
    return None
|
|
|
|
|
|
|
|
if __name__ == "__main__":

    # Number of augmented support-frame pairings to try per target frame.
    augu_num = 4

    # Previously generated instruction-style samples; the augmented samples
    # produced below are appended to these before saving.
    original_path = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/DAVIS/2017/trainval_val_psalm_instruction_train_correct_new.json"
    with open(original_path, "r") as fp:
        datas_original = json.load(fp)

    new_data = []
    json_path = '/data/work-gcp-europe-west4-a/yuqian_fu/datasets/DAVIS/2017/trainval_val_psalm_withtext_train_new_targetframe.json'
    save_path = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/DAVIS/2017/trainval_val_psalm_train_new_augument_n4_instruction.json"
    with open(json_path, "r") as fp:
        datas = json.load(fp)

    total_num = len(datas)
    # k counts newly created samples; new ids continue past total_num.
    k = 0
    # Running sentence id, continuing from the existing dataset's last id.
    sent_id = 9444

    # Video names belonging to the DAVIS 2017 train split, one per line.
    set_path = "/data/work-gcp-europe-west4-a/yuqian_fu/datasets/DAVIS/2017/trainval/ImageSets/2017/train.txt"
    with open(set_path, 'r') as f:
        video_names = [line.strip() for line in f]

    for video in video_names:
        # All frames of the current video.
        data_list = [data for data in datas if data["video_name"] == video]

        for data in data_list:
            # Category ids present in the target frame.
            sample_unique_instances = [ann['category_id'] for ann in data['anns']]

            # Randomly pick candidate support ("first") frames from the same
            # video.  Guard against videos with fewer frames than augu_num,
            # which would make random.sample raise ValueError.
            data_sample = random.sample(data_list, min(augu_num, len(data_list)))
            for sample in data_sample:
                # Never pair a frame with itself.
                if data['new_img_id'] == sample['new_img_id']:
                    continue

                # The support frame must contain at least as many instances
                # as the target frame.
                if len(data['anns']) > len(sample['anns']):
                    continue

                # Category ids present in the candidate support frame.
                unique_instances = [ann['category_id'] for ann in sample['anns']]

                # Every instance of the target frame must also appear in the
                # support frame; otherwise the pairing is invalid.
                skip = False
                for cat_id in sample_unique_instances:
                    if cat_id not in unique_instances:
                        skip = True
                        break
                if skip:
                    continue

                # Keep only the support-frame annotations whose category also
                # occurs in the target frame (deep copy: the filtered list is
                # stored in the output and must not alias the input data).
                first_frame_anns = copy.deepcopy(sample['anns'])
                if len(data['anns']) < len(first_frame_anns):
                    first_frame_anns = [ann for ann in first_frame_anns if ann['category_id'] in sample_unique_instances]

                # After filtering, both frames must describe the same number
                # of instances.
                assert len(data['anns']) == len(first_frame_anns)

                # Build one instruction entry per retained annotation.
                skip_text = False
                instruct_list = []
                for anno in first_frame_anns:
                    text = anno["text"]

                    raw = extract_object_name(text)
                    if raw is None:
                        # No "... is <object>" phrase found; drop this pairing.
                        skip_text = True
                        print(sample['image'])
                        break

                    raw_lower = raw.lower()
                    # Drop the color word injected by the annotation template.
                    result = raw_lower.replace("green", "").strip()
                    # Strip punctuation to get the bare sentence and tokens.
                    sent = result.translate(str.maketrans('', '', string.punctuation))
                    tokens = sent.split()
                    sample_text = {
                        "tokens": tokens,
                        "raw": raw,
                        "sent_id": sent_id,
                        "sent": sent
                    }
                    sent_id += 1
                    instruct_list.append(sample_text)
                if skip_text:
                    continue

                # Assemble the augmented sample: the target frame's
                # annotations paired with the chosen support frame and its
                # generated instructions.
                data_new = {
                    'image': data['image'],
                    'image_info': data['image_info'],
                    'anns': data['anns'],
                    'first_frame_image': sample['image'],
                    'first_frame_anns': first_frame_anns,
                    'new_img_id': total_num + k,
                    'video_name': data['video_name'],
                    "instruction": instruct_list
                }
                new_data.append(data_new)
                k += 1

    # Append the augmented samples to the original instruction data and save.
    data_all = datas_original + new_data
    with open(save_path, 'w') as f:
        json.dump(data_all, f)
    print(f'Save at {save_path}. Total sample: {len(data_all)}')