# ObjectRelator-Original / datasets / prepare_llava_1_5.py
# YuqianFu's picture
# Upload folder using huggingface_hub
# 625a17f verified
import json
import cv2
import os
from tqdm import tqdm
# Filter the LLaVA-1.5 annotation list down to the entries that carry an
# 'image' field whose file exists on disk and can be decoded by OpenCV, then
# write the surviving entries to a new JSON file.

# Folder that holds the LLaVA images and the annotation JSON.
image_root = '/path/to/llava/image/root'
# Folder that holds the COCO images; they are looked up by bare file name.
coco_root = '/path/to/coco/train2017'
# Annotation file to filter; derived from image_root so the two stay in sync.
annotation_file = os.path.join(image_root, 'llava_v1_5_mix665k.json')
# Destination for the filtered annotation list.
output_file = '/home/hk/yyma/data/mm_data_zem/LLaVA-Instruct-150K/llava_v1_5_mix665k_onlyMM_filtered.json'


def resolve_image_file(image_path, image_root, coco_root):
    """Return the on-disk location for an annotation's ``image`` value.

    Args:
        image_path: The ``image`` string taken from an annotation entry.
        image_root: Folder that non-COCO paths are joined onto unchanged.
        coco_root: Folder that COCO images are joined onto by base name.

    Returns:
        ``coco_root/<basename>`` when ``image_path`` contains ``'coco'``,
        otherwise ``image_root/<image_path>``.
    """
    if 'coco' in image_path:
        # Any sub-folders in the annotation path are dropped for COCO images.
        return os.path.join(coco_root, os.path.basename(image_path))
    return os.path.join(image_root, image_path)


def filter_entries(entries, image_root, coco_root, read_image=None):
    """Keep the entries whose image exists and can be decoded.

    Entries without an ``'image'`` key are dropped silently. An entry whose
    file is missing, or whose file cannot be decoded, is dropped and a message
    is printed.

    Args:
        entries: Iterable of annotation dicts.
        image_root: Forwarded to ``resolve_image_file``.
        coco_root: Forwarded to ``resolve_image_file``.
        read_image: Callable taking a file path and returning ``None`` when
            the image cannot be decoded. Defaults to ``cv2.imread``.

    Returns:
        A list of the retained entries, in their original order.
    """
    if read_image is None:
        read_image = cv2.imread

    kept = []
    for entry in entries:
        if 'image' not in entry:
            continue
        file_name = resolve_image_file(entry['image'], image_root, coco_root)
        if not os.path.exists(file_name):
            print(f'cant find {file_name}')
            continue
        # The file is fully decoded so corrupt images are rejected too.
        if read_image(file_name) is None:
            print(f'cant open {file_name}')
            continue
        kept.append(entry)
    return kept


def main():
    """Load the annotations, filter them, and write the result to disk."""
    # Create the output folder up front: a missing or unwritable destination
    # is then reported before the long decoding pass instead of after it.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # JSON is read and written as UTF-8 regardless of the platform default.
    with open(annotation_file, encoding='utf-8') as f:
        data = json.load(f)

    filter_list = filter_entries(tqdm(data), image_root, coco_root)
    print(f'after filter, data length is {len(filter_list)}')

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(filter_list, f)


if __name__ == '__main__':
    main()