import json
import os

# Rewrite the "id" and "image" fields of every entry in the LLaVA pretrain
# annotation file so that they follow a positional, sharded naming scheme,
# then save the result under a new file name.
#
# Naming scheme (derived purely from the entry's position in the list):
#   * entries are grouped 10000 per subfolder;
#   * subfolders are numbered from 1000 and zero-padded to 5 digits
#     (entry 0 -> "01000", entry 10000 -> "01001", ...);
#   * the image name is the subfolder name followed by the 4-digit
#     position inside that subfolder (entry 0 -> "010000000").

# JSON text is UTF-8; pass the encoding explicitly so decoding does not
# depend on the locale of the machine running the script.
with open(
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json",
    encoding="utf-8",
) as f:
    data = json.load(f)

new_data = []
for index, item in enumerate(data):
    # Progress output: one line per processed entry.
    print(index)

    # Split the position into (subfolder number, position inside subfolder).
    shard, offset = divmod(index, 10000)
    folder_index = 1000 + shard

    target_subfolder = f"{folder_index:05d}"
    target_image_name = f"{folder_index:05d}{offset:04d}"
    target_image_path = os.path.join(target_subfolder, target_image_name) + ".jpg"

    # Entries are updated in place; every other key is left untouched.
    item["id"] = target_image_name
    item["image"] = target_image_path
    new_data.append(item)

# Write the re-indexed entries to a new JSON file.
with open(
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json",
    "w",
    encoding="utf-8",
) as f:
    json.dump(new_data, f)