File size: 759 Bytes
bd4d522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import json
import os

# Load the source JSON file. The encoding is passed explicitly because JSON
# text is UTF-8, whereas open() otherwise falls back to the platform's locale
# encoding and could fail or mis-decode non-ASCII text.
with open(
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json",
    encoding="utf-8",
) as f:
    data = json.load(f)

# Re-index every record so its id and image path follow a sharded layout:
# 10,000 entries per sub-folder, with sub-folders numbered from 01000 upward.
new_data = []
for position, record in enumerate(data):
    print(position)

    # shard = which block of 10,000 the record falls in; offset = its slot
    # inside that block.
    shard, offset = divmod(position, 10000)
    shard_name = f"{1000 + shard:05d}"
    image_id = f"{shard_name}{offset:04d}"

    # Records are updated in place, so new_data holds the same objects as data.
    record["id"] = image_id
    record["image"] = os.path.join(shard_name, image_id) + ".jpg"
    new_data.append(record)


# Write the re-indexed records to a new JSON file.
output_path = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
)
with open(output_path, "w") as out_file:
    json.dump(new_data, out_file)