"""Merge two JSON files (both lists or both dicts) and write the result to a new file."""
import json
import random  # NOTE(review): unused in this script; kept in case other code relies on it.

# Input files: data1 is read from json1_path, data2 from json2_path.
json1_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/select_raw_100k.json"
json2_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/select_gen_100k.json"
# Destination for the merged JSON.
output_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/mixed_200k.json"


def merge_json_data(data1, data2):
    """Combine two decoded JSON documents of the same container type.

    Args:
        data1: First decoded JSON value (a list or a dict).
        data2: Second decoded JSON value; must be the same kind as ``data1``.

    Returns:
        A new dict containing the entries of both inputs (on duplicate keys
        the value from ``data2`` wins), or a new list with the items of
        ``data1`` followed by the items of ``data2``.

    Raises:
        TypeError: If the inputs are not both lists or both dicts.
    """
    if isinstance(data1, dict) and isinstance(data2, dict):
        # Later unpacking takes precedence, so data2 overrides duplicate keys.
        return {**data1, **data2}
    if isinstance(data1, list) and isinstance(data2, list):
        return data1 + data2
    raise TypeError("data1 and data2 should be of the same type, either list or dict")


def main():
    """Load both input files, merge them, write the output and print its length."""
    # JSON text is UTF-8; be explicit so the result does not depend on the locale.
    with open(json1_path, encoding="utf-8") as f:
        data1 = json.load(f)
    with open(json2_path, encoding="utf-8") as f:
        data2 = json.load(f)

    merged_data = merge_json_data(data1, data2)

    # Write the merged JSON to the new file.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(merged_data, f)
    print(len(merged_data))


if __name__ == "__main__":
    main()