tools / utils /json /RS_merge.py
Adinosaur's picture
Upload folder using huggingface_hub
1c980b1 verified
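'''
Run this script first. It merges the answers JSON file and the questions JSON
file into a single JSON file so that the result can be converted afterwards.

Usage: see the example call at the end of this file. Fill in the paths of the
two input files and the path of the output file.
'''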
import copy
import json
from collections import defaultdict
def deep_merge(base_dict, merge_dict):
    """Recursively merge ``merge_dict`` into ``base_dict`` in place.

    Rules applied for every key of ``merge_dict``:

    * key missing from ``base_dict``: the value is added;
    * both values are dicts: they are merged recursively;
    * both values are lists: the incoming list is appended to the existing
      one (a new list is stored under the key);
    * any other conflict: the existing ``base_dict`` value is kept.

    Values taken from ``merge_dict`` are deep-copied, so later merges into
    ``base_dict`` can never mutate ``merge_dict`` through a shared nested
    object.

    Args:
        base_dict: Dictionary that receives the merged data; modified in place.
        merge_dict: Dictionary whose entries are merged in; never modified.

    Returns:
        ``base_dict`` itself, after merging.
    """
    for key, incoming in merge_dict.items():
        if key not in base_dict:
            # New field: copy it so base_dict does not alias merge_dict.
            base_dict[key] = copy.deepcopy(incoming)
            continue

        current = base_dict[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            deep_merge(current, incoming)
        elif isinstance(current, list) and isinstance(incoming, list):
            base_dict[key] = current + copy.deepcopy(incoming)
        # Any other type combination is a conflict: keep the existing value.
    return base_dict
def merge_json_files(answers_file, questions_file, output_file):
    """Merge answer records into their questions and write one JSON file.

    The answers file must contain ``{"answers": [...]}`` where every entry has
    an ``"id"`` key; the questions file must contain ``{"questions": [...]}``.
    For each question, every id listed under its optional ``"answers_ids"``
    key that matches a known answer is merged into a copy of the question
    with ``deep_merge``. Ids without a matching answer are ignored.

    Args:
        answers_file: Path of the JSON file holding the ``"answers"`` list.
        questions_file: Path of the JSON file holding the ``"questions"`` list.
        output_file: Path of the JSON file to write; the merged questions are
            stored under the ``"merged_data"`` key.

    Raises:
        OSError: If a file cannot be opened.
        KeyError: If a top-level ``"answers"``/``"questions"`` key or an
            answer's ``"id"`` key is missing.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default locale encoding.
    with open(answers_file, encoding="utf-8") as f:
        answers = {item['id']: item for item in json.load(f)['answers']}
    with open(questions_file, encoding="utf-8") as f:
        questions = json.load(f)['questions']

    merged = []
    for q in questions:
        merged_q = q.copy()
        for ans_id in q.get('answers_ids', []):
            answer = answers.get(ans_id)
            if answer is not None:
                # Merge a private copy: an answer may be referenced by several
                # questions, and the merge works in place on nested objects.
                merged_q = deep_merge(merged_q, copy.deepcopy(answer))
        merged.append(merged_q)

    # ensure_ascii=False emits raw non-ASCII characters, so the output file
    # must be opened with an encoding that can represent them.
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump({"merged_data": merged}, f, indent=2, ensure_ascii=False)
# Usage example: answers file, questions file, then the output file.
# NOTE: this call sits at module top level, so it runs whenever the file is
# executed or imported.
merge_json_files(
    "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSanswers.json",
    "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSquestions.json",
    '/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/merged_output.json',
)