"""Merge an answers JSON file and a questions JSON file into one JSON file.

This script is meant to be run first: it combines the answers and the
questions into a single JSON file that is easier to convert afterwards.

Usage: see the example at the bottom of the file. Fill in the paths of the
two input files and the path of the output file.
"""
import copy
import json
from collections import defaultdict


def deep_merge(base_dict, merge_dict):
    """Recursively merge ``merge_dict`` into ``base_dict`` in place.

    Conflict rules, applied per key:

    * key missing from ``base_dict``: the value from ``merge_dict`` is added
      (by reference, not copied);
    * both values are dicts: they are merged recursively;
    * both values are lists: the result is ``base list + merge list``
      (a new list; duplicates are not removed);
    * anything else: the value already in ``base_dict`` wins.

    Args:
        base_dict: Dictionary that receives the data; it is mutated.
        merge_dict: Dictionary whose entries are merged in.

    Returns:
        The same ``base_dict`` object, after merging.
    """
    for key, incoming in merge_dict.items():
        if key not in base_dict:
            # New field: simply adopt the incoming value.
            base_dict[key] = incoming
            continue

        current = base_dict[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            deep_merge(current, incoming)
        elif isinstance(current, list) and isinstance(incoming, list):
            base_dict[key] = current + incoming
        # Any other type conflict: keep the value already in base_dict.
    return base_dict


def merge_json_files(answers_file, questions_file, output_file):
    """Merge every question with the answers it references and save the result.

    ``answers_file`` must hold ``{"answers": [...]}`` where each item has an
    ``"id"`` key; ``questions_file`` must hold ``{"questions": [...]}``.
    For each question, every id listed under its optional ``"answers_ids"``
    key that exists in the answers file is merged into a copy of the
    question with :func:`deep_merge` (so on conflicts the question's own
    scalar values win). Unknown answer ids are skipped.

    The result is written to ``output_file`` as
    ``{"merged_data": [...]}``, indented by 2 and without ASCII escaping.

    Args:
        answers_file: Path of the answers JSON file.
        questions_file: Path of the questions JSON file.
        output_file: Path of the JSON file to create or overwrite.

    Raises:
        KeyError: If a top-level ``"answers"``/``"questions"`` key or an
            answer's ``"id"`` key is missing.
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # Load the data and index the answers by id. JSON text is UTF-8, so the
    # encoding is stated explicitly instead of relying on the locale default.
    with open(answers_file, encoding="utf-8") as f:
        answers = {item['id']: item for item in json.load(f)['answers']}

    with open(questions_file, encoding="utf-8") as f:
        questions = json.load(f)['questions']

    merged = []
    for q in questions:
        # Deep copies keep every merge isolated: deep_merge inserts nested
        # objects by reference and mutates nested dicts in place, so without
        # copies a later merge would modify an answer record that other
        # questions also link to.
        merged_q = copy.deepcopy(q)
        for ans_id in q.get('answers_ids', []):
            if ans_id in answers:
                deep_merge(merged_q, copy.deepcopy(answers[ans_id]))
        merged.append(merged_q)

    # ensure_ascii=False emits raw non-ASCII characters, hence UTF-8 output.
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump({"merged_data": merged}, f, indent=2, ensure_ascii=False)


# Usage example: runs only when the file is executed as a script, so that
# importing the module does not touch the file system.
if __name__ == "__main__":
    merge_json_files(
        "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSanswers.json",
        "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSquestions.json",
        '/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/merged_output.json',
    )