|
|
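'''
Run this program first: it merges the answers file and the questions file
into a single JSON file so that the data is easier to convert afterwards.

Usage is simple: see the example call at the end of this file. Fill in the
paths of the two input files and the path of the output file.
'''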
|
|
|
|
|
import copy
import json
from collections import defaultdict
|
|
|
|
|
def deep_merge(base_dict: dict, merge_dict: dict) -> dict:
    """Recursively merge ``merge_dict`` into ``base_dict`` in place.

    For every key of ``merge_dict``:

    * key absent from ``base_dict``: the value is added;
    * both values are dicts: they are merged recursively;
    * both values are lists: the incoming list is appended to the existing one;
    * any other conflict: the existing ``base_dict`` value is kept.

    Values taken from ``merge_dict`` are deep-copied. Without the copy,
    ``base_dict`` would alias objects nested in ``merge_dict``, and a later
    merge into ``base_dict`` would silently modify ``merge_dict`` as well.

    Args:
        base_dict: Dictionary that receives the merged data; mutated in place.
        merge_dict: Dictionary whose entries are merged in; left unmodified.

    Returns:
        ``base_dict`` itself (the same object that was passed in).
    """
    for key, incoming in merge_dict.items():
        if key not in base_dict:
            base_dict[key] = copy.deepcopy(incoming)
            continue

        existing = base_dict[key]
        if isinstance(existing, dict) and isinstance(incoming, dict):
            deep_merge(existing, incoming)
        elif isinstance(existing, list) and isinstance(incoming, list):
            # Build a new list so the list previously stored is not mutated.
            base_dict[key] = existing + copy.deepcopy(incoming)
        # Any other conflict (scalars or mismatched types): keep the base value.
    return base_dict
|
|
|
|
|
def merge_json_files(answers_file, questions_file, output_file):
    """Merge an answers file and a questions file into a single JSON file.

    The answers file must contain a top-level ``"answers"`` list whose items
    each have an ``"id"`` key. The questions file must contain a top-level
    ``"questions"`` list. For each question, every id listed under its optional
    ``"answers_ids"`` key that matches a known answer has that answer merged
    into a copy of the question via ``deep_merge``; ids with no matching answer
    are skipped.

    The result is written to ``output_file`` as ``{"merged_data": [...]}``,
    indented by two spaces, with non-ASCII characters left unescaped.

    Args:
        answers_file: Path of the JSON file holding the answers.
        questions_file: Path of the JSON file holding the questions.
        output_file: Path of the JSON file to create or overwrite.

    Raises:
        KeyError: If a required ``"answers"``, ``"questions"`` or ``"id"`` key
            is missing from the input data.
    """
    # Open every file as UTF-8 explicitly: the output is dumped with
    # ensure_ascii=False, so relying on the platform default encoding could
    # fail or produce non-UTF-8 JSON on systems whose locale is not UTF-8.
    with open(answers_file, encoding="utf-8") as f:
        answers_by_id = {item["id"]: item for item in json.load(f)["answers"]}

    with open(questions_file, encoding="utf-8") as f:
        questions = json.load(f)["questions"]

    merged = []
    for question in questions:
        merged_question = question.copy()
        for ans_id in question.get("answers_ids", []):
            if ans_id in answers_by_id:
                merged_question = deep_merge(merged_question, answers_by_id[ans_id])
        merged.append(merged_question)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump({"merged_data": merged}, f, indent=2, ensure_ascii=False)
|
|
|
|
|
|
|
|
# Usage example: pass the answers file, the questions file, then the output
# path. NOTE: this call sits at module level, so it runs whenever the file is
# executed or imported.
merge_json_files(
    "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSanswers.json",
    "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSquestions.json",
    '/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/merged_output.json',
)