File size: 2,180 Bytes
1c980b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
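'''
Run this script first: it merges the answers file and the questions file into
a single JSON file so that the data is easier to convert afterwards.
Usage is simple: see the usage example at the end of the file. Fill in the
paths of the two input files and give an output path.
'''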

import copy
import json
from collections import defaultdict

def deep_merge(base_dict: dict, merge_dict: dict) -> dict:
    """Recursively merge ``merge_dict`` into ``base_dict`` in place.

    Rules for a key that exists in both mappings:

    * dict vs. dict: the two dicts are merged recursively;
    * list vs. list: the value becomes a new list holding the base items
      followed by the incoming items;
    * any other combination: the value already in ``base_dict`` is kept.

    A key that exists only in ``merge_dict`` is added to ``base_dict``.
    Everything taken from ``merge_dict`` is deep-copied, so ``base_dict`` never
    shares mutable objects with ``merge_dict`` and later merges into
    ``base_dict`` cannot modify ``merge_dict`` through an alias.

    Args:
        base_dict: Mapping that receives the merged data (mutated in place).
        merge_dict: Mapping whose entries are merged in; it is not modified.

    Returns:
        The same ``base_dict`` object, after merging.
    """
    for key, incoming in merge_dict.items():
        if key not in base_dict:
            # New field: store a private copy so the source stays independent.
            base_dict[key] = copy.deepcopy(incoming)
            continue

        current = base_dict[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            # Nested mappings are merged key by key.
            deep_merge(current, incoming)
        elif isinstance(current, list) and isinstance(incoming, list):
            # Lists are concatenated into a new list (base items first).
            base_dict[key] = current + copy.deepcopy(incoming)
        # Any other conflict: the existing base value wins, nothing to do.
    return base_dict

def merge_json_files(answers_file: str, questions_file: str, output_file: str) -> None:
    """Merge answer records into their questions and write the result as JSON.

    The answers file must contain an object with an ``"answers"`` list whose
    items each have an ``"id"`` key; the questions file must contain an object
    with a ``"questions"`` list. For every question, each id listed under its
    optional ``"answers_ids"`` key that is found among the answers is merged
    into a copy of the question with ``deep_merge``. Ids with no matching
    answer are skipped. The output is ``{"merged_data": [...]}`` with one
    entry per question, in the original order.

    All three files are read and written as UTF-8, because the output is
    produced with ``ensure_ascii=False`` and may contain non-ASCII text.

    Args:
        answers_file: Path of the JSON file holding the answers.
        questions_file: Path of the JSON file holding the questions.
        output_file: Path of the JSON file to create or overwrite.
    """
    # Load the answers and index them by id (a repeated id keeps the last item).
    with open(answers_file, encoding="utf-8") as f:
        answers = {item['id']: item for item in json.load(f)['answers']}

    with open(questions_file, encoding="utf-8") as f:
        questions = json.load(f)['questions']

    merged = []
    for q in questions:
        merged_q = q.copy()
        for ans_id in q.get('answers_ids', []):
            if ans_id in answers:
                # Merge a private copy: the answer index is shared by all
                # questions and must not be mutated through aliased nested
                # objects when several answers are merged into one question.
                merged_q = deep_merge(merged_q, copy.deepcopy(answers[ans_id]))
        merged.append(merged_q)

    # Save the result; non-ASCII characters are written verbatim.
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump({"merged_data": merged}, f, indent=2, ensure_ascii=False)

# Usage example. Guarded so the merge only runs when this file is executed as a
# script; importing the module no longer reads or writes these hard-coded paths.
if __name__ == "__main__":
    merge_json_files(
        "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSanswers.json",
        "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSquestions.json",
        '/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/merged_output.json',
    )