Adinosaur
/

tools

Model card Files Files and versions

tools / utils /json /RS_merge.py

Adinosaur's picture

Upload folder using huggingface_hub

1c980b1 verified 9 months ago

history blame contribute delete

2.18 kB

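	'''
	Run this program first: it merges the answers file and the questions file into a single JSON file so that the result is easier to convert.
	Usage is simple - see the example at the end of the program: fill in the paths of the two input files and give an output path.
	'''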

	import copy
	import json
	from collections import defaultdict

	def deep_merge(base_dict, merge_dict):
	    """Recursively merge ``merge_dict`` into ``base_dict`` in place.

	    Rules for a key that exists in both mappings:

	    * both values are dicts -> they are merged recursively;
	    * both values are lists -> they are concatenated, base items first;
	    * any other combination -> the value already in ``base_dict`` wins.

	    Keys that exist only in ``merge_dict`` are added to ``base_dict``.
	    Everything taken from ``merge_dict`` is deep-copied, so a later merge
	    into ``base_dict`` cannot mutate ``merge_dict`` through a shared
	    nested object.

	    Args:
	        base_dict: Dictionary that is updated in place.
	        merge_dict: Dictionary whose entries are merged in; not modified.

	    Returns:
	        ``base_dict`` itself, after the merge.
	    """
	    for key, incoming in merge_dict.items():
	        if key not in base_dict:
	            # New field: store a private copy, never the caller's object.
	            base_dict[key] = copy.deepcopy(incoming)
	            continue

	        current = base_dict[key]
	        if isinstance(current, dict) and isinstance(incoming, dict):
	            deep_merge(current, incoming)
	        elif isinstance(current, list) and isinstance(incoming, list):
	            # Builds a new list; neither input list is mutated.
	            base_dict[key] = current + copy.deepcopy(incoming)
	        # Any other type conflict: keep the existing base value.
	    return base_dict

	def merge_json_files(answers_file, questions_file, output_file):
	    """Merge each question with the answers it references and save the result.

	    The answers file must hold a JSON object with an ``"answers"`` list whose
	    items each have an ``"id"``; the questions file must hold a JSON object
	    with a ``"questions"`` list. For every question, each id listed under its
	    optional ``"answers_ids"`` key that matches a known answer is merged into
	    a copy of the question with ``deep_merge``. Unknown ids are skipped.

	    Args:
	        answers_file: Path of the JSON file containing the answers.
	        questions_file: Path of the JSON file containing the questions.
	        output_file: Path the merged JSON is written to, as
	            ``{"merged_data": [...]}``.

	    Raises:
	        KeyError: If a top-level ``"answers"``/``"questions"`` key or an
	            answer's ``"id"`` is missing.
	    """
	    # Read and write as UTF-8 explicitly: the output keeps non-ASCII text
	    # verbatim (ensure_ascii=False), so it must not depend on the locale.
	    with open(answers_file, encoding="utf-8") as f:
	        answers = {item['id']: item for item in json.load(f)['answers']}

	    with open(questions_file, encoding="utf-8") as f:
	        questions = json.load(f)['questions']

	    merged = []
	    for q in questions:
	        # Deep copies keep the loaded data untouched, so an answer shared by
	        # several questions never carries over data from an earlier merge.
	        merged_q = copy.deepcopy(q)
	        for ans_id in q.get('answers_ids', []):
	            if ans_id in answers:
	                merged_q = deep_merge(merged_q, copy.deepcopy(answers[ans_id]))
	        merged.append(merged_q)

	    with open(output_file, 'w', encoding="utf-8") as f:
	        json.dump({"merged_data": merged}, f, indent=2, ensure_ascii=False)

	# Usage example. Guarded so that it only runs when the file is executed as a
	# script; importing the module no longer reads or writes these paths.
	if __name__ == "__main__":
	    merge_json_files(
	        "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSanswers.json",
	        "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/USGSquestions.json",
	        "/mnt/data/users/zys/proj/vlm_reasoning/unprocessed_data/Satellite/merged_output.json",
	    )