ICBCBench-Leaderboard

Running

App Files Files Community

ICBCBench-Leaderboard / utils /merge_raw_data.py

Leonnel1220

Upload folder using huggingface_hub

5148820 verified 1 day ago

Raw

History Blame Contribute Delete

7.23 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""
	Merge per-model raw data and raw result files into data/data_viewer.jsonl.
	Supports ICBCBench dual-track format (objective / subjective) and legacy
	DeepResearch Bench format.

	Expected raw_data/<model>.jsonl fields:
	id, prompt, article (for subjective), answer (for objective), language, track

	Expected raw_results/<model>/results.jsonl fields:
	id, score, overall_score, track, language,
	objective_score / subjective_score / expert_score / citation_score / source_quality_score,
	confidence, correct
	"""

	import json
	from pathlib import Path

	# Model slugs (raw_data file stems) that are hidden from the merged output:
	# merge_jsonl_files() skips any raw data file whose stem appears here.
	EXCLUDED_SLUGS = {"baidu-qianfan-drs-pro", "baidu-qianfan-drs"}


	def _norm_score(val):
	"""Normalize scores to 0-100 scale."""
	if val is None:
	return None
	try:
	val = float(val)
	except (TypeError, ValueError):
	return val
	return val * 100 if val <= 1.0 else val


	def load_scores_for_model(model_results_dir: Path):
	    """Load one model's per-article scores, keyed by stringified article id.

	    Reads ``results.jsonl`` (ICBCBench) from *model_results_dir*, falling back
	    to ``raw_results.jsonl`` (legacy) when the former does not exist. Score
	    values are passed through ``_norm_score``; ``track`` defaults to
	    ``'subjective'`` and ``correct`` is copied as-is.

	    Blank lines and records without an ``id`` are skipped silently. Lines
	    that fail to parse or process are reported and skipped. When the same id
	    occurs more than once, the last record wins.

	    Args:
	        model_results_dir: Directory holding the model's results file.

	    Returns:
	        A dict mapping article id (str) to a dict of score fields. Empty when
	        no results file is found.
	    """
	    scores_by_id = {}

	    # Prefer the ICBCBench file name; fall back to the legacy one.
	    results_file = model_results_dir / "results.jsonl"
	    if not results_file.exists():
	        results_file = model_results_dir / "raw_results.jsonl"

	    if not results_file.exists():
	        print(f" 警告: 未找到模型 {model_results_dir.name} 的结果文件")
	        return scores_by_id

	    print(f" 正在从 {model_results_dir.name}/{results_file.name} 加载分数...")
	    with open(results_file, 'r', encoding='utf-8') as f:
	        for line_no, line in enumerate(f, start=1):
	            line = line.strip()
	            if not line:
	                # Blank lines (e.g. a trailing newline) are not parse errors.
	                continue
	            try:
	                data = json.loads(line)

	                # Check the raw id before str(): str(None) is the truthy
	                # string "None", which would let id-less records through and
	                # make them overwrite each other under that key.
	                raw_id = data.get('id')
	                if raw_id is None:
	                    continue
	                article_id = str(raw_id)
	                if not article_id:
	                    continue

	                # Fall back to 'score' when 'overall_score' is absent or null.
	                overall = data.get('overall_score')
	                if overall is None:
	                    overall = data.get('score')

	                scores_by_id[article_id] = {
	                    'overall_score': _norm_score(overall),
	                    'track': data.get('track', 'subjective'),
	                    'objective_score': _norm_score(data.get('objective_score')),
	                    'subjective_score': _norm_score(data.get('subjective_score')),
	                    'expert_score': _norm_score(data.get('expert_score')),
	                    'citation_score': _norm_score(data.get('citation_score')),
	                    'source_quality_score': _norm_score(data.get('source_quality_score')),
	                    'confidence': _norm_score(data.get('confidence')),
	                    'correct': data.get('correct'),
	                    # Legacy fields
	                    'comprehensiveness_score': _norm_score(data.get('comprehensiveness')),
	                    'insight_score': _norm_score(data.get('insight')),
	                    'instruction_following_score': _norm_score(data.get('instruction_following')),
	                    'readability_score': _norm_score(data.get('readability')),
	                }
	            except json.JSONDecodeError as e:
	                print(f" 错误: 解析JSON时出错 ({model_results_dir.name}, 行 {line_no}): {e}")
	            except Exception as e:
	                # Best effort: one bad record (e.g. a non-object JSON value)
	                # must not abort loading the rest of the file.
	                print(f" 错误: 处理数据时出错 ({model_results_dir.name}, 行 {line_no}): {e}")

	    print(f" 为模型 {model_results_dir.name} 加载了 {len(scores_by_id)} 条结果")
	    return scores_by_id


	def merge_jsonl_files():
	    """Merge each model's raw data with its scores into ``data/data_viewer.jsonl``.

	    Paths are resolved relative to the project root, taken as the parent of
	    this file's directory. For every ``data/raw_data/<model>.jsonl`` whose
	    stem is not in ``EXCLUDED_SLUGS``, scores are loaded from
	    ``data/raw_results/<model>/`` and joined to the raw records by
	    stringified ``id``. Records with no matching score entry are still
	    written, with ``None`` in the score fields. Models without a results
	    folder are skipped entirely.

	    Blank lines and records without an ``id`` are skipped silently. Lines
	    that fail to parse or process are reported and skipped. The output file
	    is overwritten; nothing is written when no raw data files are found.
	    """
	    project_root = Path(__file__).resolve().parent.parent
	    data_dir = project_root / "data"
	    raw_data_dir = data_dir / "raw_data"
	    raw_results_dir = data_dir / "raw_results"
	    output_file = data_dir / "data_viewer.jsonl"

	    # Sort so the output order does not depend on directory enumeration order.
	    input_files = sorted(raw_data_dir.glob("*.jsonl"))
	    print(f"在 {raw_data_dir} 中找到 {len(input_files)} 个模型JSONL文件")

	    if not input_files:
	        print("未找到任何原始数据文件，已退出。")
	        return

	    # Score fields copied verbatim from the loaded results, in output order.
	    score_fields = (
	        'overall_score',
	        'objective_score',
	        'subjective_score',
	        'expert_score',
	        'citation_score',
	        'source_quality_score',
	        'confidence',
	        'correct',
	        'comprehensiveness_score',
	        'insight_score',
	        'instruction_following_score',
	        'readability_score',
	    )

	    all_merged_data = []

	    for raw_data_file in input_files:
	        model_name = raw_data_file.stem
	        if model_name in EXCLUDED_SLUGS:
	            print(f"跳过隐藏模型: {model_name}")
	            continue
	        print(f"正在处理原始数据文件: {raw_data_file.name} (模型: {model_name})")

	        model_results_dir = raw_results_dir / model_name
	        if not model_results_dir.exists():
	            print(f" 警告: 未找到模型 {model_name} 对应的结果文件夹: {model_results_dir}")
	            continue

	        scores_for_current_model = load_scores_for_model(model_results_dir)

	        processed_count = 0
	        with open(raw_data_file, 'r', encoding='utf-8') as f_raw:
	            for line_no, line in enumerate(f_raw, start=1):
	                line = line.strip()
	                if not line:
	                    # Blank lines (e.g. a trailing newline) are not parse errors.
	                    continue
	                try:
	                    article_data = json.loads(line)

	                    # Check the raw id before str(): str(None) is the truthy
	                    # string "None", which would let id-less records through.
	                    raw_id = article_data.get('id')
	                    if raw_id is None:
	                        continue
	                    article_id = str(raw_id)
	                    if not article_id:
	                        continue

	                    article_scores = scores_for_current_model.get(article_id, {})

	                    # NOTE(review): the module docstring lists an `answer`
	                    # field for objective rows, but only `article` is copied
	                    # here - confirm whether the viewer needs `answer` too.
	                    merged_item = {
	                        'model_name': model_name,
	                        'id': article_id,
	                        'prompt': article_data.get('prompt'),
	                        'article': article_data.get('article'),
	                        'track': article_data.get('track') or article_scores.get('track') or 'subjective',
	                        'language': article_data.get('language') or 'en',
	                    }
	                    merged_item.update(
	                        (field, article_scores.get(field)) for field in score_fields
	                    )
	                    all_merged_data.append(merged_item)
	                    processed_count += 1
	                except json.JSONDecodeError as e:
	                    print(f" 错误: 解析原始数据JSON时出错 ({raw_data_file.name}, 行 {line_no}): {e}")
	                except Exception as e:
	                    # Best effort: one bad record must not abort the merge.
	                    print(f" 错误: 处理原始数据时出错 ({raw_data_file.name}, 行 {line_no}): {e}")

	        print(f" 为模型 {model_name} 处理了 {processed_count} 条数据。")

	    with open(output_file, 'w', encoding='utf-8') as f_out:
	        f_out.writelines(
	            json.dumps(item, ensure_ascii=False) + '\n' for item in all_merged_data
	        )

	    print(f"\n成功合并并保存到: {output_file}, 共 {len(all_merged_data)} 条记录")


	# Script entry point: run the merge only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    merge_jsonl_files()
	    print("所有文件处理完成！")