#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Merge per-model raw data and raw result files into data/data_viewer.jsonl.

Supports ICBCBench dual-track format (objective / subjective) and legacy
DeepResearch Bench format.

Expected raw_data/<model>.jsonl fields:
    id, prompt, article (for subjective), answer (for objective),
    language, track

Expected raw_results/<model>/results.jsonl fields:
    id, score, overall_score, track, language,
    objective_score / subjective_score / expert_score / citation_score /
    source_quality_score, confidence, correct
"""

import json
from pathlib import Path

# Model slugs (raw-data file stems) that are skipped during the merge.
EXCLUDED_SLUGS = {
    "baidu-qianfan-drs-pro",
    "baidu-qianfan-drs",
}


def _norm_score(val):
    """Normalize scores to 0-100 scale.

    ``None`` is passed through. Values that cannot be converted with
    ``float()`` are returned unchanged. Numeric values ``<= 1.0`` are
    multiplied by 100; larger values are returned as floats as-is.
    """
    if val is None:
        return None
    try:
        val = float(val)
    except (TypeError, ValueError):
        return val
    return val * 100 if val <= 1.0 else val


def _record_id(record):
    """Return the record's ``id`` as a string, or ``None`` if missing/empty.

    ``str(None)`` is the truthy string ``'None'``, so the missing-id check
    has to happen before the string conversion.
    """
    raw_id = record.get('id')
    if raw_id is None:
        return None
    return str(raw_id) or None


def load_scores_for_model(model_results_dir: Path):
    """Load per-article scores from results.jsonl (ICBCBench) or raw_results.jsonl (legacy).

    Args:
        model_results_dir: Directory holding one model's result file.

    Returns:
        A dict mapping article id (as ``str``) to a dict of scores
        normalized with ``_norm_score``. Empty when no result file exists.
        Blank lines and records without an ``id`` are skipped; lines that
        fail to parse are reported on stdout and skipped.
    """
    scores_by_id = {}

    # Try ICBCBench results.jsonl first, then the legacy file name.
    results_file = model_results_dir / "results.jsonl"
    if not results_file.exists():
        results_file = model_results_dir / "raw_results.jsonl"
    if not results_file.exists():
        print(f" 警告: 未找到模型 {model_results_dir.name} 的结果文件")
        return scores_by_id

    print(f" 正在从 {model_results_dir.name}/{results_file.name} 加载分数...")
    with open(results_file, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not errors.
                continue
            try:
                data = json.loads(line)
                article_id = _record_id(data)
                if article_id is None:
                    continue

                # Fall back to 'score' when 'overall_score' is absent or null.
                overall = data.get('overall_score')
                if overall is None:
                    overall = data.get('score')

                scores = {
                    'overall_score': _norm_score(overall),
                    'track': data.get('track', 'subjective'),
                    'objective_score': _norm_score(data.get('objective_score')),
                    'subjective_score': _norm_score(data.get('subjective_score')),
                    'expert_score': _norm_score(data.get('expert_score')),
                    'citation_score': _norm_score(data.get('citation_score')),
                    'source_quality_score': _norm_score(data.get('source_quality_score')),
                    'confidence': _norm_score(data.get('confidence')),
                    'correct': data.get('correct'),
                    # Legacy fields
                    'comprehensiveness_score': _norm_score(data.get('comprehensiveness')),
                    'insight_score': _norm_score(data.get('insight')),
                    'instruction_following_score': _norm_score(data.get('instruction_following')),
                    'readability_score': _norm_score(data.get('readability')),
                }
                scores_by_id[article_id] = scores
            except json.JSONDecodeError as e:
                print(f" 错误: 解析JSON时出错 ({model_results_dir.name}, 行 {i+1}): {e}")
            except Exception as e:
                # Best-effort per line: one malformed record must not abort the load.
                print(f" 错误: 处理数据时出错 ({model_results_dir.name}, 行 {i+1}): {e}")

    print(f" 为模型 {model_results_dir.name} 加载了 {len(scores_by_id)} 条结果")
    return scores_by_id


def merge_jsonl_files(project_root=None):
    """Merge every model's raw data with its scores into data/data_viewer.jsonl.

    For each ``data/raw_data/<model>.jsonl`` file whose stem is not in
    ``EXCLUDED_SLUGS`` and that has a matching ``data/raw_results/<model>``
    directory, each record is joined with its scores by ``id`` and written
    as one JSON line to ``data/data_viewer.jsonl``.

    Args:
        project_root: Directory containing ``data/``. Defaults to the parent
            of this script's directory.
    """
    if project_root is None:
        project_root = Path(__file__).resolve().parent.parent
    else:
        project_root = Path(project_root)

    raw_data_dir = project_root / "data" / "raw_data"
    raw_results_dir = project_root / "data" / "raw_results"
    output_file = project_root / "data" / "data_viewer.jsonl"

    # Sorted so the output order does not depend on filesystem listing order.
    input_files = sorted(raw_data_dir.glob("*.jsonl"))
    print(f"在 {raw_data_dir} 中找到 {len(input_files)} 个模型JSONL文件")
    if not input_files:
        print("未找到任何原始数据文件，已退出。")
        return

    all_merged_data = []
    for raw_data_file in input_files:
        model_name = raw_data_file.stem
        if model_name in EXCLUDED_SLUGS:
            print(f"跳过隐藏模型: {model_name}")
            continue

        print(f"正在处理原始数据文件: {raw_data_file.name} (模型: {model_name})")
        model_results_dir = raw_results_dir / model_name
        if not model_results_dir.exists():
            print(f" 警告: 未找到模型 {model_name} 对应的结果文件夹: {model_results_dir}")
            continue

        scores_for_current_model = load_scores_for_model(model_results_dir)
        processed_count = 0
        with open(raw_data_file, 'r', encoding='utf-8') as f_raw:
            for i, line in enumerate(f_raw):
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) are not errors.
                    continue
                try:
                    article_data = json.loads(line)
                    article_id = _record_id(article_data)
                    if article_id is None:
                        continue

                    article_scores = scores_for_current_model.get(article_id, {})
                    merged_item = {
                        'model_name': model_name,
                        'id': article_id,
                        'prompt': article_data.get('prompt'),
                        'article': article_data.get('article'),
                        # Raw data wins; fall back to the result file, then the default.
                        'track': article_data.get('track') or article_scores.get('track') or 'subjective',
                        'language': article_data.get('language') or 'en',
                        'overall_score': article_scores.get('overall_score'),
                        'objective_score': article_scores.get('objective_score'),
                        'subjective_score': article_scores.get('subjective_score'),
                        'expert_score': article_scores.get('expert_score'),
                        'citation_score': article_scores.get('citation_score'),
                        'source_quality_score': article_scores.get('source_quality_score'),
                        'confidence': article_scores.get('confidence'),
                        'correct': article_scores.get('correct'),
                        'comprehensiveness_score': article_scores.get('comprehensiveness_score'),
                        'insight_score': article_scores.get('insight_score'),
                        'instruction_following_score': article_scores.get('instruction_following_score'),
                        'readability_score': article_scores.get('readability_score'),
                    }
                    all_merged_data.append(merged_item)
                    processed_count += 1
                except json.JSONDecodeError as e:
                    print(f" 错误: 解析原始数据JSON时出错 ({raw_data_file.name}, 行 {i+1}): {e}")
                except Exception as e:
                    # Best-effort per line: one malformed record must not abort the merge.
                    print(f" 错误: 处理原始数据时出错 ({raw_data_file.name}, 行 {i+1}): {e}")
        print(f" 为模型 {model_name} 处理了 {processed_count} 条数据。")

    with open(output_file, 'w', encoding='utf-8') as f_out:
        for item in all_merged_data:
            f_out.write(json.dumps(item, ensure_ascii=False) + '\n')

    print(f"\n成功合并并保存到: {output_file}, 共 {len(all_merged_data)} 条记录")


if __name__ == "__main__":
    merge_jsonl_files()
    print("所有文件处理完成！")