| |
| |
| """ |
| Merge per-model raw data and raw result files into data/data_viewer.jsonl. |
| Supports ICBCBench dual-track format (objective / subjective) and legacy |
| DeepResearch Bench format. |
| |
| Expected raw_data/<model>.jsonl fields: |
| id, prompt, article (for subjective), answer (for objective), language, track |
| |
| Expected raw_results/<model>/results.jsonl fields: |
| id, score, overall_score, track, language, |
| objective_score / subjective_score / expert_score / citation_score / source_quality_score, |
| confidence, correct |
| """ |
|
|
| import json |
| from pathlib import Path |
|
|
# Model slugs (raw_data file stems) that merge_jsonl_files() skips, so they
# never appear in the merged viewer output.
EXCLUDED_SLUGS = {"baidu-qianfan-drs-pro", "baidu-qianfan-drs"}
|
|
|
|
| def _norm_score(val): |
| """Normalize scores to 0-100 scale.""" |
| if val is None: |
| return None |
| try: |
| val = float(val) |
| except (TypeError, ValueError): |
| return val |
| return val * 100 if val <= 1.0 else val |
|
|
|
|
def load_scores_for_model(model_results_dir: Path):
    """Load per-article scores for one model.

    Reads ``results.jsonl`` (ICBCBench) from ``model_results_dir`` and falls
    back to ``raw_results.jsonl`` (legacy) when that file does not exist.
    Each non-blank line is parsed as one JSON object; lines that fail to
    parse or process are reported and skipped.

    Args:
        model_results_dir: Directory holding the model's result file.

    Returns:
        A dict mapping the stringified record ``id`` to a dict of scores.
        Numeric scores go through ``_norm_score``; ``track`` defaults to
        ``'subjective'``. The dict is empty when no result file exists.
        When an id occurs more than once, the last record wins.
    """
    scores_by_id = {}

    results_file = model_results_dir / "results.jsonl"
    if not results_file.exists():
        results_file = model_results_dir / "raw_results.jsonl"

    if not results_file.exists():
        print(f" 警告: 未找到模型 {model_results_dir.name} 的结果文件")
        return scores_by_id

    print(f" 正在从 {model_results_dir.name}/{results_file.name} 加载分数...")
    with open(results_file, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                # Blank / trailing empty lines are not records.
                continue
            try:
                data = json.loads(line)
                raw_id = data.get('id')
                # Test the raw value: str(None) would yield the truthy
                # string "None" and let id-less records through.
                if raw_id is None or raw_id == '':
                    continue
                article_id = str(raw_id)

                scores_by_id[article_id] = {
                    # Prefer 'overall_score'; fall back to 'score'.
                    'overall_score': _norm_score(data.get('overall_score', data.get('score'))),
                    'track': data.get('track', 'subjective'),
                    'objective_score': _norm_score(data.get('objective_score')),
                    'subjective_score': _norm_score(data.get('subjective_score')),
                    'expert_score': _norm_score(data.get('expert_score')),
                    'citation_score': _norm_score(data.get('citation_score')),
                    'source_quality_score': _norm_score(data.get('source_quality_score')),
                    'confidence': _norm_score(data.get('confidence')),
                    'correct': data.get('correct'),
                    # Dimension keys that carry no '_score' suffix in the
                    # result file; exposed here with the suffix added.
                    'comprehensiveness_score': _norm_score(data.get('comprehensiveness')),
                    'insight_score': _norm_score(data.get('insight')),
                    'instruction_following_score': _norm_score(data.get('instruction_following')),
                    'readability_score': _norm_score(data.get('readability')),
                }
            except json.JSONDecodeError as e:
                print(f" 错误: 解析JSON时出错 ({model_results_dir.name}, 行 {line_no}): {e}")
            except Exception as e:
                # Best-effort load: one malformed record must not abort the file.
                print(f" 错误: 处理数据时出错 ({model_results_dir.name}, 行 {line_no}): {e}")

    print(f" 为模型 {model_results_dir.name} 加载了 {len(scores_by_id)} 条结果")
    return scores_by_id
|
|
|
|
def merge_jsonl_files():
    """Merge raw model outputs with their scores into data/data_viewer.jsonl.

    For every ``data/raw_data/<model>.jsonl`` file (relative to the parent of
    this script's directory), loads the matching scores from
    ``data/raw_results/<model>/`` and writes one merged JSON object per raw
    record. Models listed in ``EXCLUDED_SLUGS`` and models without a results
    directory are skipped. Records without a matching score entry keep
    ``None`` for every score field. Records without an ``id`` are skipped.

    Returns:
        None. Results are written to ``data/data_viewer.jsonl``; nothing is
        written when no raw data files are found.
    """
    project_root = Path(__file__).resolve().parent.parent
    raw_data_dir = project_root / "data" / "raw_data"
    raw_results_dir = project_root / "data" / "raw_results"
    output_file = project_root / "data" / "data_viewer.jsonl"

    # glob() yields entries in filesystem order; sort so that the merged
    # output is reproducible from run to run.
    input_files = sorted(raw_data_dir.glob("*.jsonl"))
    print(f"在 {raw_data_dir} 中找到 {len(input_files)} 个模型JSONL文件")

    if not input_files:
        print("未找到任何原始数据文件,已退出。")
        return

    # Score fields copied verbatim from the loaded scores, in output order.
    score_fields = (
        'overall_score',
        'objective_score',
        'subjective_score',
        'expert_score',
        'citation_score',
        'source_quality_score',
        'confidence',
        'correct',
        'comprehensiveness_score',
        'insight_score',
        'instruction_following_score',
        'readability_score',
    )

    all_merged_data = []

    for raw_data_file in input_files:
        model_name = raw_data_file.stem
        if model_name in EXCLUDED_SLUGS:
            print(f"跳过隐藏模型: {model_name}")
            continue
        print(f"正在处理原始数据文件: {raw_data_file.name} (模型: {model_name})")

        model_results_dir = raw_results_dir / model_name
        if not model_results_dir.exists():
            print(f" 警告: 未找到模型 {model_name} 对应的结果文件夹: {model_results_dir}")
            continue

        scores_for_current_model = load_scores_for_model(model_results_dir)

        processed_count = 0
        with open(raw_data_file, 'r', encoding='utf-8') as f_raw:
            for line_no, line in enumerate(f_raw, start=1):
                line = line.strip()
                if not line:
                    # Blank / trailing empty lines are not records.
                    continue
                try:
                    article_data = json.loads(line)
                    raw_id = article_data.get('id')
                    # Test the raw value: str(None) would yield the truthy
                    # string "None" and let id-less records through.
                    if raw_id is None or raw_id == '':
                        continue
                    article_id = str(raw_id)

                    article_scores = scores_for_current_model.get(article_id, {})

                    merged_item = {
                        'model_name': model_name,
                        'id': article_id,
                        'prompt': article_data.get('prompt'),
                        'article': article_data.get('article'),
                        # Raw data wins over the results file; default track last.
                        'track': article_data.get('track') or article_scores.get('track') or 'subjective',
                        'language': article_data.get('language') or 'en',
                    }
                    merged_item.update(
                        (field, article_scores.get(field)) for field in score_fields
                    )
                    all_merged_data.append(merged_item)
                    processed_count += 1
                except json.JSONDecodeError as e:
                    print(f" 错误: 解析原始数据JSON时出错 ({raw_data_file.name}, 行 {line_no}): {e}")
                except Exception as e:
                    # Best-effort merge: one malformed record must not abort the run.
                    print(f" 错误: 处理原始数据时出错 ({raw_data_file.name}, 行 {line_no}): {e}")

        print(f" 为模型 {model_name} 处理了 {processed_count} 条数据。")

    # The output is opened only after all inputs are processed, so a failure
    # above leaves any existing data_viewer.jsonl untouched.
    with open(output_file, 'w', encoding='utf-8') as f_out:
        for item in all_merged_data:
            f_out.write(json.dumps(item, ensure_ascii=False) + '\n')

    print(f"\n成功合并并保存到: {output_file}, 共 {len(all_merged_data)} 条记录")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the merge, then print the completion message.
    merge_jsonl_files()
    print("所有文件处理完成!")
|
|