# Page-capture residue (not part of the program): "Spaces: Sleeping Sleeping"
from __future__ import annotations

import json
import sys
from pathlib import Path

# Directory one level above the folder that contains this file
# (parents[0] is the containing folder, parents[1] is its parent).
# All artifact paths used by this script are resolved relative to it.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
def classify(example: dict) -> str:
    """Assign one evaluated example to an error category.

    The checks run in priority order and the first match wins:

    * ``"false_positive"`` - the example is not answerable and its
      ``no_answer_correct`` score is below 1.0.
    * ``"retrieval_miss"`` - the example is answerable and ``hit_at_1``
      is below 1.0.
    * ``"reader_or_span_issue"`` - the example is answerable, the top hit
      was correct, but ``f1`` is below 1.0.
    * ``"ok"`` - none of the above.

    Args:
        example: Mapping for a single example. ``answerable`` defaults to
            ``True`` when absent; a metric that is absent or ``None``
            (e.g. JSON ``null``) is treated as ``0.0``.

    Returns:
        One of the four category names listed above.
    """

    def score(key: str) -> float:
        # ``or 0.0`` also covers an explicit None, which would otherwise
        # raise TypeError when compared with a float.
        return example.get(key) or 0.0

    if not example.get("answerable", True):
        # Metrics other than no_answer_correct are deliberately not
        # consulted for unanswerable examples.
        return "false_positive" if score("no_answer_correct") < 1.0 else "ok"

    if score("hit_at_1") < 1.0:
        return "retrieval_miss"
    if score("f1") < 1.0:
        return "reader_or_span_issue"
    return "ok"
def _format_example(example: dict) -> list[str]:
    """Render one example as the Markdown lines of a ``###`` sub-section."""
    evidence = example.get("evidence")
    top = evidence[0] if evidence else {}
    # A metric may be absent or None; both are shown as 0.0000 because
    # formatting None with ".4f" raises TypeError.
    hit_at_1 = example.get("hit_at_1") or 0.0
    f1 = example.get("f1") or 0.0
    return [
        f"### {example['id']}",
        f"- Question: {example['question']}",
        f"- Prediction: `{example.get('prediction', '')}`",
        f"- Hit@1: `{hit_at_1:.4f}`",
        f"- F1: `{f1:.4f}`",
        f"- Top URL: {top.get('source_url', '')}",
        "",
    ]


def main(
    report_path: Path | None = None,
    output_path: Path | None = None,
    max_examples: int = 10,
) -> None:
    """Build a Markdown error-analysis report from an evaluation report.

    Reads a JSON document whose ``"examples"`` entry is a list of example
    mappings, buckets every example with ``classify``, and writes a
    Markdown file containing per-category counts followed by up to
    ``max_examples`` examples for each error category. Finally prints a
    small JSON object with the output location to stdout.

    Args:
        report_path: JSON report to read. Defaults to
            ``artifacts/real_qa/reports/evaluation_report.json`` under
            ``PROJECT_ROOT``.
        output_path: Markdown file to write. Defaults to
            ``artifacts/real_qa/reports/error_analysis.md`` under
            ``PROJECT_ROOT``. Missing parent directories are created.
        max_examples: Maximum number of examples listed per category.

    Raises:
        FileNotFoundError: If the report file does not exist.
        KeyError: If the report has no ``"examples"`` entry, or a listed
            example lacks ``"id"`` or ``"question"``.
    """
    default_dir = PROJECT_ROOT / "artifacts" / "real_qa" / "reports"
    report_path = (
        Path(report_path) if report_path is not None
        else default_dir / "evaluation_report.json"
    )
    output_path = (
        Path(output_path) if output_path is not None
        else default_dir / "error_analysis.md"
    )

    report = json.loads(report_path.read_text(encoding="utf-8"))

    # Fixed keys on purpose: an unexpected label from classify() surfaces
    # as a KeyError instead of being silently collected.
    grouped: dict[str, list[dict]] = {
        "retrieval_miss": [],
        "reader_or_span_issue": [],
        "false_positive": [],
        "ok": [],
    }
    for example in report["examples"]:
        grouped[classify(example)].append(example)

    lines = [
        "# Error Analysis",
        "",
        "## Counts",
        "",
        f"- Retrieval misses: `{len(grouped['retrieval_miss'])}`",
        f"- Reader/span issues: `{len(grouped['reader_or_span_issue'])}`",
        f"- False positives: `{len(grouped['false_positive'])}`",
        "",
    ]

    # The "ok" bucket is intentionally not reported; only error categories
    # get a section.
    for section in ("retrieval_miss", "reader_or_span_issue", "false_positive"):
        lines.extend([f"## {section}", ""])
        members = grouped[section]
        if not members:
            lines.extend(["- None", ""])
            continue
        for example in members[:max_examples]:
            lines.extend(_format_example(example))

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # strip() drops the trailing blank entries so the file ends with
    # exactly one newline.
    output_path.write_text("\n".join(lines).strip() + "\n", encoding="utf-8")
    print(json.dumps({"output_path": str(output_path)}, indent=2, ensure_ascii=False))
# Run the report generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()