Technical-Docs-QA / scripts /analyze_real_qa_errors.py
mokarami's picture
Upload folder using huggingface_hub
529d699 verified
Raw
History Blame Contribute Delete
2.33 kB
from __future__ import annotations
import json
import sys
from pathlib import Path
# Directory two levels above this file's resolved path, i.e. the parent of the
# folder that contains this script. All artifact paths below are built from it.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
def classify(example: dict) -> str:
    """Assign one evaluated example to an error bucket.

    The decision uses four optional keys of ``example``:

    * ``answerable`` (default ``True``) selects which metrics are consulted.
    * ``no_answer_correct`` (default ``0.0``) is checked only for examples
      that are not answerable.
    * ``hit_at_1`` and ``f1`` (default ``0.0`` each) are checked, in that
      order, only for answerable examples.

    Returns:
        ``"false_positive"`` for a non-answerable example whose
        ``no_answer_correct`` is below 1.0; ``"retrieval_miss"`` for an
        answerable example whose ``hit_at_1`` is below 1.0;
        ``"reader_or_span_issue"`` for an answerable example whose ``f1`` is
        below 1.0; ``"ok"`` otherwise.
    """
    is_answerable = example.get("answerable", True)

    if not is_answerable:
        # Non-answerable examples are judged solely on the no-answer score.
        abstained_correctly = example.get("no_answer_correct", 0.0) >= 1.0
        return "ok" if abstained_correctly else "false_positive"

    # Answerable: the hit@1 check takes precedence over the F1 check.
    if example.get("hit_at_1", 0.0) < 1.0:
        return "retrieval_miss"
    if example.get("f1", 0.0) < 1.0:
        return "reader_or_span_issue"
    return "ok"
def main() -> None:
    """Turn the evaluation report into a Markdown error analysis.

    Reads ``evaluation_report.json`` from
    ``PROJECT_ROOT/artifacts/real_qa/reports``, buckets every entry of its
    ``"examples"`` list with ``classify``, and writes ``error_analysis.md``
    into the same directory. The Markdown contains a count per error bucket
    followed by one section per bucket listing at most the first ten examples
    of that bucket. Finally prints a small JSON object with the output path
    to stdout.
    """
    reports_dir = PROJECT_ROOT / "artifacts" / "real_qa" / "reports"
    report_path = reports_dir / "evaluation_report.json"
    output_path = reports_dir / "error_analysis.md"

    report = json.loads(report_path.read_text(encoding="utf-8"))

    # One list per label that classify() can return; "ok" is collected but
    # never rendered.
    buckets: dict[str, list[dict]] = {
        label: []
        for label in ("retrieval_miss", "reader_or_span_issue", "false_positive", "ok")
    }
    for item in report["examples"]:
        buckets[classify(item)].append(item)

    md = [
        "# Error Analysis",
        "",
        "## Counts",
        "",
        f"- Retrieval misses: `{len(buckets['retrieval_miss'])}`",
        f"- Reader/span issues: `{len(buckets['reader_or_span_issue'])}`",
        f"- False positives: `{len(buckets['false_positive'])}`",
        "",
    ]

    for label in ("retrieval_miss", "reader_or_span_issue", "false_positive"):
        md += [f"## {label}", ""]
        members = buckets[label]
        if members:
            # Only the first ten examples of each bucket are listed.
            for item in members[:10]:
                evidence = item.get("evidence")
                # Top-ranked evidence entry, or an empty dict when none exists.
                best = evidence[0] if evidence else {}
                md += [
                    f"### {item['id']}",
                    f"- Question: {item['question']}",
                    f"- Prediction: `{item.get('prediction', '')}`",
                    f"- Hit@1: `{item.get('hit_at_1', 0.0):.4f}`",
                    f"- F1: `{item.get('f1', 0.0):.4f}`",
                    f"- Top URL: {best.get('source_url', '')}",
                    "",
                ]
        else:
            md += ["- None", ""]

    # Trim surrounding whitespace so the file ends with exactly one newline.
    document = "\n".join(md).strip() + "\n"
    output_path.write_text(document, encoding="utf-8")

    summary = {"output_path": str(output_path)}
    print(json.dumps(summary, indent=2, ensure_ascii=False))
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()