"""Tests for parser-disagreement and repair-success metrics.""" from __future__ import annotations import tempfile import unittest from pathlib import Path from zsgdp.merge.conflict_detection import build_candidate_conflict_report from zsgdp.pipeline import parse_document from zsgdp.schema import DocumentProfile, Element, ParseCandidate, PageProfile, TableObject from zsgdp.verify.parser_disagreement import compute_parser_disagreement from zsgdp.verify.repair_success import compute_repair_success def _profile() -> DocumentProfile: return DocumentProfile( doc_id="d1", source_path="/tmp/d1.md", file_type="markdown", page_count=1, extension=".md", pages=[PageProfile(page_num=1, digital_text_chars=400, digital_text_quality=0.9)], ) def _candidate(name: str, *, text: str, table_count: int = 0) -> ParseCandidate: elements = [ Element( element_id=f"{name}_e1", doc_id="d1", page_num=1, type="paragraph", text=text, reading_order=1, source_parser=name, ) ] tables: list[TableObject] = [] for index in range(table_count): tables.append( TableObject( table_id=f"{name}_t{index + 1}", page_nums=[1], markdown="| A | B |\n| --- | --- |\n| 1 | 2 |", source_parser=name, ) ) return ParseCandidate( parser_name=name, doc_id="d1", source_path="/tmp/d1.md", file_type="markdown", elements=elements, tables=tables, figures=[], pages=[{"page_num": 1, "source_parser": name}], confidence=0.8, ) class TestParserDisagreement(unittest.TestCase): def test_disagreement_rate_uses_pair_count_denominator(self): candidates = [ _candidate("docling", text="A" * 800, table_count=4), _candidate("pymupdf", text="A" * 100, table_count=0), ] report = build_candidate_conflict_report(candidates) parser_metrics = { "docling": {"parser": "docling", "failed": False}, "pymupdf": {"parser": "pymupdf", "failed": False}, } result = compute_parser_disagreement(report, parser_metrics) self.assertEqual(result["candidate_count"], 2) self.assertEqual(result["parser_pair_count"], 1) self.assertGreater(result["conflict_count"], 0) self.assertGreater(result["disagreement_rate"], 0.0) self.assertIn("text_coverage_gap", result["disagreement_by_type"]) self.assertIn("docling|pymupdf", result["disagreement_by_parser_pair"]) def test_disagreement_rate_zero_when_single_parser(self): result = compute_parser_disagreement( {"conflicts": []}, {"docling": {"parser": "docling", "failed": False}}, ) self.assertEqual(result["candidate_count"], 1) self.assertEqual(result["parser_pair_count"], 0) self.assertEqual(result["disagreement_rate"], 0.0) def test_failed_parsers_excluded_from_pair_count(self): result = compute_parser_disagreement( {"conflicts": []}, { "docling": {"parser": "docling", "failed": False}, "marker": {"parser": "marker", "failed": True, "error": "boom"}, "pymupdf": {"parser": "pymupdf", "failed": False}, }, ) self.assertEqual(result["candidate_count"], 2) self.assertEqual(result["parser_pair_count"], 1) class TestRepairSuccess(unittest.TestCase): def test_resolution_rate_when_blocking_issue_resolved(self): pre = {"score": 0.5, "issues": [{"issue_type": "invalid_table", "blocking": True, "page_num": 1, "region_id": "t1"}]} post = {"score": 0.9, "issues": []} history = [{"iteration": 1, "before_score": 0.5, "after_score": 0.9, "actions": [{"action": "repair_table"}]}] result = compute_repair_success(pre, post, history) self.assertEqual(result["pre_repair_blocking_count"], 1) self.assertEqual(result["post_repair_blocking_count"], 0) self.assertEqual(result["resolved_blocking_count"], 1) self.assertEqual(result["repair_resolution_rate"], 1.0) self.assertEqual(result["repair_regression_rate"], 0.0) self.assertEqual(result["iteration_count"], 1) self.assertAlmostEqual(result["score_delta"], 0.4, places=6) def test_regression_rate_counts_new_blocking_issues(self): pre = {"score": 0.7, "issues": [{"issue_type": "invalid_table", "blocking": True, "region_id": "t1"}]} post = { "score": 0.6, "issues": [ {"issue_type": "invalid_table", "blocking": True, "region_id": "t1"}, {"issue_type": "missing_text_coverage", "blocking": True, "page_num": 2}, ], } history = [{"iteration": 1, "before_score": 0.7, "after_score": 0.6, "actions": []}] result = compute_repair_success(pre, post, history) self.assertEqual(result["resolved_blocking_count"], 0) self.assertEqual(result["regressed_blocking_count"], 1) self.assertEqual(result["repair_regression_rate"], 1.0) self.assertEqual(result["repair_resolution_rate"], 0.0) def test_vacuous_success_when_no_pre_repair_blocking_issues(self): result = compute_repair_success( {"score": 1.0, "issues": []}, {"score": 1.0, "issues": []}, [], ) self.assertEqual(result["repair_resolution_rate"], 1.0) self.assertEqual(result["repair_regression_rate"], 0.0) self.assertEqual(result["iteration_count"], 0) class TestRepairSuccessIntegration(unittest.TestCase): def test_pipeline_records_resolution_for_iterative_table_repair(self): with tempfile.TemporaryDirectory() as tmp: input_path = Path(tmp) / "report.md" input_path.write_text( "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 | 3 |\n", encoding="utf-8", ) parsed = parse_document(input_path, Path(tmp) / "out") metrics = parsed.quality_report.metrics self.assertIn("repair_resolution_rate", metrics) self.assertIn("repair_regression_rate", metrics) self.assertIn("parser_disagreement_rate", metrics) success = parsed.provenance["repair_success"] self.assertGreaterEqual(success["pre_repair_issue_count"], 1) self.assertGreaterEqual(success["resolved_any_count"], 1) self.assertGreaterEqual(success["repair_resolution_rate_any"], 0.0) self.assertGreater(success["iteration_count"], 0) if __name__ == "__main__": unittest.main()