Spaces:
Running on Zero
Running on Zero
| """Tests for parser-disagreement and repair-success metrics.""" | |
| from __future__ import annotations | |
| import tempfile | |
| import unittest | |
| from pathlib import Path | |
| from zsgdp.merge.conflict_detection import build_candidate_conflict_report | |
| from zsgdp.pipeline import parse_document | |
| from zsgdp.schema import DocumentProfile, Element, ParseCandidate, PageProfile, TableObject | |
| from zsgdp.verify.parser_disagreement import compute_parser_disagreement | |
| from zsgdp.verify.repair_success import compute_repair_success | |
def _profile() -> DocumentProfile:
    """Return a fixture ``DocumentProfile`` for a one-page markdown document.

    The profile describes document ``d1`` at ``/tmp/d1.md`` with a single
    page profile (page 1, 400 digital text characters, quality 0.9).
    """
    only_page = PageProfile(
        page_num=1,
        digital_text_chars=400,
        digital_text_quality=0.9,
    )
    return DocumentProfile(
        doc_id="d1",
        source_path="/tmp/d1.md",
        file_type="markdown",
        page_count=1,
        extension=".md",
        pages=[only_page],
    )
def _candidate(name: str, *, text: str, table_count: int = 0) -> ParseCandidate:
    """Build a fixture ``ParseCandidate`` attributed to parser ``name``.

    Args:
        name: Parser name; also used as the prefix of element and table ids
            and as ``source_parser`` on every produced object.
        text: Body of the single paragraph element placed on page 1.
        table_count: Number of identical two-column tables to attach.

    Returns:
        A candidate for document ``d1`` with one paragraph element,
        ``table_count`` tables, no figures, one page entry, and a
        confidence of 0.8.
    """
    paragraph = Element(
        element_id=f"{name}_e1",
        doc_id="d1",
        page_num=1,
        type="paragraph",
        text=text,
        reading_order=1,
        source_parser=name,
    )
    # Table ids are 1-based: "<name>_t1", "<name>_t2", ...
    tables: list[TableObject] = [
        TableObject(
            table_id=f"{name}_t{number}",
            page_nums=[1],
            markdown="| A | B |\n| --- | --- |\n| 1 | 2 |",
            source_parser=name,
        )
        for number in range(1, table_count + 1)
    ]
    return ParseCandidate(
        parser_name=name,
        doc_id="d1",
        source_path="/tmp/d1.md",
        file_type="markdown",
        elements=[paragraph],
        tables=tables,
        figures=[],
        pages=[{"page_num": 1, "source_parser": name}],
        confidence=0.8,
    )
class TestParserDisagreement(unittest.TestCase):
    """Checks on the dict returned by ``compute_parser_disagreement``."""

    @staticmethod
    def _healthy(parser: str) -> dict:
        """Return the metrics entry used here for a parser that did not fail."""
        return {"parser": parser, "failed": False}

    def test_disagreement_rate_uses_pair_count_denominator(self):
        """Two non-failed parsers with very different output yield one pair.

        The candidates differ in text length (800 vs 100 characters) and in
        table count (4 vs 0); the test expects at least one conflict, a
        positive rate, and per-type / per-pair breakdown entries.
        """
        rich = _candidate("docling", text="A" * 800, table_count=4)
        sparse = _candidate("pymupdf", text="A" * 100, table_count=0)
        conflict_report = build_candidate_conflict_report([rich, sparse])
        metrics_by_parser = {
            parser: self._healthy(parser) for parser in ("docling", "pymupdf")
        }

        outcome = compute_parser_disagreement(conflict_report, metrics_by_parser)

        self.assertEqual(outcome["candidate_count"], 2)
        self.assertEqual(outcome["parser_pair_count"], 1)
        self.assertGreater(outcome["conflict_count"], 0)
        self.assertGreater(outcome["disagreement_rate"], 0.0)
        self.assertIn("text_coverage_gap", outcome["disagreement_by_type"])
        self.assertIn("docling|pymupdf", outcome["disagreement_by_parser_pair"])

    def test_disagreement_rate_zero_when_single_parser(self):
        """A lone parser gives zero pairs and a disagreement rate of 0.0."""
        empty_report = {"conflicts": []}
        metrics_by_parser = {"docling": self._healthy("docling")}

        outcome = compute_parser_disagreement(empty_report, metrics_by_parser)

        self.assertEqual(outcome["candidate_count"], 1)
        self.assertEqual(outcome["parser_pair_count"], 0)
        self.assertEqual(outcome["disagreement_rate"], 0.0)

    def test_failed_parsers_excluded_from_pair_count(self):
        """A parser flagged ``failed`` is not counted as a candidate or in pairs."""
        empty_report = {"conflicts": []}
        metrics_by_parser = {
            "docling": self._healthy("docling"),
            "marker": {"parser": "marker", "failed": True, "error": "boom"},
            "pymupdf": self._healthy("pymupdf"),
        }

        outcome = compute_parser_disagreement(empty_report, metrics_by_parser)

        self.assertEqual(outcome["candidate_count"], 2)
        self.assertEqual(outcome["parser_pair_count"], 1)
class TestRepairSuccess(unittest.TestCase):
    """Checks on the dict returned by ``compute_repair_success``."""

    def test_resolution_rate_when_blocking_issue_resolved(self):
        """One blocking issue before repair and none after is fully resolved."""
        blocking_table_issue = {
            "issue_type": "invalid_table",
            "blocking": True,
            "page_num": 1,
            "region_id": "t1",
        }
        before = {"score": 0.5, "issues": [blocking_table_issue]}
        after = {"score": 0.9, "issues": []}
        iterations = [
            {
                "iteration": 1,
                "before_score": 0.5,
                "after_score": 0.9,
                "actions": [{"action": "repair_table"}],
            }
        ]

        outcome = compute_repair_success(before, after, iterations)

        self.assertEqual(outcome["pre_repair_blocking_count"], 1)
        self.assertEqual(outcome["post_repair_blocking_count"], 0)
        self.assertEqual(outcome["resolved_blocking_count"], 1)
        self.assertEqual(outcome["repair_resolution_rate"], 1.0)
        self.assertEqual(outcome["repair_regression_rate"], 0.0)
        self.assertEqual(outcome["iteration_count"], 1)
        # 0.9 - 0.5 is not exactly 0.4 in floating point, hence the tolerance.
        self.assertAlmostEqual(outcome["score_delta"], 0.4, places=6)

    def test_regression_rate_counts_new_blocking_issues(self):
        """A blocking issue present only after repair counts as a regression."""
        surviving_issue = {
            "issue_type": "invalid_table",
            "blocking": True,
            "region_id": "t1",
        }
        introduced_issue = {
            "issue_type": "missing_text_coverage",
            "blocking": True,
            "page_num": 2,
        }
        before = {
            "score": 0.7,
            "issues": [
                {"issue_type": "invalid_table", "blocking": True, "region_id": "t1"},
            ],
        }
        after = {"score": 0.6, "issues": [surviving_issue, introduced_issue]}
        iterations = [
            {"iteration": 1, "before_score": 0.7, "after_score": 0.6, "actions": []}
        ]

        outcome = compute_repair_success(before, after, iterations)

        self.assertEqual(outcome["resolved_blocking_count"], 0)
        self.assertEqual(outcome["regressed_blocking_count"], 1)
        self.assertEqual(outcome["repair_regression_rate"], 1.0)
        self.assertEqual(outcome["repair_resolution_rate"], 0.0)

    def test_vacuous_success_when_no_pre_repair_blocking_issues(self):
        """With no issues and no iterations, resolution is 1.0 and regression 0.0."""
        before = {"score": 1.0, "issues": []}
        after = {"score": 1.0, "issues": []}

        outcome = compute_repair_success(before, after, [])

        self.assertEqual(outcome["repair_resolution_rate"], 1.0)
        self.assertEqual(outcome["repair_regression_rate"], 0.0)
        self.assertEqual(outcome["iteration_count"], 0)
class TestRepairSuccessIntegration(unittest.TestCase):
    """End-to-end check that ``parse_document`` reports repair metrics."""

    def test_pipeline_records_resolution_for_iterative_table_repair(self):
        """Parse a markdown file with a ragged table and inspect repair stats.

        The input table has a two-column header but a three-cell data row.
        The test expects the rate metrics in the quality report and at least
        one issue, one resolution, and one repair iteration in provenance.
        """
        ragged_table_doc = "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 | 3 |\n"
        with tempfile.TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            source = workdir / "report.md"
            source.write_text(ragged_table_doc, encoding="utf-8")

            parsed = parse_document(source, workdir / "out")

            # Assertions run while the temporary directory still exists.
            reported = parsed.quality_report.metrics
            for metric_name in (
                "repair_resolution_rate",
                "repair_regression_rate",
                "parser_disagreement_rate",
            ):
                self.assertIn(metric_name, reported)

            repair_stats = parsed.provenance["repair_success"]
            self.assertGreaterEqual(repair_stats["pre_repair_issue_count"], 1)
            self.assertGreaterEqual(repair_stats["resolved_any_count"], 1)
            self.assertGreaterEqual(repair_stats["repair_resolution_rate_any"], 0.0)
            self.assertGreater(repair_stats["iteration_count"], 0)
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()