Spaces:
Running on Zero
Running on Zero
| """Tests for per-parser GT-comparison metrics within a single merged run.""" | |
| from __future__ import annotations | |
| import json | |
| import tempfile | |
| import unittest | |
| from pathlib import Path | |
| from types import SimpleNamespace | |
| from zsgdp.benchmarks.datasets import DatasetDocument, register_dataset_loader | |
| from zsgdp.benchmarks.datasets import _LOADERS as _DATASET_LOADERS | |
| from zsgdp.benchmarks.parser_quality import run_parser_benchmark | |
| from zsgdp.benchmarks.per_parser_metrics import compute_per_parser_metrics | |
| def _parsed_with_candidates(candidates: dict) -> SimpleNamespace: | |
| return SimpleNamespace(provenance={"candidates": candidates}) | |
class TestComputePerParserMetrics(unittest.TestCase):
    """Unit tests for ``compute_per_parser_metrics`` on hand-built candidates."""

    def test_returns_one_block_per_parser_with_layout_truths(self):
        """Each candidate parser gets its own block scored against layout truths."""
        docling_title = {"element_id": "e1", "type": "title", "page_num": 1, "bbox": [0, 0, 100, 30]}
        pymupdf_paragraph = {
            "element_id": "e2",
            "type": "paragraph",
            "page_num": 1,
            "bbox": [200, 200, 300, 300],
        }
        parsed = _parsed_with_candidates(
            {
                "docling": {"elements": [docling_title], "tables": [], "figures": []},
                "pymupdf": {"elements": [pymupdf_paragraph], "tables": [], "figures": []},
            }
        )
        truths = [{"bbox": (0, 0, 100, 30), "category": "title", "page_num": 1}]

        metrics = compute_per_parser_metrics(parsed, layout_truths=truths)

        self.assertEqual(set(metrics), {"docling", "pymupdf"})
        # The docling title has the same box and category as the single truth.
        self.assertEqual(metrics["docling"]["layout"]["class_aware_f1"], 1.0)
        # The pymupdf paragraph box does not overlap the truth box -> 0 F1 expected.
        pymupdf_block = metrics["pymupdf"]
        self.assertEqual(pymupdf_block["layout"]["class_aware_f1"], 0.0)
        # The element count must still be reported for a parser that scored zero.
        self.assertEqual(pymupdf_block["element_count"], 1)

    def test_omits_metric_block_when_truths_empty(self):
        """Without any truths only the parser name and count fields are present."""
        title = {"element_id": "e1", "type": "title", "page_num": 1, "bbox": [0, 0, 10, 10]}
        parsed = _parsed_with_candidates(
            {"docling": {"elements": [title], "tables": [], "figures": []}}
        )

        metrics = compute_per_parser_metrics(parsed)

        expected_keys = {"parser", "element_count", "table_count", "figure_count"}
        self.assertEqual(set(metrics["docling"]), expected_keys)

    def test_table_and_formula_metrics_per_parser(self):
        """Table and formula scores are computed independently for each parser."""
        table_md = "| A | B |\n| --- | --- |\n| 1 | 2 |"
        docling = {
            "elements": [{"element_id": "f1", "type": "formula", "page_num": 1, "text": "E = mc^2"}],
            "tables": [{"table_id": "t1", "page_nums": [1], "markdown": table_md}],
            "figures": [],
        }
        pymupdf = {
            "elements": [{"element_id": "f2", "type": "formula", "page_num": 1, "text": "E = mc^9"}],
            "tables": [],
            "figures": [],
        }
        parsed = _parsed_with_candidates({"docling": docling, "pymupdf": pymupdf})

        metrics = compute_per_parser_metrics(
            parsed,
            table_truths=[{"markdown": table_md, "page_num": 1}],
            formula_truths=[{"latex": "E = mc^2", "page_num": 1}],
        )

        # Docling's table and formula are identical to the truths.
        docling_block = metrics["docling"]
        self.assertEqual(docling_block["table_structure"]["mean_table_score"], 1.0)
        self.assertEqual(docling_block["formula"]["mean_cer"], 0.0)
        # PyMuPDF's formula differs by one character and it predicted no tables.
        pymupdf_block = metrics["pymupdf"]
        self.assertGreater(pymupdf_block["formula"]["mean_cer"], 0.0)
        self.assertEqual(pymupdf_block["table_structure"]["matched_pair_count"], 0)

    def test_no_candidates_returns_empty_dict(self):
        """An empty candidates mapping produces an empty result."""
        parsed = _parsed_with_candidates({})
        metrics = compute_per_parser_metrics(parsed, layout_truths=[])
        self.assertEqual(metrics, {})
class TestPipelinePopulatesCandidates(unittest.TestCase):
    """Checks that ``parse_document`` records per-parser candidates in provenance."""

    def test_candidates_serialized_to_provenance(self):
        """Parsing a small markdown file yields a ``candidates`` dict with a ``text`` entry."""
        # Kept as a function-scope import, as in the original test.
        from zsgdp.pipeline import parse_document

        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            source = root / "doc.md"
            source.write_text("# Doc\n\nSome content.\n", encoding="utf-8")

            parsed = parse_document(source, root / "out")
            candidates = parsed.provenance.get("candidates")

            self.assertIsInstance(candidates, dict)
            self.assertGreater(len(candidates), 0)
            # The text parser is expected among the candidates for markdown input.
            self.assertIn("text", candidates)
            text_candidate = candidates["text"]
            self.assertIn("elements", text_candidate)
class TestBenchmarkIntegration(unittest.TestCase):
    """End-to-end check that a benchmark run emits per-parser metrics."""

    def test_per_parser_csv_emitted_with_omnidocbench_truths(self):
        """Run the parser benchmark on one markdown document with stubbed truths.

        A fake ``omnidocbench`` loader yields a single document whose ground
        truth holds one title box and one table. The test asserts that the
        summary's first document carries a non-empty ``per_parser_metrics``
        entry and that ``per_parser_metrics.csv`` exists with a header line
        mentioning ``parser`` followed by at least one more line.
        """
        ground_truth = {
            "layout_dets": [
                {"category": "title", "bbox": [0, 0, 100, 30], "page_num": 1},
                {"category": "table", "markdown": "| A | B |\n| --- | --- |\n| 1 | 2 |", "page_num": 1},
            ]
        }
        with tempfile.TemporaryDirectory() as tmp_name:
            tmp = Path(tmp_name)
            src = tmp / "in"
            src.mkdir()
            md_path = src / "doc.md"
            md_path.write_text("# Doc\n\n| A | B |\n| --- | --- |\n| 1 | 2 |\n", encoding="utf-8")

            def fake_loader(root: Path):
                # ``root`` is part of the loader signature but unused here:
                # the stub always yields the one document written above.
                yield DatasetDocument(
                    dataset_id="omnidocbench",
                    doc_id="doc",
                    path=md_path,
                    ground_truth=ground_truth,
                    metadata={},
                )

            # Snapshot whatever is registered right now and put exactly that
            # back afterwards, instead of importing a private loader by name.
            # NOTE(review): assumes _DATASET_LOADERS is the dict registry that
            # the previous version of this test assigned into - confirm.
            not_registered = object()
            previous_loader = _DATASET_LOADERS.get("omnidocbench", not_registered)
            try:
                register_dataset_loader("omnidocbench", fake_loader)
                summary = run_parser_benchmark(src, tmp / "out", dataset_name="omnidocbench")
            finally:
                if previous_loader is not_registered:
                    _DATASET_LOADERS.pop("omnidocbench", None)
                else:
                    _DATASET_LOADERS["omnidocbench"] = previous_loader

            doc = summary["documents"][0]
            self.assertIn("per_parser_metrics", doc)
            self.assertGreater(len(doc["per_parser_metrics"]), 0)

            csv_path = tmp / "out" / "per_parser_metrics.csv"
            self.assertTrue(csv_path.exists())
            # Explicit encoding keeps the read independent of the platform locale.
            lines = csv_path.read_text(encoding="utf-8").splitlines()
            # Check the length first so an empty file fails as an assertion
            # rather than as an IndexError on the header lookup.
            self.assertGreater(len(lines), 1)
            self.assertIn("parser", lines[0])
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()