Spaces:
Running on Zero
Running on Zero
File size: 6,672 Bytes
"""Tests for per-parser ground-truth (GT) comparison metrics within a single merged run."""
from __future__ import annotations
import json
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from zsgdp.benchmarks.datasets import DatasetDocument, register_dataset_loader
from zsgdp.benchmarks.datasets import _LOADERS as _DATASET_LOADERS
from zsgdp.benchmarks.parser_quality import run_parser_benchmark
from zsgdp.benchmarks.per_parser_metrics import compute_per_parser_metrics
def _parsed_with_candidates(candidates: dict) -> SimpleNamespace:
return SimpleNamespace(provenance={"candidates": candidates})
class TestComputePerParserMetrics(unittest.TestCase):
    """Checks on ``compute_per_parser_metrics`` using hand-built candidates."""

    def test_returns_one_block_per_parser_with_layout_truths(self):
        # Two parsers, one layout truth: docling's title sits exactly on the
        # truth box, pymupdf's paragraph sits elsewhere on the same page.
        docling_title = {
            "element_id": "e1",
            "type": "title",
            "page_num": 1,
            "bbox": [0, 0, 100, 30],
        }
        pymupdf_paragraph = {
            "element_id": "e2",
            "type": "paragraph",
            "page_num": 1,
            "bbox": [200, 200, 300, 300],
        }
        parsed = _parsed_with_candidates(
            {
                "docling": {"elements": [docling_title], "tables": [], "figures": []},
                "pymupdf": {"elements": [pymupdf_paragraph], "tables": [], "figures": []},
            }
        )
        truths = [{"bbox": (0, 0, 100, 30), "category": "title", "page_num": 1}]

        metrics = compute_per_parser_metrics(parsed, layout_truths=truths)

        self.assertEqual(set(metrics), {"docling", "pymupdf"})
        self.assertEqual(metrics["docling"]["layout"]["class_aware_f1"], 1.0)
        # The pymupdf paragraph does not coincide with any truth box -> F1 of 0.
        self.assertEqual(metrics["pymupdf"]["layout"]["class_aware_f1"], 0.0)
        # The element count is still reported for a parser that scored zero.
        self.assertEqual(metrics["pymupdf"]["element_count"], 1)

    def test_omits_metric_block_when_truths_empty(self):
        # With no truths supplied, only the identifying/count keys are expected.
        title = {"element_id": "e1", "type": "title", "page_num": 1, "bbox": [0, 0, 10, 10]}
        parsed = _parsed_with_candidates(
            {"docling": {"elements": [title], "tables": [], "figures": []}}
        )

        metrics = compute_per_parser_metrics(parsed)

        expected_keys = {"parser", "element_count", "table_count", "figure_count"}
        self.assertEqual(set(metrics["docling"]), expected_keys)

    def test_table_and_formula_metrics_per_parser(self):
        # Same markdown is used for docling's predicted table and for the truth.
        table_markdown = "| A | B |\n| --- | --- |\n| 1 | 2 |"
        docling = {
            "elements": [
                {"element_id": "f1", "type": "formula", "page_num": 1, "text": "E = mc^2"},
            ],
            "tables": [
                {"table_id": "t1", "page_nums": [1], "markdown": table_markdown},
            ],
            "figures": [],
        }
        pymupdf = {
            "elements": [
                {"element_id": "f2", "type": "formula", "page_num": 1, "text": "E = mc^9"},
            ],
            "tables": [],
            "figures": [],
        }
        parsed = _parsed_with_candidates({"docling": docling, "pymupdf": pymupdf})

        metrics = compute_per_parser_metrics(
            parsed,
            table_truths=[{"markdown": table_markdown, "page_num": 1}],
            formula_truths=[{"latex": "E = mc^2", "page_num": 1}],
        )

        # Docling reproduces both the table and the formula exactly.
        docling_metrics = metrics["docling"]
        self.assertEqual(docling_metrics["table_structure"]["mean_table_score"], 1.0)
        self.assertEqual(docling_metrics["formula"]["mean_cer"], 0.0)
        # PyMuPDF's formula differs by one character and it predicted no tables.
        pymupdf_metrics = metrics["pymupdf"]
        self.assertGreater(pymupdf_metrics["formula"]["mean_cer"], 0.0)
        self.assertEqual(pymupdf_metrics["table_structure"]["matched_pair_count"], 0)

    def test_no_candidates_returns_empty_dict(self):
        # An empty candidates mapping yields an empty result.
        parsed = _parsed_with_candidates({})
        metrics = compute_per_parser_metrics(parsed, layout_truths=[])
        self.assertEqual(metrics, {})
class TestPipelinePopulatesCandidates(unittest.TestCase):
    """Checks that ``parse_document`` records parser candidates in provenance."""

    def test_candidates_serialized_to_provenance(self):
        # Imported inside the test, as in the original, rather than at module level.
        from zsgdp.pipeline import parse_document

        with tempfile.TemporaryDirectory() as workdir:
            root = Path(workdir)
            source = root / "doc.md"
            source.write_text("# Doc\n\nSome content.\n", encoding="utf-8")

            parsed = parse_document(source, root / "out")
            candidates = parsed.provenance.get("candidates")

            self.assertIsInstance(candidates, dict)
            self.assertGreater(len(candidates), 0)
            # A markdown input is expected to produce a "text" parser candidate.
            self.assertIn("text", candidates)
            self.assertIn("elements", candidates["text"])
class TestBenchmarkIntegration(unittest.TestCase):
    """Integration check: ``run_parser_benchmark`` emits per-parser metrics."""

    def test_per_parser_csv_emitted_with_omnidocbench_truths(self):
        # Runs the benchmark on one markdown document served by a fake
        # "omnidocbench" loader, then checks the summary and the CSV output.
        ground_truth = {
            "layout_dets": [
                {"category": "title", "bbox": [0, 0, 100, 30], "page_num": 1},
                {"category": "table", "markdown": "| A | B |\n| --- | --- |\n| 1 | 2 |", "page_num": 1},
            ]
        }
        with tempfile.TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            src = workdir / "in"
            src.mkdir()
            md_path = src / "doc.md"
            md_path.write_text("# Doc\n\n| A | B |\n| --- | --- |\n| 1 | 2 |\n", encoding="utf-8")

            def fake_loader(root: Path):
                # ``root`` is accepted to match how loaders are invoked but is
                # unused: the single document always points at ``md_path``.
                yield DatasetDocument(
                    dataset_id="omnidocbench",
                    doc_id="doc",
                    path=md_path,
                    ground_truth=ground_truth,
                    metadata={},
                )

            # Snapshot the loader registered before this test so cleanup puts
            # the registry back exactly as found, instead of assuming which
            # function was there. A sentinel distinguishes "no entry" from any
            # stored value.
            # NOTE(review): relies on the registry being a dict-like mapping
            # (supports .get/.pop) -- confirm against zsgdp.benchmarks.datasets.
            not_registered = object()
            previous_loader = _DATASET_LOADERS.get("omnidocbench", not_registered)
            try:
                register_dataset_loader("omnidocbench", fake_loader)
                summary = run_parser_benchmark(src, workdir / "out", dataset_name="omnidocbench")
            finally:
                # No imports or other fallible work here, so a cleanup error
                # cannot mask a failure raised by the benchmark itself.
                if previous_loader is not_registered:
                    _DATASET_LOADERS.pop("omnidocbench", None)
                else:
                    _DATASET_LOADERS["omnidocbench"] = previous_loader

            doc = summary["documents"][0]
            self.assertIn("per_parser_metrics", doc)
            self.assertGreater(len(doc["per_parser_metrics"]), 0)

            csv_path = workdir / "out" / "per_parser_metrics.csv"
            self.assertTrue(csv_path.exists())
            # Explicit encoding keeps the read independent of the platform locale.
            rows = csv_path.read_text(encoding="utf-8").splitlines()
            self.assertIn("parser", rows[0])
            self.assertGreater(len(rows), 1)
# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|