Spaces:
Running on Zero
Running on Zero
File size: 5,417 Bytes
"""Tests for parser-contribution metrics and the ablation runner."""
from __future__ import annotations
import json
import tempfile
import unittest
from pathlib import Path
from zsgdp.benchmarks.ablation_runner import ABLATION_METRIC_KEYS, run_parser_ablations
from zsgdp.benchmarks.parser_quality import run_parser_benchmark
class TestParserContribution(unittest.TestCase):
    """Check the parser-contribution fields produced by ``run_parser_benchmark``."""

    def test_contribution_counts_appear_in_summary(self):
        """Contribution counts/fractions exist per document and at the top level."""
        with tempfile.TemporaryDirectory() as scratch:
            root = Path(scratch)
            source_dir = root / "in"
            source_dir.mkdir()
            markdown_file = source_dir / "doc.md"
            markdown_file.write_text("# Doc\n\nA paragraph.\n", encoding="utf-8")

            report = run_parser_benchmark(
                source_dir,
                root / "out",
                dataset_name="custom_folder",
            )

            # Per-document record: both contribution mappings must be present.
            first_doc = report["documents"][0]
            self.assertIn("parser_contribution_counts", first_doc)
            self.assertIn("parser_contribution_fractions", first_doc)

            count_total = sum(first_doc["parser_contribution_counts"].values())
            self.assertGreater(count_total, 0)

            # Fractions are expected to add up to ~1.0 across all parsers.
            fraction_total = sum(first_doc["parser_contribution_fractions"].values())
            self.assertAlmostEqual(fraction_total, 1.0, places=6)

            # Top-level roll-up: non-empty, and counts/fractions share the same keys.
            rollup = report["parser_contribution_summary"]
            self.assertGreater(rollup["total"], 0)
            count_keys = set(rollup["counts"])
            fraction_keys = set(rollup["fractions"])
            self.assertEqual(count_keys, fraction_keys)

    def test_text_parser_dominates_markdown_doc(self):
        """For a plain markdown input, "text" contributes at least as much as the rest."""
        with tempfile.TemporaryDirectory() as scratch:
            root = Path(scratch)
            source_dir = root / "in"
            source_dir.mkdir()
            markdown_file = source_dir / "doc.md"
            markdown_file.write_text(
                "# Doc\n\nPara one.\n\nPara two.\n",
                encoding="utf-8",
            )

            report = run_parser_benchmark(
                source_dir,
                root / "out",
                dataset_name="custom_folder",
            )

            counts_by_parser = report["parser_contribution_summary"]["counts"]
            self.assertIn("text", counts_by_parser)

            from_text = counts_by_parser["text"]
            from_others = sum(
                count
                for name, count in counts_by_parser.items()
                if name != "text"
            )
            self.assertGreaterEqual(from_text, from_others)
class TestRunParserAblations(unittest.TestCase):
    """Exercise ``run_parser_ablations`` arm selection, output files, and validation.

    Each test runs against a fresh temporary directory created in ``setUp``,
    so no test reads from or writes to the current working directory.
    """

    def setUp(self):
        """Create a per-test scratch directory that is removed after the test."""
        scratch = tempfile.TemporaryDirectory()
        # addCleanup runs even when the test body fails or errors.
        self.addCleanup(scratch.cleanup)
        self.tmp_root = Path(scratch.name)
        self.out_dir = self.tmp_root / "out"

    def _write_source(self, markdown):
        """Write *markdown* to ``in/doc.md`` under the scratch dir; return ``in``."""
        src = self.tmp_root / "in"
        src.mkdir()
        (src / "doc.md").write_text(markdown, encoding="utf-8")
        return src

    def test_two_arms_plus_merged(self):
        """Two parsers yield two single-parser arms plus a merged arm."""
        src = self._write_source("# Doc\n\nPara one.\n\nPara two.\n")
        comparison = run_parser_ablations(
            src,
            self.out_dir,
            parsers=["text", "pymupdf"],
            dataset_name="custom_folder",
        )
        self.assertEqual(comparison["arm_count"], 3)
        arms = sorted(row["arm"] for row in comparison["rows"])
        self.assertEqual(arms, ["merged", "pymupdf", "text"])

        expected_artifacts = (
            "arm_text",
            "arm_pymupdf",
            "arm_merged",
            "ablation_comparison.csv",
            "ablation_summary.json",
        )
        for artifact in expected_artifacts:
            # subTest labels the failing artifact instead of a bare "False is not true".
            with self.subTest(artifact=artifact):
                self.assertTrue((self.out_dir / artifact).exists())

        # Each arm record carries the canonical metric keys (subset of those present).
        for row in comparison["rows"]:
            with self.subTest(arm=row["arm"]):
                self.assertIn("mean_quality_score", row)

    def test_no_merged_when_disabled(self):
        """``include_merged=False`` leaves only the per-parser arms."""
        src = self._write_source("# Doc\n\nPara.\n")
        comparison = run_parser_ablations(
            src,
            self.out_dir,
            parsers=["text", "pymupdf"],
            dataset_name="custom_folder",
            include_merged=False,
        )
        self.assertEqual(comparison["arm_count"], 2)
        self.assertNotIn("merged", {row["arm"] for row in comparison["rows"]})

    def test_single_parser_ablation_skips_merged_arm(self):
        """A single parser produces exactly one arm and no merged arm."""
        src = self._write_source("# Doc\n\nPara.\n")
        comparison = run_parser_ablations(
            src,
            self.out_dir,
            parsers=["text"],
            dataset_name="custom_folder",
        )
        # Single parser + include_merged defaults true, but len(parsers) == 1
        # so merged would be redundant and is skipped.
        self.assertEqual(comparison["arm_count"], 1)
        self.assertEqual(comparison["rows"][0]["arm"], "text")

    def test_empty_parsers_raises(self):
        """An empty parser list is rejected with ``ValueError``."""
        # Use the scratch directory rather than "." / "./out" so that nothing
        # can be written into the working directory if validation ever happens
        # after output setup. Paths are still passed as plain strings.
        src = self._write_source("# Doc\n\nPara.\n")
        with self.assertRaises(ValueError):
            run_parser_ablations(str(src), str(self.out_dir), parsers=[])

    def test_metric_keys_constant_matches_summary_shape(self):
        """Every key in ``ABLATION_METRIC_KEYS`` appears in the benchmark summary."""
        src = self._write_source("# Doc\n\nPara.\n")
        summary = run_parser_benchmark(src, self.out_dir, dataset_name="custom_folder")
        for key in ABLATION_METRIC_KEYS:
            # Where the runner supports subtests, every missing key is reported,
            # not just the first one encountered.
            with self.subTest(key=key):
                self.assertIn(key, summary, f"benchmark summary missing key {key}")
# Run the suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|