File size: 1,869 Bytes
db06ffa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import unittest

from zsgdp.schema import DocumentProfile, Element, PageProfile, ParseCandidate, TableObject
from zsgdp.verify.parser_metrics import candidate_metrics, failure_metrics


class ParserMetricsTests(unittest.TestCase):
    """Unit tests for ``candidate_metrics`` and ``failure_metrics``."""

    def test_candidate_metrics_reports_coverage_and_valid_tables(self):
        """A one-page candidate with text, a bbox and a table yields full ratios."""
        # The profile advertises 11 digital text characters on page 1, which
        # equals len("hello world") carried by the candidate's only element.
        # NOTE(review): presumably this match is why coverage is expected to
        # be exactly 1.0 -- confirm against candidate_metrics.
        page_profile = PageProfile(page_num=1, digital_text_chars=11)
        doc_profile = DocumentProfile(
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            page_count=1,
            extension=".md",
            pages=[page_profile],
        )

        paragraph = Element(
            "e1",
            "d1",
            1,
            "paragraph",
            text="hello world",
            bbox=(0, 0, 10, 10),
        )
        # A single table given as a markdown grid: header row, separator row
        # and one data row.
        table = TableObject(
            table_id="t1",
            page_nums=[1],
            markdown="| A | B |\n| --- | --- |\n| 1 | 2 |",
        )
        parsed = ParseCandidate(
            parser_name="test",
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            pages=[{"page_num": 1}],
            elements=[paragraph],
            tables=[table],
            confidence=0.9,
        )

        report = candidate_metrics(parsed, doc_profile, elapsed_seconds=0.25)

        self.assertEqual(report["parser"], "test")
        self.assertEqual(report["text_coverage_ratio"], 1.0)
        self.assertEqual(report["valid_table_ratio"], 1.0)
        self.assertTrue(report["has_bboxes"])

    def test_failure_metrics_records_error(self):
        """A failed parser run reports the failure flag, message and timing."""
        doc_profile = DocumentProfile("d1", "sample.pdf", "pdf", 1, ".pdf")

        report = failure_metrics("docling", doc_profile, "boom", elapsed_seconds=1.5)

        # The error text and elapsed time passed in must come back unchanged.
        self.assertTrue(report["failed"])
        self.assertEqual(report["error"], "boom")
        self.assertEqual(report["elapsed_seconds"], 1.5)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()