Spaces:
Running on Zero
Running on Zero
File size: 1,311 Bytes
db06ffa | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | import unittest
from zsgdp.parsers.docling_parser import _export_markdown, normalize_docling_markdown
from zsgdp.schema import DocumentProfile, PageProfile
class FakeDoclingDocument:
    """Minimal test double that exposes only ``export_to_markdown``.

    The method always returns the same fixed markdown: a level-one heading
    followed by a small two-column table.
    """

    # Canned output handed back by ``export_to_markdown``.
    _MARKDOWN = "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |"

    def export_to_markdown(self):
        """Return the canned markdown string."""
        return self._MARKDOWN
class DoclingParserTests(unittest.TestCase):
    """Tests for ``_export_markdown`` and ``normalize_docling_markdown``."""

    def test_export_markdown_uses_docling_method(self):
        # The helper should hand back exactly what the fake document's
        # ``export_to_markdown`` produces.
        exported = _export_markdown(FakeDoclingDocument())
        self.assertEqual(exported, "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |")

    def test_normalize_docling_markdown_emits_schema(self):
        # A one-page PDF profile is the only context the normalizer receives
        # besides the markdown itself.
        single_page = PageProfile(page_num=1, digital_text_chars=20)
        doc_profile = DocumentProfile(
            doc_id="d1",
            source_path="sample.pdf",
            file_type="pdf",
            page_count=1,
            extension=".pdf",
            pages=[single_page],
        )

        result = normalize_docling_markdown(
            markdown="# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |",
            profile=doc_profile,
            source_path="sample.pdf",
        )

        # The candidate is attributed to the docling parser.
        self.assertEqual(result.parser_name, "docling")
        # Heading + table in the input: two elements, one of them a table.
        self.assertEqual(len(result.elements), 2)
        self.assertEqual(len(result.tables), 1)
        # The first page record names its originating parser.
        first_page = result.pages[0]
        self.assertEqual(first_page["source_parser"], "docling")
# Run the test suite only when this file is executed as a script; importing
# the module has no side effects.
if __name__ == "__main__":
    unittest.main()
|