# Scraped from a web file viewer (revision db06ffa, file size 2,017 bytes).
import unittest
from zsgdp.normalize.markdown import markdown_to_blocks, normalize_markdown_candidate, normalize_markdown_table
class MarkdownNormalizerTests(unittest.TestCase):
    """Tests for the helpers imported from ``zsgdp.normalize.markdown``."""

    def test_markdown_to_blocks_preserves_pages_tables_and_images(self):
        """A two-page document yields pages 1 and 2, one table and one figure."""
        # The fixture needs an image reference on page 2; without it the
        # figure assertions below have nothing to match. Only the image path
        # is asserted, so the alt text ("Chart") is arbitrary.
        markdown = """# Report
Intro paragraph.
| Region | Q1 |
| --- | --- |
| NA | 10 |
<!-- page:2 -->
## Figure Section
![Chart](chart.png)
"""
        candidate = normalize_markdown_candidate(
            markdown=markdown,
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            parser_name="test",
        )

        page_numbers = [page["page_num"] for page in candidate.pages]
        self.assertEqual(page_numbers, [1, 2])

        # The table appears before the page marker, the image after it.
        self.assertEqual(len(candidate.tables), 1)
        self.assertEqual(candidate.tables[0].page_nums, [1])
        self.assertEqual(len(candidate.figures), 1)
        self.assertEqual(candidate.figures[0].page_num, 2)
        self.assertEqual(candidate.figures[0].image_path, "chart.png")

    def test_normalize_markdown_table_repairs_separator(self):
        """A pipe table with a well-formed separator row comes back unchanged."""
        table = "| A | B |\n| --- | --- |\n| 1 | 2 |"
        expected = "| A | B |\n| --- | --- |\n| 1 | 2 |"

        self.assertEqual(normalize_markdown_table(table), expected)

    def test_normalize_plain_aligned_table(self):
        """Whitespace-separated rows are rewritten as a pipe table."""
        table = "Region Q1 Q2\nNorth America 10 12\nEurope 8 7"
        expected = (
            "| Region | Q1 | Q2 |\n| --- | --- | --- |\n| North America | 10 | 12 |\n| Europe | 8 | 7 |"
        )

        self.assertEqual(normalize_markdown_table(table), expected)

    def test_markdown_to_blocks_detects_plain_aligned_table(self):
        """The block after the heading is classified as a table."""
        blocks = markdown_to_blocks("# Report\n\nRegion Q1 Q2\nNorth America 10 12\nEurope 8 7")

        self.assertEqual(blocks[1].block_type, "table")

    def test_markdown_to_blocks_classifies_caption(self):
        """A lone ``Figure N ...`` line is classified as a caption block."""
        blocks = markdown_to_blocks("Figure 1 Revenue trend")

        self.assertEqual(blocks[0].block_type, "caption")
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|