"""Unit tests for the markdown normalisation helpers in ``zsgdp.normalize.markdown``."""

import unittest

from zsgdp.normalize.markdown import (
    markdown_to_blocks,
    normalize_markdown_candidate,
    normalize_markdown_table,
)

# NOTE(review): the reviewed copy of this file had every run of whitespace
# flattened to a single space, so the column gaps below are reconstructed.
# At least two spaces between columns are assumed, because "North America"
# must stay a single cell — confirm against the original fixture.
_PLAIN_ALIGNED_TABLE = (
    "Region         Q1  Q2\n"
    "North America  10  12\n"
    "Europe         8   7"
)


class MarkdownNormalizerTests(unittest.TestCase):
    """Exercise block splitting, table normalisation and caption detection."""

    def test_markdown_to_blocks_preserves_pages_tables_and_images(self):
        """A two-page document keeps its table on page 1 and its figure on page 2."""
        # NOTE(review): line breaks inside this literal are reconstructed from
        # a whitespace-flattened copy. The assertions below expect two pages,
        # yet no visible page marker survived, so a form feed ("\f") is
        # assumed as the page separator — confirm against the original file.
        markdown = """# Report

Intro paragraph.

| Region | Q1 |
| --- | --- |
| NA | 10 |

\f
## Figure Section

![Chart caption](chart.png)
"""

        candidate = normalize_markdown_candidate(
            markdown=markdown,
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            parser_name="test",
        )

        self.assertEqual([page["page_num"] for page in candidate.pages], [1, 2])

        self.assertEqual(len(candidate.tables), 1)
        self.assertEqual(candidate.tables[0].page_nums, [1])

        self.assertEqual(len(candidate.figures), 1)
        self.assertEqual(candidate.figures[0].page_num, 2)
        self.assertEqual(candidate.figures[0].image_path, "chart.png")

    def test_normalize_markdown_table_repairs_separator(self):
        """A pipe table comes back in canonical ``| --- |`` separator form."""
        # NOTE(review): as written, the input already equals the expected
        # output, so this only checks that a well-formed table is returned
        # unchanged. If the original input carried a malformed separator, it
        # was lost when the file's whitespace was flattened — confirm.
        table = "| A | B |\n| --- | --- |\n| 1 | 2 |"
        self.assertEqual(
            normalize_markdown_table(table),
            "| A | B |\n| --- | --- |\n| 1 | 2 |",
        )

    def test_normalize_plain_aligned_table(self):
        """A whitespace-aligned table is converted into a pipe table."""
        self.assertEqual(
            normalize_markdown_table(_PLAIN_ALIGNED_TABLE),
            "| Region | Q1 | Q2 |\n"
            "| --- | --- | --- |\n"
            "| North America | 10 | 12 |\n"
            "| Europe | 8 | 7 |",
        )

    def test_markdown_to_blocks_detects_plain_aligned_table(self):
        """The block after the heading is classified as a table."""
        blocks = markdown_to_blocks("# Report\n\n" + _PLAIN_ALIGNED_TABLE)
        self.assertEqual(blocks[1].block_type, "table")

    def test_markdown_to_blocks_classifies_caption(self):
        """A lone ``Figure N ...`` line is classified as a caption block."""
        blocks = markdown_to_blocks("Figure 1 Revenue trend")
        self.assertEqual(blocks[0].block_type, "caption")


if __name__ == "__main__":
    unittest.main()