# Spaces: Running on Zero
import unittest

from zsgdp.normalize.markdown import (
    markdown_to_blocks,
    normalize_markdown_candidate,
    normalize_markdown_table,
)
| class MarkdownNormalizerTests(unittest.TestCase): | |
| def test_markdown_to_blocks_preserves_pages_tables_and_images(self): | |
| markdown = """# Report | |
| Intro paragraph. | |
| | Region | Q1 | | |
| | --- | --- | | |
| | NA | 10 | | |
| <!-- page:2 --> | |
| ## Figure Section | |
|  | |
| """ | |
| candidate = normalize_markdown_candidate( | |
| markdown=markdown, | |
| doc_id="d1", | |
| source_path="sample.md", | |
| file_type="markdown", | |
| parser_name="test", | |
| ) | |
| self.assertEqual([page["page_num"] for page in candidate.pages], [1, 2]) | |
| self.assertEqual(len(candidate.tables), 1) | |
| self.assertEqual(candidate.tables[0].page_nums, [1]) | |
| self.assertEqual(len(candidate.figures), 1) | |
| self.assertEqual(candidate.figures[0].page_num, 2) | |
| self.assertEqual(candidate.figures[0].image_path, "chart.png") | |
| def test_normalize_markdown_table_repairs_separator(self): | |
| table = "| A | B |\n| --- | --- |\n| 1 | 2 |" | |
| self.assertEqual(normalize_markdown_table(table), "| A | B |\n| --- | --- |\n| 1 | 2 |") | |
| def test_normalize_plain_aligned_table(self): | |
| table = "Region Q1 Q2\nNorth America 10 12\nEurope 8 7" | |
| self.assertEqual( | |
| normalize_markdown_table(table), | |
| "| Region | Q1 | Q2 |\n| --- | --- | --- |\n| North America | 10 | 12 |\n| Europe | 8 | 7 |", | |
| ) | |
| def test_markdown_to_blocks_detects_plain_aligned_table(self): | |
| blocks = markdown_to_blocks("# Report\n\nRegion Q1 Q2\nNorth America 10 12\nEurope 8 7") | |
| self.assertEqual(blocks[1].block_type, "table") | |
| def test_markdown_to_blocks_classifies_caption(self): | |
| blocks = markdown_to_blocks("Figure 1 Revenue trend") | |
| self.assertEqual(blocks[0].block_type, "caption") | |
# Run the test suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()