# zeroshotGPU/tests/test_markdown_normalizer.py
# Arjunvir Singh
# Initial commit: zeroshotGPU MVP with full eval surface
# db06ffa
import unittest
from zsgdp.normalize.markdown import markdown_to_blocks, normalize_markdown_candidate, normalize_markdown_table
class MarkdownNormalizerTests(unittest.TestCase):
    """Tests for the helpers imported from ``zsgdp.normalize.markdown``."""

    def test_markdown_to_blocks_preserves_pages_tables_and_images(self):
        """Check pages, table and figure extracted from a two-page document.

        The fixture has a heading, a paragraph and a pipe table before a
        ``<!-- page:2 -->`` marker, then a heading and an image after it.
        """
        source_text = (
            "# Report\n"
            "Intro paragraph.\n"
            "| Region | Q1 |\n"
            "| --- | --- |\n"
            "| NA | 10 |\n"
            "<!-- page:2 -->\n"
            "## Figure Section\n"
            "![Chart caption](chart.png)\n"
        )

        parsed = normalize_markdown_candidate(
            markdown=source_text,
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            parser_name="test",
        )

        # Both pages must be reported, in order.
        page_numbers = [entry["page_num"] for entry in parsed.pages]
        self.assertEqual(page_numbers, [1, 2])

        # The single table sits before the page marker.
        self.assertEqual(len(parsed.tables), 1)
        self.assertEqual(parsed.tables[0].page_nums, [1])

        # The single figure sits after the page marker and keeps its path.
        self.assertEqual(len(parsed.figures), 1)
        first_figure = parsed.figures[0]
        self.assertEqual(first_figure.page_num, 2)
        self.assertEqual(first_figure.image_path, "chart.png")

    def test_normalize_markdown_table_repairs_separator(self):
        """Check the normalized form of a small two-column pipe table.

        NOTE(review): as visible here the input already equals the expected
        output, although the test name mentions a repair -- confirm the
        fixture is the intended one.
        """
        pipe_table = "| A | B |\n| --- | --- |\n| 1 | 2 |"
        expected = "| A | B |\n| --- | --- |\n| 1 | 2 |"

        self.assertEqual(normalize_markdown_table(pipe_table), expected)

    def test_normalize_plain_aligned_table(self):
        """Check that a plain-text table is rewritten as a pipe table.

        NOTE(review): the columns in this fixture are separated by single
        spaces -- confirm that matches the intended input.
        """
        plain_table = "Region Q1 Q2\nNorth America 10 12\nEurope 8 7"
        expected = (
            "| Region | Q1 | Q2 |\n"
            "| --- | --- | --- |\n"
            "| North America | 10 | 12 |\n"
            "| Europe | 8 | 7 |"
        )

        self.assertEqual(normalize_markdown_table(plain_table), expected)

    def test_markdown_to_blocks_detects_plain_aligned_table(self):
        """Check that the block at index 1 (after the heading) is a table."""
        document = "# Report\n\nRegion Q1 Q2\nNorth America 10 12\nEurope 8 7"
        parsed_blocks = markdown_to_blocks(document)

        self.assertEqual(parsed_blocks[1].block_type, "table")

    def test_markdown_to_blocks_classifies_caption(self):
        """Check that a lone ``Figure 1 ...`` line is typed as a caption."""
        parsed_blocks = markdown_to_blocks("Figure 1 Revenue trend")

        self.assertEqual(parsed_blocks[0].block_type, "caption")
if __name__ == "__main__":
    # Let the module be executed directly as a script to run its tests.
    unittest.main()