# DeepSeek-OCR-2-Math/tests/test_markdown_preview.py
# ricklon: Update markdown preview test loader for spatial helpers (474fd39)
import ast
import pathlib
import re
import html as html_lib
import types
import unittest
class _FakeMarkdown:
@staticmethod
def markdown(text, extensions=None):
blocks = []
lines = text.splitlines()
i = 0
while i < len(lines):
line = lines[i]
if line.startswith("```"):
i += 1
code_lines = []
while i < len(lines) and not lines[i].startswith("```"):
code_lines.append(lines[i])
i += 1
if i < len(lines) and lines[i].startswith("```"):
i += 1
code_text = "\n".join(code_lines)
blocks.append(f"<pre><code>{code_text}\n</code></pre>")
continue
if not line.strip():
i += 1
continue
para = [line]
i += 1
while i < len(lines) and lines[i].strip() and not lines[i].startswith("```"):
para.append(lines[i])
i += 1
paragraph = " ".join(para)
paragraph = re.sub(r"`([^`]+)`", r"<code>\1</code>", paragraph)
blocks.append(f"<p>{paragraph}</p>")
return "\n".join(blocks)
class _FakeConverter:
@staticmethod
def convert(latex):
return f"<math>{latex}</math>"
_FAKE_LATEX2MATHML = types.SimpleNamespace(converter=_FakeConverter())
def _load_preview_functions():
    """Load the preview helpers out of ``app.py`` and return ``to_math_html``.

    ``app.py`` (one directory above this test file) is parsed rather than
    imported: only the top-level functions named below are compiled, and they
    are executed in a namespace where ``md_lib`` and ``latex2mathml`` are
    replaced by the fakes defined in this module.
    NOTE(review): presumably this avoids importing app.py's other
    dependencies -- confirm.

    Returns:
        The ``to_math_html`` function object defined by the extracted code.

    Raises:
        KeyError: if ``app.py`` has no top-level ``to_math_html`` function.
    """
    app_file = pathlib.Path(__file__).resolve().parents[1] / "app.py"
    tree = ast.parse(app_file.read_text(encoding="utf-8"), filename=str(app_file))

    helper_names = {
        "_to_mathml",
        "_inject_spatial_gap_placeholders",
        "_restore_spatial_gap_placeholders",
        "to_math_html",
    }
    # Keep the selected definitions in source order.
    selected = sorted(
        (
            node
            for node in tree.body
            if isinstance(node, ast.FunctionDef) and node.name in helper_names
        ),
        key=lambda node: node.lineno,
    )

    compiled = compile(
        ast.Module(body=selected, type_ignores=[]),
        filename=str(app_file),
        mode="exec",
    )
    # Globals that the extracted functions are given in place of the app's
    # module-level imports.
    namespace = {
        "re": re,
        "html_lib": html_lib,
        "md_lib": _FakeMarkdown,
        "latex2mathml": _FAKE_LATEX2MATHML,
    }
    exec(compiled, namespace)
    return namespace["to_math_html"]
class MarkdownPreviewTests(unittest.TestCase):
    """Checks on ``to_math_html`` using the fake markdown/MathML backends."""

    def test_math_not_replaced_inside_code_contexts(self):
        """Math renders outside code, but stays literal inside code spans/fences."""
        render = _load_preview_functions()
        source = "".join(
            [
                "Inline code: `\\(a+b\\)`\n\n",
                "Normal inline math: \\(c+d\\)\n\n",
                "```text\n",
                "\\[x+y\\]\n",
                "\\(m+n\\)\n",
                "```\n",
            ]
        )
        html = render(source)

        # Math outside code should still render as MathML.
        self.assertIn("<math", html)

        # Code/pre contents should stay literal LaTeX delimiters.
        for literal in (
            "<code>\\(a+b\\)</code>",
            "<pre><code>",
            "\\[x+y\\]",
            "\\(m+n\\)",
        ):
            self.assertIn(literal, html)

        # Internal placeholder tokens should not leak to final output.
        for token in ("ZZINLINEMATH", "ZZDISPLAYMATH"):
            self.assertNotIn(token, html)


if __name__ == "__main__":
    # Allow running this file directly as a script.
    unittest.main()