ricklon committed on
Commit
093c1dd
·
1 Parent(s): 9d4d37e

Fix markdown preview math replacement inside code blocks

Browse files
Files changed (2) hide show
  1. app.py +29 -3
  2. tests/test_markdown_preview.py +107 -0
app.py CHANGED
@@ -178,18 +178,23 @@ def to_math_html(text: str) -> str:
178
  return ""
179
 
180
  blocks: dict[str, str] = {}
 
181
  counter = [0]
182
 
183
  def display_block(m):
184
  key = f'ZZDISPLAYMATH{counter[0]}ZZ'
185
  counter[0] += 1
186
- blocks[key] = f'<div class="math-display">{_to_mathml(m.group(1).strip(), display=True)}</div>'
 
 
187
  return f'\n\n{key}\n\n'
188
 
189
  def inline_math(m):
190
  key = f'ZZINLINEMATH{counter[0]}ZZ'
191
  counter[0] += 1
192
- blocks[key] = _to_mathml(m.group(1).strip(), display=False)
 
 
193
  return key
194
 
195
  # Replace display math \[...\] with placeholder tokens
@@ -202,11 +207,32 @@ def to_math_html(text: str) -> str:
202
  # Run markdown on text that now contains only safe placeholder tokens
203
  html = md_lib.markdown(text, extensions=['tables', 'fenced_code', 'sane_lists'])
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  # Swap placeholders back for MathML/HTML (handle <p>KEY</p> wrapping too)
206
  for key, value in blocks.items():
207
  html = html.replace(f'<p>{key}</p>', value)
208
  html = html.replace(key, value)
209
 
 
 
 
 
 
 
 
 
210
  return f'<div class="math-preview">{html}</div>'
211
 
212
  def embed_images(markdown, crops):
@@ -464,4 +490,4 @@ if __name__ == "__main__":
464
  server_name="0.0.0.0" if local else None,
465
  head=PREVIEW_CSS,
466
  ssr_mode=False, # SSR is experimental in Gradio 6 and breaks HF Spaces routing
467
- )
 
178
  return ""
179
 
180
  blocks: dict[str, str] = {}
181
+ literals: dict[str, str] = {}
182
  counter = [0]
183
 
184
  def display_block(m):
185
  key = f'ZZDISPLAYMATH{counter[0]}ZZ'
186
  counter[0] += 1
187
+ expr = m.group(1).strip()
188
+ blocks[key] = f'<div class="math-display">{_to_mathml(expr, display=True)}</div>'
189
+ literals[key] = f'\\[{expr}\\]'
190
  return f'\n\n{key}\n\n'
191
 
192
  def inline_math(m):
193
  key = f'ZZINLINEMATH{counter[0]}ZZ'
194
  counter[0] += 1
195
+ expr = m.group(1).strip()
196
+ blocks[key] = _to_mathml(expr, display=False)
197
+ literals[key] = f'\\({expr}\\)'
198
  return key
199
 
200
  # Replace display math \[...\] with placeholder tokens
 
207
  # Run markdown on text that now contains only safe placeholder tokens
208
  html = md_lib.markdown(text, extensions=['tables', 'fenced_code', 'sane_lists'])
209
 
210
+ # Protect rendered code/pre blocks so placeholder swap never mutates literal code.
211
+ protected_blocks: dict[str, str] = {}
212
+ protected_counter = [0]
213
+
214
+ def _protect_code_html(m):
215
+ token = f'ZZCODEHTML{protected_counter[0]}ZZ'
216
+ protected_counter[0] += 1
217
+ protected_blocks[token] = m.group(0)
218
+ return token
219
+
220
+ html = re.sub(r'<pre\b[^>]*>.*?</pre>', _protect_code_html, html, flags=re.DOTALL)
221
+ html = re.sub(r'<code\b[^>]*>.*?</code>', _protect_code_html, html, flags=re.DOTALL)
222
+
223
  # Swap placeholders back for MathML/HTML (handle <p>KEY</p> wrapping too)
224
  for key, value in blocks.items():
225
  html = html.replace(f'<p>{key}</p>', value)
226
  html = html.replace(key, value)
227
 
228
+ # Restore protected literal code/pre blocks unchanged.
229
+ for token, original in protected_blocks.items():
230
+ html = html.replace(token, original)
231
+
232
+ # Placeholders left at this stage occur inside code/pre; keep them literal.
233
+ for key, literal in literals.items():
234
+ html = html.replace(key, html_lib.escape(literal))
235
+
236
  return f'<div class="math-preview">{html}</div>'
237
 
238
  def embed_images(markdown, crops):
 
490
  server_name="0.0.0.0" if local else None,
491
  head=PREVIEW_CSS,
492
  ssr_mode=False, # SSR is experimental in Gradio 6 and breaks HF Spaces routing
493
+ )
tests/test_markdown_preview.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import pathlib
3
+ import re
4
+ import html as html_lib
5
+ import types
6
+ import unittest
7
+
8
+
9
+ class _FakeMarkdown:
10
+ @staticmethod
11
+ def markdown(text, extensions=None):
12
+ blocks = []
13
+ lines = text.splitlines()
14
+ i = 0
15
+ while i < len(lines):
16
+ line = lines[i]
17
+ if line.startswith("```"):
18
+ i += 1
19
+ code_lines = []
20
+ while i < len(lines) and not lines[i].startswith("```"):
21
+ code_lines.append(lines[i])
22
+ i += 1
23
+ if i < len(lines) and lines[i].startswith("```"):
24
+ i += 1
25
+ code_text = "\n".join(code_lines)
26
+ blocks.append(f"<pre><code>{code_text}\n</code></pre>")
27
+ continue
28
+
29
+ if not line.strip():
30
+ i += 1
31
+ continue
32
+
33
+ para = [line]
34
+ i += 1
35
+ while i < len(lines) and lines[i].strip() and not lines[i].startswith("```"):
36
+ para.append(lines[i])
37
+ i += 1
38
+
39
+ paragraph = " ".join(para)
40
+ paragraph = re.sub(r"`([^`]+)`", r"<code>\1</code>", paragraph)
41
+ blocks.append(f"<p>{paragraph}</p>")
42
+
43
+ return "\n".join(blocks)
44
+
45
+
46
+ class _FakeConverter:
47
+ @staticmethod
48
+ def convert(latex):
49
+ return f"<math>{latex}</math>"
50
+
51
+
52
+ _FAKE_LATEX2MATHML = types.SimpleNamespace(converter=_FakeConverter())
53
+
54
+
55
def _load_preview_functions():
    """Extract ``to_math_html`` from ``app.py`` without importing the module.

    The file one directory above this test's directory is parsed with ``ast``;
    only the top-level ``_to_mathml`` and ``to_math_html`` function
    definitions are compiled and executed, in a namespace where the markdown
    and latex2mathml dependencies are replaced by the fakes defined in this
    file.

    Returns:
        The ``to_math_html`` callable bound to that namespace.
    """
    app_path = pathlib.Path(__file__).resolve().parents[1] / "app.py"
    tree = ast.parse(
        app_path.read_text(encoding="utf-8"),
        filename=str(app_path),
    )

    wanted = {"_to_mathml", "to_math_html"}
    selected = sorted(
        (
            node
            for node in tree.body
            if isinstance(node, ast.FunctionDef) and node.name in wanted
        ),
        key=lambda node: node.lineno,
    )

    # Compile just the selected definitions, keeping app.py as the filename
    # so tracebacks point at the real source.
    compiled = compile(
        ast.Module(body=selected, type_ignores=[]),
        filename=str(app_path),
        mode="exec",
    )

    namespace = {
        "re": re,
        "html_lib": html_lib,
        "md_lib": _FakeMarkdown,
        "latex2mathml": _FAKE_LATEX2MATHML,
    }
    exec(compiled, namespace)
    return namespace["to_math_html"]
75
+
76
+
77
class MarkdownPreviewTests(unittest.TestCase):
    """Checks on how the markdown preview treats LaTeX delimiters in code."""

    def test_math_not_replaced_inside_code_contexts(self):
        """Math delimiters inside inline/fenced code must stay literal."""
        render = _load_preview_functions()

        sample = "".join(
            [
                "Inline code: `\\(a+b\\)`\n\n",
                "Normal inline math: \\(c+d\\)\n\n",
                "```text\n",
                "\\[x+y\\]\n",
                "\\(m+n\\)\n",
                "```\n",
            ]
        )

        html = render(sample)

        # Math outside code should still render as MathML.
        self.assertIn("<math", html)

        # Code/pre contents should stay literal LaTeX delimiters.
        literal_fragments = (
            "<code>\\(a+b\\)</code>",
            "<pre><code>",
            "\\[x+y\\]",
            "\\(m+n\\)",
        )
        for fragment in literal_fragments:
            self.assertIn(fragment, html)

        # Internal placeholder tokens should not leak to final output.
        for token_prefix in ("ZZINLINEMATH", "ZZDISPLAYMATH"):
            self.assertNotIn(token_prefix, html)


if __name__ == "__main__":
    unittest.main()