ZhouChuYue committed on
Commit
a579dd2
·
1 Parent(s): aa1ad34
app.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ UltraData Math Parser - Hugging Face Space Demo
4
+ A unified HTML parser optimized for extracting mathematical content.
5
+ """
6
+
7
+ import gradio as gr
8
+ from ultradata_math_parser.parsers.unified_parser import UnifiedParser
9
+
10
+
11
def _empty_parse_result(error):
    """Return the result skeleton used when nothing was extracted.

    Args:
        error: Message to place under the ``"error"`` key.
    """
    return {
        "title": "",
        "html": "",
        "text_length": 0,
        "xp_num": "",
        "fallback_strategy": "",
        "forum_assembled": False,
        "error": error,
    }


def parse_html(
    html_content: str,
    base_url: str = "",
    process_math: bool = True,
    include_tables: bool = True,
    enable_forum_assembly: bool = True,
) -> dict:
    """
    Parse HTML content using UnifiedParser.

    Args:
        html_content: Raw HTML string to parse
        base_url: Base URL for resolving relative links
        process_math: Whether to process and convert math expressions
        include_tables: Whether to preserve table elements
        enable_forum_assembly: Whether to enable forum post assembly

    Returns:
        Dictionary with the keys ``title``, ``html``, ``text_length``,
        ``xp_num``, ``fallback_strategy``, ``forum_assembled`` and ``error``.
        ``error`` is ``None`` on success and a non-empty message otherwise;
        this function never raises for parser failures.
    """
    if not html_content or not html_content.strip():
        return _empty_parse_result("Please provide HTML content to parse.")

    # This is the UI boundary: every failure, including one raised while
    # constructing the parser, must come back as an error dict so the
    # interface can display it instead of crashing the callback.
    try:
        parser = UnifiedParser()
        result = parser.extract(
            html=html_content,
            base_url=base_url,
            process_math=process_math,
            include_tables=include_tables,
            enable_forum_assembly=enable_forum_assembly,
        )

        if not result:
            # NOTE(review): assumes extract() may return None/empty when it
            # finds nothing - confirm against UnifiedParser.
            return _empty_parse_result("Parser returned no content.")

        return {
            "title": result.get("title", ""),
            "html": result.get("html", ""),
            "text_length": result.get("text_length", 0),
            "xp_num": result.get("xp_num", ""),
            "fallback_strategy": result.get("fallback_strategy", ""),
            "forum_assembled": result.get("forum_assembled", False),
            "error": None,
        }
    except Exception as e:
        # An exception with an empty message would yield a falsy "error"
        # value, which downstream code treats as success; fall back to the
        # exception class name so the failure stays visible.
        return _empty_parse_result(str(e) or type(e).__name__)
72
+
73
+
74
def format_output(result: dict) -> tuple:
    """Convert a parse result dict into the four values shown in the UI.

    Args:
        result: Dictionary with the keys produced by ``parse_html``.

    Returns:
        A 4-tuple ``(metadata_markdown, title, html, html)``. The HTML is
        returned twice: once for the raw-text view and once for the preview.
        When ``result["error"]`` is truthy, the first slot carries the error
        message and the other three are empty strings.
    """
    if result.get("error"):
        failure_text = f"❌ Error: {result['error']}"
        return (failure_text, "", "", "")

    # One Markdown bullet per statistic, under a heading line.
    summary_lines = [
        "📊 **Parsing Statistics**",
        f"- **Title**: {result['title'] or 'N/A'}",
        f"- **Text Length**: {result['text_length']} characters",
        f"- **XPath Match**: {result['xp_num']}",
        f"- **Fallback Strategy**: {result['fallback_strategy']}",
        f"- **Forum Assembled**: {'✅ Yes' if result['forum_assembled'] else '❌ No'}",
    ]
    metadata = "\n".join(summary_lines) + "\n"

    extracted_title = result.get("title", "")
    extracted_html = result.get("html", "")
    return (metadata, extracted_title, extracted_html, extracted_html)
99
+
100
+
101
def process_input(html_content, base_url, process_math, include_tables, enable_forum):
    """Gradio callback: parse the submitted HTML and format it for display.

    The UI values are forwarded to ``parse_html`` (``enable_forum`` is passed
    as ``enable_forum_assembly``) and the resulting dict is handed to
    ``format_output``, whose return value is returned unchanged.
    """
    parser_options = {
        "base_url": base_url,
        "process_math": process_math,
        "include_tables": include_tables,
        "enable_forum_assembly": enable_forum,
    }
    parsed = parse_html(html_content=html_content, **parser_options)
    return format_output(parsed)
111
+
112
+
113
+ # Example HTML content for demo
114
+ EXAMPLE_HTML = """<!DOCTYPE html>
115
+ <html>
116
+ <head>
117
+ <title>Quadratic Formula Example</title>
118
+ </head>
119
+ <body>
120
+ <article class="post-content">
121
+ <h1>Understanding the Quadratic Formula</h1>
122
+ <p>The quadratic formula is used to solve equations of the form ax² + bx + c = 0.</p>
123
+ <p>The solution is given by:</p>
124
+ <math xmlns="http://www.w3.org/1998/Math/MathML">
125
+ <mi>x</mi>
126
+ <mo>=</mo>
127
+ <mfrac>
128
+ <mrow>
129
+ <mo>-</mo>
130
+ <mi>b</mi>
131
+ <mo>±</mo>
132
+ <msqrt>
133
+ <mrow>
134
+ <msup><mi>b</mi><mn>2</mn></msup>
135
+ <mo>-</mo>
136
+ <mn>4</mn>
137
+ <mi>a</mi>
138
+ <mi>c</mi>
139
+ </mrow>
140
+ </msqrt>
141
+ </mrow>
142
+ <mrow>
143
+ <mn>2</mn>
144
+ <mi>a</mi>
145
+ </mrow>
146
+ </mfrac>
147
+ </math>
148
+ <p>Where a, b, and c are coefficients of the quadratic equation.</p>
149
+ <h2>Example Problem</h2>
150
+ <p>Solve: x² - 5x + 6 = 0</p>
151
+ <p>Here, a = 1, b = -5, c = 6</p>
152
+ <p>Using the formula: x = (5 ± √(25-24))/2 = (5 ± 1)/2</p>
153
+ <p>Therefore, x = 3 or x = 2</p>
154
+ </article>
155
+ <footer>
156
+ <nav>Related articles...</nav>
157
+ </footer>
158
+ </body>
159
+ </html>"""
160
+
161
+
162
+ # Custom CSS for better aesthetics
163
+ custom_css = """
164
+ @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500&family=Space+Grotesk:wght@400;500;600;700&display=swap');
165
+
166
+ .gradio-container {
167
+ font-family: 'Space Grotesk', sans-serif !important;
168
+ background: linear-gradient(135deg, #0f0f23 0%, #1a1a3e 50%, #0f0f23 100%) !important;
169
+ min-height: 100vh;
170
+ }
171
+
172
+ .main-title {
173
+ font-family: 'Space Grotesk', sans-serif !important;
174
+ font-weight: 700 !important;
175
+ font-size: 2.5rem !important;
176
+ background: linear-gradient(90deg, #00d4ff, #7c3aed, #f472b6) !important;
177
+ -webkit-background-clip: text !important;
178
+ -webkit-text-fill-color: transparent !important;
179
+ background-clip: text !important;
180
+ text-align: center !important;
181
+ margin-bottom: 0.5rem !important;
182
+ }
183
+
184
+ .subtitle {
185
+ text-align: center !important;
186
+ color: #94a3b8 !important;
187
+ font-size: 1.1rem !important;
188
+ margin-bottom: 2rem !important;
189
+ }
190
+
191
+ .gr-box {
192
+ border-radius: 12px !important;
193
+ border: 1px solid rgba(124, 58, 237, 0.3) !important;
194
+ background: rgba(15, 15, 35, 0.8) !important;
195
+ backdrop-filter: blur(10px) !important;
196
+ }
197
+
198
+ .gr-input, .gr-textarea {
199
+ font-family: 'JetBrains Mono', monospace !important;
200
+ background: rgba(30, 30, 60, 0.6) !important;
201
+ border: 1px solid rgba(124, 58, 237, 0.4) !important;
202
+ border-radius: 8px !important;
203
+ color: #e2e8f0 !important;
204
+ }
205
+
206
+ .gr-button-primary {
207
+ background: linear-gradient(135deg, #7c3aed 0%, #00d4ff 100%) !important;
208
+ border: none !important;
209
+ font-weight: 600 !important;
210
+ font-size: 1rem !important;
211
+ padding: 12px 32px !important;
212
+ border-radius: 8px !important;
213
+ transition: all 0.3s ease !important;
214
+ text-transform: uppercase !important;
215
+ letter-spacing: 1px !important;
216
+ }
217
+
218
+ .gr-button-primary:hover {
219
+ transform: translateY(-2px) !important;
220
+ box-shadow: 0 8px 25px rgba(124, 58, 237, 0.4) !important;
221
+ }
222
+
223
+ .gr-button-secondary {
224
+ background: transparent !important;
225
+ border: 2px solid rgba(124, 58, 237, 0.5) !important;
226
+ color: #a78bfa !important;
227
+ font-weight: 500 !important;
228
+ border-radius: 8px !important;
229
+ }
230
+
231
+ .section-header {
232
+ color: #00d4ff !important;
233
+ font-weight: 600 !important;
234
+ font-size: 1.2rem !important;
235
+ margin-bottom: 1rem !important;
236
+ padding-bottom: 0.5rem !important;
237
+ border-bottom: 2px solid rgba(0, 212, 255, 0.3) !important;
238
+ }
239
+
240
+ .output-box {
241
+ background: rgba(20, 20, 45, 0.9) !important;
242
+ border: 1px solid rgba(0, 212, 255, 0.3) !important;
243
+ border-radius: 12px !important;
244
+ padding: 1rem !important;
245
+ }
246
+
247
+ .gr-markdown {
248
+ color: #e2e8f0 !important;
249
+ }
250
+
251
+ .gr-markdown code {
252
+ background: rgba(124, 58, 237, 0.2) !important;
253
+ padding: 2px 6px !important;
254
+ border-radius: 4px !important;
255
+ font-family: 'JetBrains Mono', monospace !important;
256
+ }
257
+
258
+ footer {
259
+ display: none !important;
260
+ }
261
+
262
+ .gr-accordion {
263
+ border: 1px solid rgba(124, 58, 237, 0.3) !important;
264
+ border-radius: 8px !important;
265
+ background: rgba(20, 20, 45, 0.6) !important;
266
+ }
267
+
268
+ .gr-check-radio {
269
+ accent-color: #7c3aed !important;
270
+ }
271
+
272
+ label {
273
+ color: #cbd5e1 !important;
274
+ }
275
+ """
276
+
277
+ # Build Gradio interface
278
+ with gr.Blocks(css=custom_css, title="UltraData Math Parser") as demo:
279
+ gr.HTML('<h1 class="main-title">📐 UltraData Math Parser</h1>')
280
+ gr.HTML('<p class="subtitle">Unified HTML Parser for Mathematical Content Extraction</p>')
281
+
282
+ with gr.Row():
283
+ with gr.Column(scale=1):
284
+ gr.HTML('<div class="section-header">📥 Input</div>')
285
+
286
+ html_input = gr.Textbox(
287
+ label="HTML Content",
288
+ placeholder="Paste your HTML content here...",
289
+ lines=15,
290
+ max_lines=30,
291
+ value=EXAMPLE_HTML,
292
+ )
293
+
294
+ base_url_input = gr.Textbox(
295
+ label="Base URL (Optional)",
296
+ placeholder="https://example.com/page",
297
+ lines=1,
298
+ )
299
+
300
+ with gr.Accordion("⚙️ Advanced Options", open=False):
301
+ process_math = gr.Checkbox(
302
+ label="Process Math Expressions",
303
+ value=True,
304
+ info="Convert MathML and LaTeX to unified format",
305
+ )
306
+ include_tables = gr.Checkbox(
307
+ label="Include Tables",
308
+ value=True,
309
+ info="Preserve table elements in output",
310
+ )
311
+ enable_forum = gr.Checkbox(
312
+ label="Enable Forum Assembly",
313
+ value=True,
314
+ info="Assemble forum posts and comments",
315
+ )
316
+
317
+ with gr.Row():
318
+ parse_btn = gr.Button("🚀 Parse HTML", variant="primary", size="lg")
319
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg")
320
+
321
+ with gr.Column(scale=1):
322
+ gr.HTML('<div class="section-header">📤 Output</div>')
323
+
324
+ metadata_output = gr.Markdown(
325
+ label="Parsing Statistics",
326
+ elem_classes=["output-box"],
327
+ )
328
+
329
+ title_output = gr.Textbox(
330
+ label="Extracted Title",
331
+ lines=1,
332
+ interactive=False,
333
+ )
334
+
335
+ with gr.Tabs():
336
+ with gr.TabItem("📝 Raw HTML"):
337
+ html_output = gr.Textbox(
338
+ label="Extracted HTML",
339
+ lines=12,
340
+ max_lines=20,
341
+ interactive=False,
342
+ )
343
+ with gr.TabItem("👁️ Preview"):
344
+ preview_output = gr.HTML(
345
+ label="HTML Preview",
346
+ )
347
+
348
+ # Event handlers
349
+ parse_btn.click(
350
+ fn=process_input,
351
+ inputs=[html_input, base_url_input, process_math, include_tables, enable_forum],
352
+ outputs=[metadata_output, title_output, html_output, preview_output],
353
+ )
354
+
355
+ def clear_all():
356
+ return "", "", "", "", "", ""
357
+
358
+ clear_btn.click(
359
+ fn=clear_all,
360
+ outputs=[html_input, base_url_input, metadata_output, title_output, html_output, preview_output],
361
+ )
362
+
363
+ # Footer info
364
+ gr.HTML("""
365
+ <div style="text-align: center; margin-top: 2rem; padding: 1rem; color: #64748b; font-size: 0.9rem;">
366
+ <p>🔬 <strong>UltraData Math Parser</strong> - Part of the UltraData-Math Project</p>
367
+ <p>Specialized in extracting mathematical content from web pages with MathML, LaTeX, and formula support.</p>
368
+ </div>
369
+ """)
370
+
371
+
372
+ if __name__ == "__main__":
373
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ Brotli
3
+ cchardet==2.2.0a2
4
+ charset_normalizer
5
+ lxml<5.2.0
6
+ numpy
7
+ py_asciimath
8
+ urllib3
9
+ tldextract
ultradata_math_parser/__init__.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import json
3
+ import logging
4
+ from typing import Optional, Type
5
+ from urllib.parse import urlparse
6
+ import tldextract
7
+
8
+ from ultradata_math_parser.parsers.article_parser import ArticleParser
9
+ from ultradata_math_parser.parsers.forum_parser import ForumParser
10
+ from ultradata_math_parser.parsers.custom_parser import CustomParser
11
+ from ultradata_math_parser.parsers.unified_parser import UnifiedParser
12
+ from ultradata_math_parser.utils import text_len, run_w3m_dump, W3MError
13
+ from ultradata_math_parser.config import URL_PATTERNS_TO_HTML_TYPE, BUILTIN_SITE_RULES
14
+
15
+
16
class GeneralParser:
    """Route HTML to a suitable extractor and attach a w3m text dump.

    Extractor selection in ``extract`` proceeds in this order:

    1. a built-in site rule from ``BUILTIN_SITE_RULES`` (via ``CustomParser``);
    2. a user rule loaded from ``config_path``, keyed by the URL's netloc
       (via ``CustomParser``);
    3. the extractor named by ``html_type`` (``"forum"`` or ``"article"``);
    4. ``UnifiedParser`` as the default.

    A failing rule-based extractor (steps 1 and 2) is logged at debug level
    and the next step is tried.
    """

    def __init__(self, config_path="", w3m_path: str = "w3m"):
        """Set up logging, user rules, the w3m path and the TLD extractor.

        Args:
            config_path: Optional path to a JSON file mapping netloc to a
                rule. A missing or unreadable file results in no user rules
                rather than an error.
            w3m_path: w3m executable to use; an empty value falls back to
                ``"w3m"``.
        """
        self.logger = logging.getLogger(__name__)
        # Always define self.rule: extract() reads it unconditionally.
        self.rule = self._load_rules(config_path)
        self.w3m_path = w3m_path or "w3m"
        self.tld_extractor = tldextract.TLDExtract()

    @staticmethod
    def _load_rules(config_path) -> dict:
        """Load the user rule mapping from a JSON file.

        Returns an empty dict when ``config_path`` is empty, when the file
        cannot be read or decoded, or when its top-level value is not a JSON
        object. Problems are logged as warnings instead of being swallowed
        silently.
        """
        if not config_path:
            return {}
        log = logging.getLogger(__name__)
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                rules = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            log.warning("Could not load rule file %s: %s", config_path, exc)
            return {}
        if not isinstance(rules, dict):
            log.warning("Rule file %s does not contain a JSON object; ignoring it", config_path)
            return {}
        return rules

    def extract(self, html="", w3m_path: Optional[str] = None, **kwargs) -> dict:
        """Extract content from ``html`` and enrich it with w3m text.

        Args:
            html: Raw HTML to process.
            w3m_path: Per-call override of the w3m executable.
            **kwargs: Forwarded to the chosen extractor. ``base_url`` is read
                to select rules; ``html_type`` is consumed here and not
                forwarded.

        Returns:
            The extractor's result with ``text``, ``w3m_text`` and
            ``text_length`` added, or the extractor's falsy result unchanged.

        Raises:
            RuntimeError: If the default or ``html_type`` extractor returns a
                non-empty result without an ``"html"`` value.
        """
        base_url = kwargs.get("base_url", "")
        netloc = urlparse(base_url).netloc if base_url else ""
        html_type = kwargs.pop("html_type", None)

        current_w3m_path = w3m_path or self.w3m_path

        # Check whether the URL matches a built-in site rule.
        if base_url and self._quick_check_builtin_rules(base_url):
            try:
                extracted = self.tld_extractor(base_url)
                domain = f"{extracted.domain}.{extracted.suffix}"
                self.logger.debug("TLD Extract result for %s: domain=%s, suffix=%s -> key=%s", base_url, extracted.domain, extracted.suffix, domain)

                if domain in BUILTIN_SITE_RULES:
                    try:
                        # Caller kwargs come last so an explicit "rule"
                        # argument still overrides the built-in one.
                        new_kwargs = {"rule": BUILTIN_SITE_RULES[domain], **kwargs}
                        self.logger.debug("Using builtin rule for domain: %s", domain)
                        return self._run_extractor(CustomParser, html, new_kwargs, w3m_path=current_w3m_path)
                    except Exception as exc:
                        self.logger.debug("Builtin rule extractor failed for %s: %s", domain, exc)
            except Exception as e:
                self.logger.debug("Error extracting domain or checking builtin rules: %s", e)

        # Infer the HTML type from URL patterns when the caller gave none.
        if not html_type and base_url:
            for pattern, mapped_type in URL_PATTERNS_TO_HTML_TYPE.items():
                if pattern in base_url:
                    html_type = mapped_type
                    break

        # Apply a user-configured rule for this netloc, if any.
        if netloc in self.rule:
            try:
                new_kwargs = {"rule": self.rule[netloc], **kwargs}
                return self._run_extractor(CustomParser, html, new_kwargs, w3m_path=current_w3m_path)
            except Exception as exc:
                self.logger.debug("Custom extractor failed for %s: %s", netloc, exc)

        # Choose the extractor by html_type.
        if html_type == "forum":
            return self._run_extractor(ForumParser, html, kwargs, w3m_path=current_w3m_path)
        if html_type == "article":
            return self._run_extractor(ArticleParser, html, kwargs, w3m_path=current_w3m_path)

        # "unified", unknown and missing types all use the unified extractor.
        return self._run_extractor(UnifiedParser, html, kwargs, w3m_path=current_w3m_path)

    def _quick_check_builtin_rules(self, url: str) -> bool:
        """Return True if any built-in rule key occurs in the lowercased URL.

        This is a plain substring pre-filter; ``extract`` confirms the match
        against the registered domain afterwards.
        """
        if not url:
            return False
        url_lower = url.lower()
        return any(domain in url_lower for domain in BUILTIN_SITE_RULES)

    def _run_extractor(self, extractor_cls: Type, html: str, kwargs: dict, w3m_path: str):
        """Instantiate ``extractor_cls``, run it on ``html`` and apply w3m."""
        result = extractor_cls().extract(html=html, **kwargs)
        return self._apply_w3m(result, w3m_path=w3m_path)

    def _apply_w3m(self, result: Optional[dict], w3m_path: str) -> Optional[dict]:
        """Add w3m-derived text fields to a copy of ``result``.

        A falsy ``result`` is returned unchanged. Otherwise the ``"html"``
        value is passed to ``run_w3m_dump`` and its output is stored under
        ``"text"`` and ``"w3m_text"``, with ``"text_length"`` recomputed from
        that text via ``text_len``.

        Raises:
            RuntimeError: If ``result`` has no non-empty ``"html"`` value.
        """
        if not result:
            return result
        html_fragment = result.get("html")
        if not html_fragment:
            raise RuntimeError("Extraction result does not contain 'html' for w3m")
        text = run_w3m_dump(html_fragment, w3m_path)
        # Copy so the extractor's own result dict is not mutated.
        enriched = dict(result)
        enriched["text"] = text
        enriched["w3m_text"] = text
        enriched["text_length"] = text_len(text)
        return enriched
ultradata_math_parser/config.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+
3
+ Unique_ID = "ultradata_math_parser_id_internal"
4
+
5
+ PAYWALL_DISCARD_XPATH = [
6
+ """.//*[(self::div or self::p)][
7
+ contains(@id, "paywall") or contains(@id, "premium") or
8
+ contains(@class, "paid-content") or contains(@class, "paidcontent") or
9
+ contains(@class, "obfuscated") or contains(@class, "blurred") or
10
+ contains(@class, "restricted") or contains(@class, "overlay")
11
+ ]""",
12
+ ]
13
+
14
+ OVERALL_DISCARD_XPATH = [
15
+ # navigation + footers, news outlets related posts, sharing, jp-post-flair jp-relatedposts
16
+ """.//*[(self::div or self::item or self::ul
17
+ or self::p or self::section or self::span)][
18
+ contains(translate(@id, "F","f"), "footer") or contains(translate(@class, "F","f"), "footer")
19
+ or contains(@id, "related") or contains(translate(@class, "R", "r"), "related") or
20
+ contains(@id, "viral") or contains(@class, "viral") or
21
+ starts-with(@id, "shar") or starts-with(@class, "shar") or
22
+ contains(@class, "share-") or
23
+ contains(translate(@id, "S", "s"), "share") or
24
+ contains(@id, "social") or contains(@class, "social") or contains(@class, "sociable") or
25
+ contains(@id, "syndication") or contains(@class, "syndication") or
26
+ starts-with(@id, "jp-") or starts-with(@id, "dpsp-content") or
27
+ contains(@class, "embedded") or contains(@class, "embed")
28
+ or contains(@id, "newsletter") or contains(@class, "newsletter")
29
+ or contains(@class, "subnav") or
30
+ contains(@id, "cookie") or contains(@class, "cookie") or contains(@id, "tags")
31
+ or contains(@class, "tags") or contains(@id, "sidebar") or
32
+ contains(@class, "sidebar") or contains(@id, "banner") or contains(@class, "banner")
33
+ or contains(@class, "meta") or
34
+ contains(@id, "menu") or contains(@class, "menu") or
35
+ contains(translate(@id, "N", "n"), "nav") or contains(translate(@role, "N", "n"), "nav")
36
+ or starts-with(@class, "nav") or contains(translate(@class, "N", "n"), "navigation") or
37
+ contains(@class, "navbar") or contains(@class, "navbox") or starts-with(@class, "post-nav")
38
+ or contains(@id, "breadcrumb") or contains(@class, "breadcrumb") or
39
+ contains(@id, "bread-crumb") or contains(@class, "bread-crumb") or
40
+ contains(@id, "author") or contains(@class, "author") or
41
+ contains(@id, "button") or contains(@class, "button")
42
+ or contains(translate(@class, "B", "b"), "byline")
43
+ or contains(@class, "rating") or starts-with(@class, "widget") or
44
+ contains(@class, "attachment") or contains(@class, "timestamp") or
45
+ contains(@class, "user-info") or contains(@class, "user-profile") or
46
+ contains(@class, "-ad-") or contains(@class, "-icon")
47
+ or contains(@class, "article-infos") or
48
+ contains(translate(@class, "I", "i"), "infoline")
49
+ or contains(@data-component, "MostPopularStories")
50
+ or contains(@class, "outbrain") or contains(@class, "taboola")
51
+ or contains(@class, "criteo") or contains(@class, "options")
52
+ or contains(@class, "consent") or contains(@class, "modal-content")
53
+ or contains(@class, "paid-content") or contains(@class, "paidcontent")
54
+ or contains(@id, "premium-") or contains(@id, "paywall")
55
+ or contains(@class, "obfuscated") or contains(@class, "blurred")
56
+ or contains(@class, " ad ")
57
+ or contains(@class, "next-post")
58
+ or contains(@class, "yin") or contains(@class, "zlylin") or
59
+ contains(@class, "xg1") or contains(@id, "bmdh")
60
+ or @data-lp-replacement-content]""",
61
+ # hidden parts
62
+ """.//*[starts-with(@class, "hide-") or contains(@class, "hide-print") or contains(@id, "hidden")
63
+ or contains(@style, "hidden") or contains(@hidden, "hidden") or contains(@class, "noprint")
64
+ or contains(@style, "display:none") or contains(@class, " hidden") or @aria-hidden="true"
65
+ or contains(@class, "notloaded")]""",
66
+ # comment debris
67
+ # or contains(@class, "message-container") or contains(@id, "message_container")
68
+ """.//*[@class="comments-title" or contains(@class, "comments-title") or
69
+ contains(@class, "nocomments") or starts-with(@id, "reply-") or starts-with(@class, "reply-") or
70
+ contains(@class, "-reply-") or contains(@class, "message") or contains(@id, "message_container")
71
+ or contains(@id, "akismet") or contains(@class, "akismet")] """,
72
+ ]
73
+
74
+ TEASER_DISCARD_XPATH = [
75
+ """.//*[(self::div or self::item or self::ul
76
+ or self::p or self::section or self::span)][
77
+ contains(translate(@id, "T", "t"), "teaser") or contains(translate(@class, "T", "t"), "teaser")
78
+ ]""",
79
+ ]
80
+
81
+ PRECISION_DISCARD_XPATH = [
82
+ ".//header",
83
+ """.//*[(self::div or self::item or self::ul
84
+ or self::p or self::section or self::span)][
85
+ contains(@id, "bottom") or contains(@class, "bottom") or
86
+ contains(@id, "link") or contains(@class, "link")
87
+ or contains(@style, "border")
88
+ ]""",
89
+ ]
90
+
91
+ DISCARD_IMAGE_ELEMENTS = [
92
+ """.//*[(self::div or self::item or self::ul
93
+ or self::p or self::section or self::span)][
94
+ contains(@id, "caption") or contains(@class, "caption")
95
+ ]
96
+ """
97
+ ]
98
+
99
+ REMOVE_COMMENTS_XPATH = [
100
+ """.//*[(self::div or self::ul or self::section)][
101
+ starts-with(translate(@id, "C","c"), 'comment') or
102
+ starts-with(translate(@class, "C","c"), 'comment') or starts-with(translate(@name, "C","c"), 'comment') or
103
+ contains(@class, 'article-comments') or contains(@class, 'post-comments')
104
+ or starts-with(@id, 'comol') or starts-with(@id, 'disqus_thread')
105
+ or starts-with(@id, 'dsq-comments')
106
+ ]"""
107
+ ]
108
+
109
+ CONTENT_EXTRACTOR_NOISE_XPATHS = [
110
+ # '//div[contains(@class, "comment") or contains(@name, "comment") or contains(@id, "comment")]',
111
+ '//div[starts-with(@class, "advert") or starts-with(@name, "advert") or starts-with(@id, "advert")]',
112
+ '//div[contains(@style, "display: none")]',
113
+ '//div[contains(@style, "display:none")]',
114
+ ]
115
+
116
+ # Preserve images, audio, and video
117
+ MANUALLY_CLEANED = [
118
+ "aside",
119
+ "embed",
120
+ "footer",
121
+ "head",
122
+ "iframe",
123
+ "menu",
124
+ "object",
125
+ "script",
126
+ "applet",
127
+ "canvas",
128
+ "map",
129
+ "svg",
130
+ "area",
131
+ "blink",
132
+ "button",
133
+ "datalist",
134
+ "dialog",
135
+ "frame",
136
+ "frameset",
137
+ "fieldset",
138
+ "hr",
139
+ "link",
140
+ "input",
141
+ "ins",
142
+ "label",
143
+ "legend",
144
+ "marquee",
145
+ "menuitem",
146
+ "nav",
147
+ "noscript",
148
+ "optgroup",
149
+ "option",
150
+ "output",
151
+ "param",
152
+ "progress",
153
+ "rp",
154
+ "rt",
155
+ "rtc",
156
+ "select",
157
+ "style",
158
+ "track",
159
+ "textarea",
160
+ "time",
161
+ "use",
162
+ ]
163
+
164
+ MANUALLY_STRIPPED = [
165
+ "abbr",
166
+ "acronym",
167
+ "address",
168
+ "bdi",
169
+ "bdo",
170
+ "big",
171
+ "cite",
172
+ "data",
173
+ "dfn",
174
+ "font",
175
+ "hgroup",
176
+ "ins",
177
+ "mark",
178
+ "meta",
179
+ "ruby",
180
+ "small",
181
+ "tbody",
182
+ "template",
183
+ "tfoot",
184
+ "thead",
185
+ ]
186
+
187
+ CUT_EMPTY_ELEMS = {
188
+ "article",
189
+ "b",
190
+ "blockquote",
191
+ "dd",
192
+ "div",
193
+ "dt",
194
+ "em",
195
+ "h1",
196
+ "h2",
197
+ "h3",
198
+ "h4",
199
+ "h5",
200
+ "h6",
201
+ "i",
202
+ "li",
203
+ "main",
204
+ "p",
205
+ "pre",
206
+ "q",
207
+ "section",
208
+ "span",
209
+ "strong",
210
+ }
211
+
212
+ USELESS_ATTR = [
213
+ "share",
214
+ "contribution",
215
+ "copyright",
216
+ "copy-right",
217
+ "disclaimer",
218
+ "recommend",
219
+ "related",
220
+ "footer",
221
+ "social",
222
+ "submeta",
223
+ "report-infor",
224
+ ]
225
+
226
+ BODY_XPATH = [
227
+ """.//*[(self::article or self::div or self::main or self::section)][
228
+ @class="post" or @class="entry" or
229
+ contains(@class, "post-text") or contains(@class, "post_text") or
230
+ contains(@class, "post-body") or contains(@class, "post-entry") or contains(@class, "postentry") or
231
+ contains(@class, "post-content") or contains(@class, "post_content") or
232
+ contains(@class, "postcontent") or contains(@class, "postContent") or
233
+ contains(@class, "article-text") or contains(@class, "articletext") or contains(@class, "articleText")
234
+ or contains(@id, "entry-content") or
235
+ contains(@class, "entry-content") or contains(@id, "article-content") or
236
+ contains(@class, "article-content") or contains(@id, "article__content") or
237
+ contains(@class, "article__content") or contains(@id, "article-body") or
238
+ contains(@class, "article-body") or contains(@id, "article__body") or
239
+ contains(@class, "article__body") or @itemprop="articleBody" or
240
+ contains(translate(@id, "B", "b"), "articlebody") or contains(translate(@class, "B", "b"), "articlebody")
241
+ or @id="articleContent" or contains(@class, "ArticleContent") or
242
+ contains(@class, "page-content") or contains(@class, "text-content") or
243
+ contains(@id, "body-text") or contains(@class, "body-text") or contains(@class, "body-content") or contains(translate(@class, "B", "b"), "textbody") or
244
+ contains(@class, "article__container") or contains(@id, "art-content") or contains(@class, "art-content")][1]""",
245
+ "(.//article)[1]",
246
+ """(.//*[(self::article or self::div or self::main or self::section)][
247
+ contains(@class, 'post-bodycopy') or
248
+ contains(@class, 'storycontent') or contains(@class, 'story-content') or
249
+ @class='postarea' or @class='art-postcontent' or
250
+ contains(@class, 'theme-content') or contains(@class, 'blog-content') or
251
+ contains(@class, 'section-content') or contains(@class, 'single-content') or
252
+ contains(@class, 'single-post') or
253
+ contains(@class, 'main-column') or contains(@class, 'wpb_text_column') or
254
+ starts-with(@id, 'primary') or starts-with(@class, 'article ') or @class="text" or
255
+ @id="article" or @class="cell" or @id="story" or @class="story" or
256
+ contains(@class, "story-body") or contains(@class, "field-body") or
257
+ contains(translate(@class, "FULTEX","fultex"), "fulltext")
258
+ or @role='article'])[1]""",
259
+ """(.//*[(self::article or self::div or self::main or self::section)][
260
+ contains(@id, "content-main") or contains(@class, "content-main") or contains(@class, "content_main") or
261
+ contains(@id, "content-body") or contains(@class, "content-body") or contains(@id, "contentBody")
262
+ or contains(@class, "content__body") or contains(translate(@id, "CM","cm"), "main-content") or contains(translate(@class, "CM","cm"), "main-content")
263
+ or contains(translate(@class, "CP","cp"), "page-content") or
264
+ @id="content" or @class="content"])[1]""",
265
+ '(.//*[(self::article or self::div or self::section)][starts-with(@class, "main") or starts-with(@id, "main") or starts-with(@role, "main")])[1]|(.//main)[1]',
266
+ ]
267
+
268
+ Forum_XPATH = [
269
+ """.//*[(self::article or self::div or self::main or self::section or self::li or self::tr)][
270
+ contains(@id, 'question') or contains(@class, 'question')]""",
271
+ """.//*[(self::article or self::div or self::main or self::section or self::li or self::tr)][
272
+ contains(@id, 'answer') or contains(@class, 'answer')]""",
273
+ """.//*[(self::article or self::div or self::main or self::section or self::li or self::tr)][
274
+ contains(@id, 'comment') or contains(@class, 'comment') or contains(@class, 'Comment')]""",
275
+ """.//*[(self::article or self::div or self::main or self::section or self::li or self::tr)][contains(@class, "message-container") or contains(@id, "message_container") or contains(@class, "Messages_container")]""",
276
+ """.//*[(self::article or self::div or self::main or self::section or self::p or self::span or self::li or self::tr)][
277
+ contains(@id, 'comment-content') or contains(@class, 'comment-content') or contains(@class, 'comment-body') or contains(@class, 'comment-body') or contains(@class, "post-reply") or contains(@class, "reply_content") or contains(@class, "reply-content") or contains(@class, "reply_post") or contains(@class, "post-reply") or contains(@id, "reply") or contains(@class, "post-text") or contains(@class, "post_text") or
278
+ contains(@class, "post-body") or contains(@class, "postbody") or contains(@class, "post-entry") or contains(@class, "postentry") or contains(@component, 'post') or
279
+ contains(@class, "post-content") or contains(@class, "post_content") or contains(@class, "p_content") or contains(@class, "Post_content") or contains(@class, "message-post") or contains(@class, "js-post")]""",
280
+ # Elements whose id contains "post-" followed by a number
281
+ """.//*[(self::article or self::div or self::main or self::section or self::p or self::span or self::li or self::tr)][contains(@id, 'post-') or contains(@id, 'post_')]"""
282
+ ]
283
+
284
+ METAS = [
285
+ '//meta[starts-with(@property, "og:title")]/@content',
286
+ '//meta[starts-with(@name, "og:title")]/@content',
287
+ '//meta[starts-with(@property, "title")]/@content',
288
+ '//meta[starts-with(@name, "title")]/@content',
289
+ '//meta[starts-with(@property, "page:title")]/@content',
290
+ '//meta[starts-with(@name, "page:title")]/@content',
291
+ ]
292
+ URL_PATTERNS_TO_HTML_TYPE = {
293
+ }
294
+
295
+ # Built-in site adaptation rules (matched by URL pattern, handled by CustomParser)
296
+ BUILTIN_SITE_RULES = {
297
+ # Adaptation for the answers.com family of sites
298
+ "answers.com": {
299
+ "clean": [
300
+ "//script",
301
+ "//style",
302
+ ],
303
+ "title": {
304
+ "mode": "xpath",
305
+ "value": "//h1[@property='name']//text() | //h1[contains(@class, 'headline1')]//text()"
306
+ },
307
+ "content": {
308
+ "mode": "xpath",
309
+ # Extract only the answer content
310
+ "value": "//div[@property='content'] | //div[contains(@class, 'markdownStyles')]"
311
+ }
312
+ },
313
+ }
314
+
315
+ SCORING_WEIGHTS = {
316
+ "content_length": 1.0,
317
+ "paragraph_quality": 0.0,
318
+ "link_density": 0.0,
319
+ "text_density": 0.0,
320
+ "punctuation_density": 0.0,
321
+ "structure_completeness": 0.0,
322
+ "xpath_confidence": 0.0,
323
+ "noise_elements": 0.0,
324
+ "code_block_quality": 0.0,
325
+ "list_structure": 0.0,
326
+ }
327
+
328
+ SCORE_THRESHOLDS = {
329
+ "min_acceptable_score": 3.0,
330
+ "similar_threshold": 0.5,
331
+ }
332
+
ultradata_math_parser/mmltex/cmarkup.xsl ADDED
@@ -0,0 +1,1093 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding="UTF-8"?>
2
+ <xsl:stylesheet
3
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
4
+ xmlns:m="http://www.w3.org/1998/Math/MathML"
5
+ version='1.0'>
6
+
7
+ <!-- ====================================================================== -->
8
+ <!-- $id: cmarkup.xsl, 2002/22/11 Exp $
9
+ This file is part of the XSLT MathML Library distribution.
10
+ See ./README or http://www.raleigh.ru/MathML/mmltex for
11
+ copyright and other information -->
12
+ <!-- ====================================================================== -->
13
+
14
+ <!-- 4.4.1.1 cn -->
15
+ <xsl:template match="m:cn"><xsl:apply-templates/></xsl:template>
16
+
17
+ <xsl:template match="m:cn[@type='complex-cartesian']">
18
+ <xsl:apply-templates select="text()[1]"/>
19
+ <xsl:text>+</xsl:text>
20
+ <xsl:apply-templates select="text()[2]"/>
21
+ <xsl:text>i</xsl:text>
22
+ </xsl:template>
23
+
24
+ <xsl:template match="m:cn[@type='rational']">
25
+ <xsl:apply-templates select="text()[1]"/>
26
+ <xsl:text>/</xsl:text>
27
+ <xsl:apply-templates select="text()[2]"/>
28
+ </xsl:template>
29
+
30
+ <xsl:template match="m:cn[@type='integer' and @base!=10]">
31
+ <xsl:apply-templates/>
32
+ <xsl:text>_{</xsl:text><xsl:value-of select="@base"/><xsl:text>}</xsl:text>
33
+ </xsl:template>
34
+
35
+ <xsl:template match="m:cn[@type='complex-polar']">
36
+ <xsl:apply-templates select="text()[1]"/>
37
+ <xsl:text>e^{i </xsl:text>
38
+ <xsl:apply-templates select="text()[2]"/>
39
+ <xsl:text>}</xsl:text>
40
+ </xsl:template>
41
+
42
+ <xsl:template match="m:cn[@type='e-notation']">
43
+ <xsl:apply-templates select="text()[1]"/>
44
+ <xsl:text>E</xsl:text>
45
+ <xsl:apply-templates select="text()[2]"/>
46
+ </xsl:template>
47
+
48
+ <!-- 4.4.1.2 ci 4.4.1.3 csymbol -->
49
+ <xsl:template match="m:ci | m:csymbol">
50
+ <xsl:choose>
51
+ <xsl:when test="string-length(normalize-space(text()))>1">
52
+ <xsl:text>\mathrm{</xsl:text><xsl:apply-templates/><xsl:text>}</xsl:text>
53
+ </xsl:when>
54
+ <xsl:otherwise><xsl:apply-templates/></xsl:otherwise>
55
+ </xsl:choose>
56
+ </xsl:template>
57
+
58
+ <!-- 4.4.2.1 apply 4.4.2.2 reln -->
59
+ <xsl:template match="m:apply | m:reln">
60
+ <xsl:apply-templates select="*[1]">
61
+ <!-- <? -->
62
+ <xsl:with-param name="p" select="10"/>
63
+ </xsl:apply-templates>
64
+ <!-- ?> -->
65
+ <xsl:text>(</xsl:text>
66
+ <xsl:for-each select="*[position()>1]">
67
+ <xsl:apply-templates select="."/>
68
+ <xsl:if test="not(position()=last())"><xsl:text>, </xsl:text></xsl:if>
69
+ </xsl:for-each>
70
+ <xsl:text>)</xsl:text>
71
+ </xsl:template>
72
+
73
+ <!-- 4.4.2.3 fn -->
74
+ <xsl:template match="m:fn[m:apply[1]]"> <!-- for m:fn using default rule -->
75
+ <xsl:text>(</xsl:text><xsl:apply-templates/><xsl:text>)</xsl:text>
76
+ </xsl:template>
77
+
78
+ <!-- 4.4.2.4 interval -->
79
+ <xsl:template match="m:interval[*[2]]">
80
+ <xsl:choose>
81
+ <xsl:when test="@closure='open' or @closure='open-closed'">
82
+ <xsl:text>\left(</xsl:text>
83
+ </xsl:when>
84
+ <xsl:otherwise><xsl:text>\left[</xsl:text></xsl:otherwise>
85
+ </xsl:choose>
86
+ <xsl:apply-templates select="*[1]"/>
87
+ <xsl:text> , </xsl:text>
88
+ <xsl:apply-templates select="*[2]"/>
89
+ <xsl:choose>
90
+ <xsl:when test="@closure='open' or @closure='closed-open'">
91
+ <xsl:text>\right)</xsl:text>
92
+ </xsl:when>
93
+ <xsl:otherwise><xsl:text>\right]</xsl:text></xsl:otherwise>
94
+ </xsl:choose>
95
+ </xsl:template>
96
+
97
+ <xsl:template match="m:interval">
98
+ <xsl:text>\left\{</xsl:text><xsl:apply-templates/><xsl:text>\right\}</xsl:text>
99
+ </xsl:template>
100
+
101
+ <!-- 4.4.2.5 inverse -->
102
+ <xsl:template match="m:apply[*[1][self::m:inverse]]">
103
+ <xsl:apply-templates select="*[2]"/><xsl:text>^{(-1)}</xsl:text>
104
+ </xsl:template>
105
+
106
+ <!-- 4.4.2.6 sep 4.4.2.7 condition -->
107
+ <xsl:template match="m:sep | m:condition"><xsl:apply-templates/></xsl:template>
108
+
109
+ <!-- 4.4.2.9 lambda -->
110
+ <xsl:template match="m:lambda">
111
+ <xsl:text>\mathrm{lambda}\: </xsl:text>
112
+ <xsl:apply-templates select="m:bvar/*"/>
113
+ <xsl:text>.\: </xsl:text>
114
+ <xsl:apply-templates select="*[last()]"/>
115
+ </xsl:template>
116
+
117
+ <!-- 4.4.2.10 compose -->
118
+ <xsl:template match="m:apply[*[1][self::m:compose]]">
119
+ <xsl:param name="p" select="0"/>
120
+ <xsl:call-template name="infix">
121
+ <xsl:with-param name="this-p" select="1"/>
122
+ <xsl:with-param name="p" select="$p"/>
123
+ <xsl:with-param name="mo">\circ </xsl:with-param>
124
+ </xsl:call-template>
125
+ </xsl:template>
126
+
127
+ <!-- 4.4.2.11 ident -->
128
+ <xsl:template match="m:ident"><xsl:text>\mathrm{id}</xsl:text></xsl:template>
129
+
130
+ <!-- 4.4.2.12 domain 4.4.2.13 codomain 4.4.2.14 image 4.4.3.21 arg 4.4.3.24 lcm
131
+ 4.4.5.9 grad 4.4.5.10 curl 4.4.9.4 median 4.4.9.5 mode-->
132
+ <xsl:template match="m:domain | m:codomain | m:image | m:arg | m:lcm | m:grad |
133
+ m:curl | m:median | m:mode">
134
+ <xsl:text>\mathop{\mathrm{</xsl:text>
135
+ <xsl:value-of select="local-name()"/>
136
+ <xsl:text>}}</xsl:text>
137
+ </xsl:template>
138
+
139
+ <!-- 4.4.2.15 domainofapplication -->
140
+ <xsl:template match="m:domainofapplication"/>
141
+
142
+ <!-- 4.4.2.16 piecewise -->
143
+ <xsl:template match="m:piecewise">
144
+ <xsl:text>\begin{cases}</xsl:text>
145
+ <xsl:apply-templates select="m:piece"/>
146
+ <xsl:apply-templates select="m:otherwise"/>
147
+ <xsl:text>\end{cases}</xsl:text>
148
+ </xsl:template>
149
+
150
+ <xsl:template match="m:piece">
151
+ <xsl:apply-templates select="*[1]"/>
152
+ <xsl:text> &amp; \text{if $</xsl:text>
153
+ <xsl:apply-templates select="*[2]"/>
154
+ <xsl:text>$}</xsl:text>
155
+ <xsl:if test="not(position()=last()) or ../m:otherwise"><xsl:text>\\ </xsl:text></xsl:if>
156
+ </xsl:template>
157
+
158
+ <xsl:template match="m:otherwise">
159
+ <xsl:apply-templates select="*[1]"/>
160
+ <xsl:text> &amp; \text{otherwise}</xsl:text>
161
+ </xsl:template>
162
+
163
+ <!-- 4.4.3.1 quotient -->
164
+ <xsl:template match="m:apply[*[1][self::m:quotient]]">
165
+ <xsl:text>\left\lfloor\frac{</xsl:text>
166
+ <xsl:apply-templates select="*[2]"/>
167
+ <xsl:text>}{</xsl:text>
168
+ <xsl:apply-templates select="*[3]"/>
169
+ <xsl:text>}\right\rfloor </xsl:text>
170
+ </xsl:template>
171
+
172
+ <!-- 4.4.3.2 factorial -->
173
+ <xsl:template match="m:apply[*[1][self::m:factorial]]">
174
+ <xsl:apply-templates select="*[2]">
175
+ <xsl:with-param name="p" select="7"/>
176
+ </xsl:apply-templates>
177
+ <xsl:text>!</xsl:text>
178
+ </xsl:template>
179
+
180
+ <!-- 4.4.3.3 divide -->
181
+ <xsl:template match="m:apply[*[1][self::m:divide]]">
182
+ <xsl:param name="p" select="0"/>
183
+ <xsl:param name="this-p" select="3"/>
184
+ <xsl:if test="$this-p &lt; $p"><xsl:text>\left(</xsl:text></xsl:if>
185
+ <xsl:text>\frac{</xsl:text>
186
+ <xsl:apply-templates select="*[2]"/>
187
+ <!-- <xsl:with-param name="p" select="$this-p"/>
188
+ </xsl:apply-templates>-->
189
+ <xsl:text>}{</xsl:text>
190
+ <xsl:apply-templates select="*[3]"/>
191
+ <!-- <xsl:with-param name="p" select="$this-p"/>
192
+ </xsl:apply-templates>-->
193
+ <xsl:text>}</xsl:text>
194
+ <xsl:if test="$this-p &lt; $p"><xsl:text>\right)</xsl:text></xsl:if>
195
+ </xsl:template>
196
+
197
+ <!-- 4.4.3.4 max min -->
198
+ <xsl:template match="m:apply[*[1][self::m:max or self::m:min]]">
199
+ <xsl:text>\</xsl:text>
200
+ <xsl:value-of select="local-name(*[1])"/>
201
+ <xsl:text>\{</xsl:text>
202
+ <xsl:choose>
203
+ <xsl:when test="m:condition">
204
+ <xsl:apply-templates select="*[last()]"/>
205
+ <xsl:text>, </xsl:text>
206
+ <xsl:apply-templates select="m:condition/node()"/>
207
+ </xsl:when>
208
+ <xsl:otherwise>
209
+ <xsl:for-each select="*[position() &gt; 1]">
210
+ <xsl:apply-templates select="."/>
211
+ <xsl:if test="position() !=last()"><xsl:text> , </xsl:text></xsl:if>
212
+ </xsl:for-each>
213
+ </xsl:otherwise>
214
+ </xsl:choose>
215
+ <xsl:text>\}</xsl:text>
216
+ </xsl:template>
217
+
218
+ <!-- 4.4.3.5 minus-->
219
+ <xsl:template match="m:apply[*[1][self::m:minus] and count(*)=2]">
220
+ <xsl:text>-</xsl:text>
221
+ <xsl:apply-templates select="*[2]">
222
+ <xsl:with-param name="p" select="5"/>
223
+ </xsl:apply-templates>
224
+ </xsl:template>
225
+
226
+ <xsl:template match="m:apply[*[1][self::m:minus] and count(*)&gt;2]">
227
+ <xsl:param name="p" select="0"/>
228
+ <xsl:call-template name="binary">
229
+ <xsl:with-param name="mo">-</xsl:with-param>
230
+ <xsl:with-param name="p" select="$p"/>
231
+ <xsl:with-param name="this-p" select="2"/>
232
+ </xsl:call-template>
233
+ </xsl:template>
234
+
235
+ <!-- 4.4.3.6 plus-->
236
+ <xsl:template match="m:apply[*[1][self::m:plus]]"> <!-- n-ary sum; a term of the form (-k)*x or (-a)*x is folded into a subtraction; parenthesised when the caller's precedence p exceeds 2 -->
237
+ <xsl:param name="p" select="0"/>
238
+ <xsl:if test="$p &gt; 2">
239
+ <xsl:text>(</xsl:text>
240
+ </xsl:if>
241
+ <xsl:for-each select="*[position()&gt;1]">
242
+ <!-- sign: a folded negative term prints "-" even in first position, because only its magnitude is printed below; any other non-first term prints "+" -->
243
+ <xsl:choose>
244
+ <xsl:when test="self::m:apply[*[1][self::m:times] and
245
+ *[2][self::m:apply[count(*)=2]/*[1][self::m:minus] or self::m:cn[not(m:sep) and
246
+ (number(.) &lt; 0)]]]">-</xsl:when>
247
+ <xsl:when test="position() &gt; 1">+</xsl:when>
248
+ </xsl:choose>
249
+ <!-- only a unary minus (count(*)=2) is folded; a product whose first factor is a binary difference goes to the generic branch at the end -->
250
+ <xsl:choose>
251
+ <xsl:when test="self::m:apply[*[1][self::m:times] and
252
+ *[2][self::m:cn[not(m:sep) and (number(.) &lt;0)]]]">
253
+ <xsl:value-of select="-(*[2])"/>
254
+ <xsl:apply-templates select=".">
255
+ <xsl:with-param name="first" select="2"/>
256
+ <xsl:with-param name="p" select="2"/>
257
+ </xsl:apply-templates>
258
+ </xsl:when>
259
+ <xsl:when test="self::m:apply[*[1][self::m:times] and
260
+ *[2][self::m:apply[count(*)=2]/*[1][self::m:minus]]]">
261
+ <xsl:apply-templates select="./*[2]/*[2]"/>
262
+ <xsl:apply-templates select=".">
263
+ <xsl:with-param name="first" select="2"/>
264
+ <xsl:with-param name="p" select="2"/>
265
+ </xsl:apply-templates>
266
+ </xsl:when>
267
+ <xsl:otherwise>
268
+ <xsl:apply-templates select=".">
269
+ <xsl:with-param name="p" select="2"/>
270
+ </xsl:apply-templates>
271
+ </xsl:otherwise>
272
+ </xsl:choose>
273
+ </xsl:for-each>
274
+ <xsl:if test="$p &gt; 2">
275
+ <xsl:text>)</xsl:text>
276
+ </xsl:if>
277
+ </xsl:template>
278
+
279
+ <!-- 4.4.3.7 power -->
280
+ <xsl:template match="m:apply[*[1][self::m:power]]">
281
+ <xsl:apply-templates select="*[2]">
282
+ <xsl:with-param name="p" select="5"/>
283
+ </xsl:apply-templates>
284
+ <xsl:text>^{</xsl:text>
285
+ <xsl:apply-templates select="*[3]">
286
+ <xsl:with-param name="p" select="5"/>
287
+ </xsl:apply-templates>
288
+ <xsl:text>}</xsl:text>
289
+ </xsl:template>
290
+
291
+ <!-- 4.4.3.8 remainder -->
292
+ <xsl:template match="m:apply[*[1][self::m:rem]]">
293
+ <xsl:param name="p" select="0"/>
294
+ <xsl:call-template name="binary">
295
+ <xsl:with-param name="mo">\mod </xsl:with-param>
296
+ <xsl:with-param name="p" select="$p"/>
297
+ <xsl:with-param name="this-p" select="3"/>
298
+ </xsl:call-template>
299
+ </xsl:template>
300
+
301
+ <!-- 4.4.3.9 times-->
302
+ <xsl:template match="m:apply[*[1][self::m:times]]" name="times">
303
+ <xsl:param name="p" select="0"/>
304
+ <xsl:param name="first" select="1"/>
305
+ <xsl:if test="$p &gt; 3"><xsl:text>(</xsl:text></xsl:if>
306
+ <xsl:for-each select="*[position()&gt;1]">
307
+ <xsl:if test="position() &gt; 1">
308
+ <xsl:choose>
309
+ <xsl:when test="self::m:cn">\times <!-- times --></xsl:when>
310
+ <xsl:otherwise><!--invisible times--></xsl:otherwise>
311
+ </xsl:choose>
312
+ </xsl:if>
313
+ <xsl:if test="position()&gt;= $first">
314
+ <xsl:apply-templates select=".">
315
+ <xsl:with-param name="p" select="3"/>
316
+ </xsl:apply-templates>
317
+ </xsl:if>
318
+ </xsl:for-each>
319
+ <xsl:if test="$p &gt; 3"><xsl:text>)</xsl:text></xsl:if>
320
+ </xsl:template>
321
+
322
+ <!-- 4.4.3.10 root -->
323
+ <xsl:template match="m:apply[*[1][self::m:root]]">
324
+ <xsl:text>\sqrt</xsl:text>
325
+ <xsl:if test="m:degree!=2">
326
+ <xsl:text>[</xsl:text>
327
+ <xsl:apply-templates select="m:degree/*"/>
328
+ <xsl:text>]</xsl:text>
329
+ </xsl:if>
330
+ <xsl:text>{</xsl:text>
331
+ <xsl:apply-templates select="*[position()&gt;1 and not(self::m:degree)]"/>
332
+ <xsl:text>}</xsl:text>
333
+ </xsl:template>
334
+
335
+ <!-- 4.4.3.11 gcd -->
336
+ <xsl:template match="m:gcd"><xsl:text>\gcd </xsl:text></xsl:template>
337
+
338
+ <!-- 4.4.3.12 and -->
339
+ <xsl:template match="m:apply[*[1][self::m:and]]">
340
+ <xsl:param name="p" select="0"/>
341
+ <xsl:call-template name="infix">
342
+ <xsl:with-param name="this-p" select="2"/>
343
+ <xsl:with-param name="p" select="$p"/>
344
+ <xsl:with-param name="mo">\land <!-- and --></xsl:with-param>
345
+ </xsl:call-template>
346
+ </xsl:template>
347
+
348
+ <!-- 4.4.3.13 or -->
349
+ <xsl:template match="m:apply[*[1][self::m:or]]">
350
+ <xsl:param name="p" select="0"/>
351
+ <xsl:call-template name="infix">
352
+ <xsl:with-param name="this-p" select="3"/>
353
+ <xsl:with-param name="p" select="$p"/>
354
+ <xsl:with-param name="mo">\lor </xsl:with-param>
355
+ </xsl:call-template>
356
+ </xsl:template>
357
+
358
+ <!-- 4.4.3.14 xor -->
359
+ <xsl:template match="m:apply[*[1][self::m:xor]]">
360
+ <xsl:param name="p" select="0"/>
361
+ <xsl:call-template name="infix">
362
+ <xsl:with-param name="this-p" select="3"/>
363
+ <xsl:with-param name="p" select="$p"/>
364
+ <xsl:with-param name="mo">\mathop{\mathrm{xor}}</xsl:with-param>
365
+ </xsl:call-template>
366
+ </xsl:template>
367
+
368
+ <!-- 4.4.3.15 not -->
369
+ <xsl:template match="m:apply[*[1][self::m:not]]">
370
+ <xsl:text>\neg </xsl:text>
371
+ <xsl:apply-templates select="*[2]">
372
+ <xsl:with-param name="p" select="7"/>
373
+ </xsl:apply-templates>
374
+ </xsl:template>
375
+
376
+ <!-- 4.4.3.16 implies -->
377
+ <xsl:template match="m:apply[*[1][self::m:implies]]">
378
+ <xsl:param name="p" select="0"/>
379
+ <xsl:call-template name="binary">
380
+ <xsl:with-param name="mo">\implies </xsl:with-param>
381
+ <xsl:with-param name="p" select="$p"/>
382
+ <xsl:with-param name="this-p" select="3"/>
383
+ </xsl:call-template>
384
+ </xsl:template>
385
+
386
+ <!-- 4.4.3.17 forall 4.4.3.18 exists -->
387
+ <xsl:template match="m:apply[*[1][self::m:forall or self::m:exists]]">
388
+ <xsl:text>\</xsl:text>
389
+ <xsl:value-of select="local-name(*[1])"/>
390
+ <xsl:text> </xsl:text>
391
+ <xsl:apply-templates select="m:bvar"/>
392
+ <xsl:if test="m:condition">
393
+ <xsl:text>, </xsl:text><xsl:apply-templates select="m:condition"/>
394
+ </xsl:if>
395
+ <xsl:if test="*[last()][local-name()!='condition'][local-name()!='bvar']">
396
+ <xsl:text>\colon </xsl:text>
397
+ <xsl:apply-templates select="*[last()]"/>
398
+ </xsl:if>
399
+ </xsl:template>
400
+
401
+ <!-- 4.4.3.19 abs -->
402
+ <xsl:template match="m:apply[*[1][self::m:abs]]">
403
+ <xsl:text>\left|</xsl:text>
404
+ <xsl:apply-templates select="*[2]"/>
405
+ <xsl:text>\right|</xsl:text>
406
+ </xsl:template>
407
+
408
+ <!-- 4.4.3.20 conjugate -->
409
+ <xsl:template match="m:apply[*[1][self::m:conjugate]]">
410
+ <xsl:text>\overline{</xsl:text><xsl:apply-templates select="*[2]"/><xsl:text>}</xsl:text>
411
+ </xsl:template>
412
+
413
+ <!-- 4.4.3.22 real -->
414
+ <xsl:template match="m:real"><xsl:text>\Re </xsl:text></xsl:template>
415
+
416
+ <!-- 4.4.3.23 imaginary -->
417
+ <xsl:template match="m:imaginary"><xsl:text>\Im </xsl:text></xsl:template>
418
+
419
+ <!-- 4.4.3.25 floor -->
420
+ <xsl:template match="m:apply[*[1][self::m:floor]]">
421
+ <xsl:text>\lfloor </xsl:text>
422
+ <xsl:apply-templates select="*[2]"/>
423
+ <xsl:text>\rfloor </xsl:text>
424
+ </xsl:template>
425
+
426
+ <!-- 4.4.3.26 ceiling -->
427
+ <xsl:template match="m:apply[*[1][self::m:ceiling]]">
428
+ <xsl:text>\lceil </xsl:text>
429
+ <xsl:apply-templates select="*[2]"/>
430
+ <xsl:text>\rceil </xsl:text>
431
+ </xsl:template>
432
+
433
+ <!-- 4.4.4.1 eq -->
434
+ <xsl:template match="m:apply[*[1][self::m:eq]]">
435
+ <xsl:param name="p" select="0"/>
436
+ <xsl:call-template name="infix">
437
+ <xsl:with-param name="this-p" select="1"/>
438
+ <xsl:with-param name="p" select="$p"/>
439
+ <xsl:with-param name="mo">=</xsl:with-param>
440
+ </xsl:call-template>
441
+ </xsl:template>
442
+
443
+ <!-- 4.4.4.2 neq -->
444
+ <xsl:template match="m:apply[*[1][self::m:neq]]">
445
+ <xsl:param name="p" select="0"/>
446
+ <xsl:call-template name="infix">
447
+ <xsl:with-param name="this-p" select="1"/>
448
+ <xsl:with-param name="p" select="$p"/>
449
+ <xsl:with-param name="mo">\neq </xsl:with-param>
450
+ </xsl:call-template>
451
+ </xsl:template>
452
+
453
+ <!-- 4.4.4.3 gt -->
454
+ <xsl:template match="m:apply[*[1][self::m:gt]]">
455
+ <xsl:param name="p" select="0"/>
456
+ <xsl:call-template name="infix">
457
+ <xsl:with-param name="this-p" select="1"/>
458
+ <xsl:with-param name="p" select="$p"/>
459
+ <xsl:with-param name="mo">&gt; </xsl:with-param>
460
+ </xsl:call-template>
461
+ </xsl:template>
462
+
463
+ <!-- 4.4.4.4 lt -->
464
+ <xsl:template match="m:apply[*[1][self::m:lt]]">
465
+ <xsl:param name="p" select="0"/>
466
+ <xsl:call-template name="infix">
467
+ <xsl:with-param name="this-p" select="1"/>
468
+ <xsl:with-param name="p" select="$p"/>
469
+ <xsl:with-param name="mo">&lt; </xsl:with-param>
470
+ </xsl:call-template>
471
+ </xsl:template>
472
+
473
+ <!-- 4.4.4.5 geq -->
474
+ <xsl:template match="m:apply[*[1][self::m:geq]]">
475
+ <xsl:param name="p" select="0"/>
476
+ <xsl:call-template name="infix">
477
+ <xsl:with-param name="this-p" select="1"/>
478
+ <xsl:with-param name="p" select="$p"/>
479
+ <xsl:with-param name="mo">\ge </xsl:with-param>
480
+ </xsl:call-template>
481
+ </xsl:template>
482
+
483
+ <!-- 4.4.4.6 leq -->
484
+ <xsl:template match="m:apply[*[1][self::m:leq]]">
485
+ <xsl:param name="p" select="0"/>
486
+ <xsl:call-template name="infix">
487
+ <xsl:with-param name="this-p" select="1"/>
488
+ <xsl:with-param name="p" select="$p"/>
489
+ <xsl:with-param name="mo">\le </xsl:with-param>
490
+ </xsl:call-template>
491
+ </xsl:template>
492
+
493
+ <!-- 4.4.4.7 equivalent -->
494
+ <xsl:template match="m:apply[*[1][self::m:equivalent]]">
495
+ <xsl:param name="p" select="0"/>
496
+ <xsl:call-template name="infix">
497
+ <xsl:with-param name="this-p" select="1"/>
498
+ <xsl:with-param name="p" select="$p"/>
499
+ <xsl:with-param name="mo">\equiv </xsl:with-param>
500
+ </xsl:call-template>
501
+ </xsl:template>
502
+
503
+ <!-- 4.4.4.8 approx -->
504
+ <xsl:template match="m:apply[*[1][self::m:approx]]">
505
+ <xsl:param name="p" select="0"/>
506
+ <xsl:call-template name="infix">
507
+ <xsl:with-param name="this-p" select="1"/>
508
+ <xsl:with-param name="p" select="$p"/>
509
+ <xsl:with-param name="mo">\approx </xsl:with-param>
510
+ </xsl:call-template>
511
+ </xsl:template>
512
+
513
+ <!-- 4.4.4.9 factorof -->
514
+ <xsl:template match="m:apply[*[1][self::m:factorof]]">
515
+ <xsl:param name="p" select="0"/>
516
+ <xsl:call-template name="binary">
517
+ <xsl:with-param name="mo"> | </xsl:with-param>
518
+ <xsl:with-param name="p" select="$p"/>
519
+ <xsl:with-param name="this-p" select="3"/>
520
+ </xsl:call-template>
521
+ </xsl:template>
522
+
523
+ <!-- 4.4.5.1 int -->
524
+ <xsl:template match="m:apply[*[1][self::m:int]]">
525
+ <xsl:text>\int</xsl:text>
526
+ <xsl:if test="m:lowlimit/*|m:interval/*[1]|m:condition/*">
527
+ <xsl:text>_{</xsl:text>
528
+ <xsl:apply-templates select="m:lowlimit/*|m:interval/*[1]|m:condition/*"/>
529
+ <xsl:text>}</xsl:text>
530
+ </xsl:if>
531
+ <xsl:if test="m:uplimit/*|m:interval/*[2]">
532
+ <xsl:text>^{</xsl:text>
533
+ <xsl:apply-templates select="m:uplimit/*|m:interval/*[2]"/>
534
+ <xsl:text>}</xsl:text>
535
+ </xsl:if>
536
+ <xsl:text> </xsl:text>
537
+ <xsl:apply-templates select="*[last()]"/>
538
+ <xsl:text>\,d </xsl:text>
539
+ <xsl:apply-templates select="m:bvar"/>
540
+ </xsl:template>
541
+
542
+ <!-- 4.4.5.2 diff -->
543
+ <xsl:template match="m:apply[*[1][self::m:diff] and m:ci and count(*)=2]" priority="2">
544
+ <xsl:apply-templates select="*[2]"/>
545
+ <xsl:text>^\prime </xsl:text>
546
+ </xsl:template>
547
+
548
+ <xsl:template match="m:apply[*[1][self::m:diff]]" priority="1"> <!-- general derivative: \frac{d f}{d x}, or \frac{d^{n}f}{dx^{n}} when the bvar carries a degree -->
549
+ <xsl:text>\frac{</xsl:text>
550
+ <xsl:choose>
551
+ <xsl:when test="m:bvar/m:degree">
552
+ <xsl:text>d^{</xsl:text>
553
+ <xsl:apply-templates select="m:bvar/m:degree/node()"/>
554
+ <xsl:text>}</xsl:text>
555
+ <xsl:apply-templates select="*[last()]"/>
556
+ <xsl:text>}{d</xsl:text>
557
+ <xsl:apply-templates select="m:bvar/node()"/>
558
+ <xsl:text>^{</xsl:text>
559
+ <xsl:apply-templates select="m:bvar/m:degree/node()"/>
560
+ <xsl:text>}</xsl:text>
561
+ </xsl:when>
562
+ <xsl:otherwise>
563
+ <xsl:text>d </xsl:text>
564
+ <xsl:apply-templates select="*[last()]"/>
565
+ <xsl:text>}{d </xsl:text>
566
+ <xsl:apply-templates select="m:bvar"/>
567
+ <!-- no closing brace here: the denominator is closed once, after the choose, for both branches -->
568
+ </xsl:otherwise>
569
+ </xsl:choose>
570
+ <xsl:text>}</xsl:text>
571
+ </xsl:template>
572
+
573
+ <!-- 4.4.5.3 partialdiff -->
574
+ <xsl:template match="m:apply[*[1][self::m:partialdiff] and m:list and m:ci and count(*)=3]" priority="2">
575
+ <xsl:text>D_{</xsl:text>
576
+ <xsl:for-each select="m:list[1]/*">
577
+ <xsl:apply-templates select="."/>
578
+ <xsl:if test="position()&lt;last()"><xsl:text>, </xsl:text></xsl:if>
579
+ </xsl:for-each>
580
+ <xsl:text>}</xsl:text>
581
+ <xsl:apply-templates select="*[3]"/>
582
+ </xsl:template>
583
+
584
+ <xsl:template match="m:apply[*[1][self::m:partialdiff]]" priority="1">
585
+ <xsl:text>\frac{\partial^{</xsl:text>
586
+ <xsl:choose>
587
+ <xsl:when test="m:degree">
588
+ <xsl:apply-templates select="m:degree/node()"/>
589
+ </xsl:when>
590
+ <xsl:when test="m:bvar/m:degree[string(number(.))='NaN']">
591
+ <xsl:for-each select="m:bvar/m:degree">
592
+ <xsl:apply-templates select="node()"/>
593
+ <xsl:if test="position()&lt;last()"><xsl:text>+</xsl:text></xsl:if>
594
+ </xsl:for-each>
595
+ <xsl:if test="count(m:bvar[not(m:degree)])&gt;0">
596
+ <xsl:text>+</xsl:text>
597
+ <xsl:value-of select="count(m:bvar[not(m:degree)])"/>
598
+ </xsl:if>
599
+ </xsl:when>
600
+ <xsl:otherwise>
601
+ <xsl:value-of select="sum(m:bvar/m:degree)+count(m:bvar[not(m:degree)])"/>
602
+ </xsl:otherwise>
603
+ </xsl:choose>
604
+ <xsl:text>}</xsl:text>
605
+ <xsl:apply-templates select="*[last()]"/>
606
+ <xsl:text>}{</xsl:text>
607
+ <xsl:for-each select="m:bvar">
608
+ <xsl:text>\partial </xsl:text>
609
+ <xsl:apply-templates select="node()"/>
610
+ <xsl:if test="m:degree">
611
+ <xsl:text>^{</xsl:text>
612
+ <xsl:apply-templates select="m:degree/node()"/>
613
+ <xsl:text>}</xsl:text>
614
+ </xsl:if>
615
+ </xsl:for-each>
616
+ <xsl:text>}</xsl:text>
617
+ </xsl:template>
618
+
619
+ <!-- 4.4.2.8 declare 4.4.5.4 lowlimit 4.4.5.5 uplimit 4.4.5.7 degree 4.4.9.7 momentabout -->
620
+ <xsl:template match="m:declare | m:lowlimit | m:uplimit | m:degree | m:momentabout"/>
621
+
622
+ <!-- 4.4.5.6 bvar-->
623
+ <xsl:template match="m:bvar">
624
+ <xsl:apply-templates/>
625
+ <xsl:if test="following-sibling::m:bvar"><xsl:text>, </xsl:text></xsl:if>
626
+ </xsl:template>
627
+
628
+ <!-- 4.4.5.8 divergence-->
629
+ <xsl:template match="m:divergence"><xsl:text>\mathop{\mathrm{div}}</xsl:text></xsl:template>
630
+
631
+ <!-- 4.4.5.11 laplacian-->
632
+ <xsl:template match="m:laplacian"><xsl:text>\nabla^2 </xsl:text></xsl:template>
633
+
634
+ <!-- 4.4.6.1 set -->
635
+ <xsl:template match="m:set">
636
+ <xsl:text>\{</xsl:text><xsl:call-template name="set"/><xsl:text>\}</xsl:text>
637
+ </xsl:template>
638
+
639
+ <!-- 4.4.6.2 list -->
640
+ <xsl:template match="m:list">
641
+ <xsl:text>\left[</xsl:text><xsl:call-template name="set"/><xsl:text>\right]</xsl:text>
642
+ </xsl:template>
643
+
644
+ <xsl:template name="set">
645
+ <xsl:choose>
646
+ <xsl:when test="m:condition">
647
+ <xsl:apply-templates select="m:bvar/*[not(self::bvar or self::condition)]"/>
648
+ <xsl:text>\colon </xsl:text>
649
+ <xsl:apply-templates select="m:condition/node()"/>
650
+ </xsl:when>
651
+ <xsl:otherwise>
652
+ <xsl:for-each select="*">
653
+ <xsl:apply-templates select="."/>
654
+ <xsl:if test="position()!=last()"><xsl:text>, </xsl:text></xsl:if>
655
+ </xsl:for-each>
656
+ </xsl:otherwise>
657
+ </xsl:choose>
658
+ </xsl:template>
659
+
660
+ <!-- 4.4.6.3 union -->
661
+ <xsl:template match="m:apply[*[1][self::m:union]]">
662
+ <xsl:param name="p" select="0"/>
663
+ <xsl:call-template name="infix">
664
+ <xsl:with-param name="this-p" select="2"/>
665
+ <xsl:with-param name="p" select="$p"/>
666
+ <xsl:with-param name="mo">\cup </xsl:with-param>
667
+ </xsl:call-template>
668
+ </xsl:template>
669
+
670
+ <!-- 4.4.6.4 intersect -->
671
+ <xsl:template match="m:apply[*[1][self::m:intersect]]">
672
+ <xsl:param name="p" select="0"/>
673
+ <xsl:call-template name="infix">
674
+ <xsl:with-param name="this-p" select="3"/>
675
+ <xsl:with-param name="p" select="$p"/>
676
+ <xsl:with-param name="mo">\cap </xsl:with-param>
677
+ </xsl:call-template>
678
+ </xsl:template>
679
+
680
+ <!-- 4.4.6.5 in -->
681
+ <xsl:template match="m:apply[*[1][self::m:in]]">
682
+ <xsl:param name="p" select="0"/>
683
+ <xsl:call-template name="binary">
684
+ <xsl:with-param name="mo">\in </xsl:with-param>
685
+ <xsl:with-param name="p" select="$p"/>
686
+ <xsl:with-param name="this-p" select="3"/>
687
+ </xsl:call-template>
688
+ </xsl:template>
689
+
690
+ <!-- 4.4.6.6 notin -->
691
+ <xsl:template match="m:apply[*[1][self::m:notin]]">
692
+ <xsl:param name="p" select="0"/>
693
+ <xsl:call-template name="binary">
694
+ <xsl:with-param name="mo">\notin </xsl:with-param>
695
+ <xsl:with-param name="p" select="$p"/>
696
+ <xsl:with-param name="this-p" select="3"/>
697
+ </xsl:call-template>
698
+ </xsl:template>
699
+
700
+ <!-- 4.4.6.7 subset -->
701
+ <xsl:template match="m:apply[*[1][self::m:subset]]">
702
+ <xsl:param name="p" select="0"/>
703
+ <xsl:call-template name="infix">
704
+ <xsl:with-param name="this-p" select="2"/>
705
+ <xsl:with-param name="p" select="$p"/>
706
+ <xsl:with-param name="mo">\subseteq </xsl:with-param>
707
+ </xsl:call-template>
708
+ </xsl:template>
709
+
710
+ <!-- 4.4.6.8 prsubset -->
711
+ <xsl:template match="m:apply[*[1][self::m:prsubset]]">
712
+ <xsl:param name="p" select="0"/>
713
+ <xsl:call-template name="infix">
714
+ <xsl:with-param name="this-p" select="2"/>
715
+ <xsl:with-param name="p" select="$p"/>
716
+ <xsl:with-param name="mo">\subset </xsl:with-param>
717
+ </xsl:call-template>
718
+ </xsl:template>
719
+
720
+ <!-- 4.4.6.9 notsubset -->
721
+ <xsl:template match="m:apply[*[1][self::m:notsubset]]">
722
+ <xsl:param name="p" select="0"/>
723
+ <xsl:call-template name="binary">
724
+ <xsl:with-param name="this-p" select="2"/>
725
+ <xsl:with-param name="p" select="$p"/>
726
+ <xsl:with-param name="mo">\nsubseteq </xsl:with-param>
727
+ </xsl:call-template>
728
+ </xsl:template>
729
+
730
+ <!-- 4.4.6.10 notprsubset -->
731
+ <xsl:template match="m:apply[*[1][self::m:notprsubset]]">
732
+ <xsl:param name="p" select="0"/>
733
+ <xsl:call-template name="binary">
734
+ <xsl:with-param name="this-p" select="2"/>
735
+ <xsl:with-param name="p" select="$p"/>
736
+ <xsl:with-param name="mo">\not\subset </xsl:with-param>
737
+ </xsl:call-template>
738
+ </xsl:template>
739
+
740
+ <!-- 4.4.6.11 setdiff -->
741
+ <xsl:template match="m:apply[*[1][self::m:setdiff]]">
742
+ <xsl:param name="p" select="0"/>
743
+ <xsl:call-template name="binary">
744
+ <xsl:with-param name="this-p" select="2"/>
745
+ <xsl:with-param name="p" select="$p"/>
746
+ <xsl:with-param name="mo">\setminus </xsl:with-param>
747
+ </xsl:call-template>
748
+ </xsl:template>
749
+
750
+ <!-- 4.4.6.12 card -->
751
+ <xsl:template match="m:apply[*[1][self::m:card]]">
752
+ <xsl:text>|</xsl:text>
753
+ <xsl:apply-templates select="*[2]"/>
754
+ <xsl:text>|</xsl:text>
755
+ </xsl:template>
756
+
757
+ <!-- 4.4.6.13 cartesianproduct 4.4.10.6 vectorproduct -->
758
+ <xsl:template match="m:apply[*[1][self::m:cartesianproduct or self::m:vectorproduct]]">
759
+ <xsl:param name="p" select="0"/>
760
+ <xsl:call-template name="infix">
761
+ <xsl:with-param name="this-p" select="2"/>
762
+ <xsl:with-param name="p" select="$p"/>
763
+ <xsl:with-param name="mo">\times </xsl:with-param>
764
+ </xsl:call-template>
765
+ </xsl:template>
766
+
767
+ <xsl:template
768
+ match="m:apply[*[1][self::m:cartesianproduct][count(following-sibling::m:reals)=count(following-sibling::*)]]"
769
+ priority="2">
770
+ <xsl:apply-templates select="*[2]">
771
+ <xsl:with-param name="p" select="5"/>
772
+ </xsl:apply-templates>
773
+ <xsl:text>^{</xsl:text>
774
+ <xsl:value-of select="count(*)-1"/>
775
+ <xsl:text>}</xsl:text>
776
+ </xsl:template>
777
+
778
+ <!-- 4.4.7.1 sum -->
779
+ <xsl:template match="m:apply[*[1][self::m:sum]]">
780
+ <xsl:text>\sum</xsl:text><xsl:call-template name="series"/>
781
+ </xsl:template>
782
+
783
+ <!-- 4.4.7.2 product -->
784
+ <xsl:template match="m:apply[*[1][self::m:product]]">
785
+ <xsl:text>\prod</xsl:text><xsl:call-template name="series"/>
786
+ </xsl:template>
787
+
788
+ <xsl:template name="series">
789
+ <xsl:if test="m:lowlimit/*|m:interval/*[1]|m:condition/*">
790
+ <xsl:text>_{</xsl:text>
791
+ <xsl:if test="not(m:condition)">
792
+ <xsl:apply-templates select="m:bvar"/>
793
+ <xsl:text>=</xsl:text>
794
+ </xsl:if>
795
+ <xsl:apply-templates select="m:lowlimit/*|m:interval/*[1]|m:condition/*"/>
796
+ <xsl:text>}</xsl:text>
797
+ </xsl:if>
798
+ <xsl:if test="m:uplimit/*|m:interval/*[2]">
799
+ <xsl:text>^{</xsl:text>
800
+ <xsl:apply-templates select="m:uplimit/*|m:interval/*[2]"/>
801
+ <xsl:text>}</xsl:text>
802
+ </xsl:if>
803
+ <xsl:text> </xsl:text>
804
+ <xsl:apply-templates select="*[last()]"/>
805
+ </xsl:template>
806
+
807
+ <!-- 4.4.7.3 limit -->
808
+ <xsl:template match="m:apply[*[1][self::m:limit]]">
+ <!-- Emits \lim_{...} followed by the last child of the apply. The subscript comes from applying templates to m:lowlimit itself or to the children of m:condition. -->
809
+ <xsl:text>\lim_{</xsl:text>
810
+ <xsl:apply-templates select="m:lowlimit|m:condition/*"/>
811
+ <xsl:text>}</xsl:text>
812
+ <xsl:apply-templates select="*[last()]"/>
813
+ </xsl:template>
814
+
815
+ <xsl:template match="m:apply[m:limit]/m:lowlimit" priority="3">
+ <!-- m:lowlimit inside a limit application (priority 3): writes the content of the sibling m:bvar, then \to, then the content of the lowlimit itself. -->
816
+ <xsl:apply-templates select="../m:bvar/node()"/>
817
+ <xsl:text>\to </xsl:text>
818
+ <xsl:apply-templates/>
819
+ </xsl:template>
820
+
821
+ <!-- 4.4.7.4 tendsto -->
822
+ <xsl:template match="m:apply[*[1][self::m:tendsto]]">
+ <!-- Renders a tendsto application through the "binary" template (this-p = 2); the arrow depends on the "type" attribute. -->
+ <!-- $p defaults to 0 like the sibling operator templates. -->
823
+ <xsl:param name="p" select="0"/>
+ <!-- MathML places "type" on the tendsto operator element (the first child), not on the enclosing apply. The union is in document order, so [last()] prefers the attribute on tendsto and falls back to one on apply for backward compatibility. -->
+ <xsl:variable name="type" select="string((@type | *[1]/@type)[last()])"/>
824
+ <xsl:call-template name="binary">
825
+ <xsl:with-param name="this-p" select="2"/>
826
+ <xsl:with-param name="p" select="$p"/>
827
+ <xsl:with-param name="mo">
828
+ <xsl:choose>
829
+ <xsl:when test="$type='above'">\searrow </xsl:when>
830
+ <xsl:when test="$type='below'">\nearrow </xsl:when>
831
+ <xsl:when test="$type='two-sided'">\rightarrow </xsl:when>
832
+ <xsl:otherwise>\to </xsl:otherwise>
833
+ </xsl:choose>
834
+ </xsl:with-param>
835
+ </xsl:call-template>
836
+ </xsl:template>
837
+
838
+ <!-- 4.4.8.1 common trigonometric functions 4.4.8.3 natural logarithm -->
839
+ <xsl:template match="m:apply[*[1][
840
+ self::m:sin or self::m:cos or self::m:tan or self::m:sec or
841
+ self::m:csc or self::m:cot or self::m:sinh or self::m:cosh or
842
+ self::m:tanh or self::m:coth or self::m:arcsin or self::m:arccos or
843
+ self::m:arctan or self::m:ln]]">
+ <!-- Application of one of the listed functions: a backslash plus the operator's local name forms the LaTeX command, followed by a space and the first argument rendered with p = 7. -->
844
+ <xsl:text>\</xsl:text>
845
+ <xsl:value-of select="local-name(*[1])"/>
846
+ <xsl:text> </xsl:text>
847
+ <xsl:apply-templates select="*[2]">
848
+ <xsl:with-param name="p" select="7"/>
849
+ </xsl:apply-templates>
850
+ </xsl:template>
851
+
852
+ <xsl:template match="m:sin | m:cos | m:tan | m:sec | m:csc |
853
+ m:cot | m:sinh | m:cosh | m:tanh | m:coth |
854
+ m:arcsin | m:arccos | m:arctan | m:ln">
+ <!-- One of these operator elements matched on its own: emits a backslash, the element's local name and a trailing space. -->
855
+ <xsl:text>\</xsl:text>
856
+ <xsl:value-of select="local-name(.)"/>
857
+ <xsl:text> </xsl:text>
858
+ </xsl:template>
859
+
860
+ <xsl:template match="m:apply[*[1][
861
+ self::m:sech or self::m:csch or self::m:arccosh or
862
+ self::m:arccot or self::m:arccoth or self::m:arccsc or
863
+ self::m:arccsch or self::m:arcsec or self::m:arcsech or
864
+ self::m:arcsinh or self::m:arctanh]]">
+ <!-- Application of one of the listed functions: the operator's local name is set upright as \mathrm{name\,} and followed by the first argument rendered with p = 7. -->
865
+ <xsl:text>\mathrm{</xsl:text>
866
+ <xsl:value-of select="local-name(*[1])"/>
867
+ <xsl:text>\,}</xsl:text>
868
+ <xsl:apply-templates select="*[2]">
869
+ <xsl:with-param name="p" select="7"/>
870
+ </xsl:apply-templates>
871
+ </xsl:template>
872
+
873
+ <xsl:template match="m:sech | m:csch | m:arccosh | m:arccot |
874
+ m:arccoth | m:arccsc |m:arccsch |m:arcsec |
875
+ m:arcsech | m:arcsinh | m:arctanh">
+ <!-- One of these operator elements matched on its own: emits \mathrm{name} using the element's local name. -->
876
+ <xsl:text>\mathrm{</xsl:text>
877
+ <xsl:value-of select="local-name(.)"/>
878
+ <xsl:text>}</xsl:text>
879
+ </xsl:template>
880
+
881
+ <!-- 4.4.8.2 exp -->
882
+ <xsl:template match="m:apply[*[1][self::m:exp]]">
+ <!-- exp(x) is written as e^{x}; the argument is the second child of the apply. -->
883
+ <xsl:text>e^{</xsl:text><xsl:apply-templates select="*[2]"/><xsl:text>}</xsl:text>
884
+ </xsl:template>
885
+
886
+ <!-- 4.4.8.4 log -->
887
+ <xsl:template match="m:apply[*[1][self::m:log]]">
+ <!-- Generic log application: emits \lg followed by the last child of the apply rendered with p = 7. Any m:logbase child is not written by this template. -->
888
+ <xsl:text>\lg </xsl:text>
889
+ <xsl:apply-templates select="*[last()]">
890
+ <xsl:with-param name="p" select="7"/>
891
+ </xsl:apply-templates>
892
+ </xsl:template>
893
+
894
+ <xsl:template match="m:apply[*[1][self::m:log] and m:logbase != 10]" priority="2">
+ <!-- log with an explicit base other than 10: emits \log_{base} followed by the last child of the apply rendered with p = 7. -->
+ <!-- priority="2" is explicit because this pattern and the generic m:apply[*[1][self::m:log]] pattern share the same default priority; without it, choosing between them depends on the processor's conflict recovery. -->
895
+ <xsl:text>\log_{</xsl:text>
896
+ <xsl:apply-templates select="m:logbase/node()"/>
897
+ <xsl:text>}</xsl:text>
898
+ <xsl:apply-templates select="*[last()]">
899
+ <xsl:with-param name="p" select="7"/>
900
+ </xsl:apply-templates>
901
+ </xsl:template>
902
+
903
+ <!-- 4.4.9.1 mean -->
904
+ <xsl:template match="m:apply[*[1][self::m:mean]]">
+ <!-- Mean of the operands: all children after the operator, separated by ", " and wrapped in \langle ... \rangle. -->
905
+ <xsl:text>\langle </xsl:text>
906
+ <xsl:for-each select="*[position()&gt;1]">
907
+ <xsl:apply-templates select="."/>
908
+ <xsl:if test="position() !=last()"><xsl:text>, </xsl:text></xsl:if>
909
+ </xsl:for-each>
910
+ <xsl:text>\rangle </xsl:text>
911
+ </xsl:template>
912
+
913
+ <!-- 4.4.9.2 sdev -->
914
+ <!-- The sdev operator element itself is emitted as \sigma followed by a space. -->
+ <xsl:template match="m:sdev"><xsl:text>\sigma </xsl:text></xsl:template>
915
+
916
+ <!-- 4.4.9.3 variance -->
917
+ <xsl:template match="m:apply[*[1][self::m:variance]]">
+ <!-- Variance of the first operand, written as \sigma(x)^2. Only the second child of the apply is rendered. -->
918
+ <xsl:text>\sigma(</xsl:text>
919
+ <xsl:apply-templates select="*[2]"/>
920
+ <xsl:text>)^2</xsl:text>
921
+ </xsl:template>
922
+
923
+ <!-- 4.4.9.5 moment -->
924
+ <xsl:template match="m:apply[*[1][self::m:moment]]">
+ <!-- Moment: \langle x^{degree}\rangle, where x is the last child of the apply and the exponent is the content of m:degree. -->
925
+ <xsl:text>\langle </xsl:text>
926
+ <xsl:apply-templates select="*[last()]"/>
927
+ <xsl:text>^{</xsl:text>
928
+ <xsl:apply-templates select="m:degree/node()"/>
929
+ <xsl:text>}\rangle</xsl:text>
+ <!-- An optional m:momentabout child is appended as a subscript. -->
930
+ <xsl:if test="m:momentabout">
931
+ <xsl:text>_{</xsl:text>
932
+ <xsl:apply-templates select="m:momentabout/node()"/>
933
+ <xsl:text>}</xsl:text>
934
+ </xsl:if>
935
+ <xsl:text> </xsl:text>
936
+ </xsl:template>
937
+
938
+ <!-- 4.4.10.1 vector -->
939
+ <xsl:template match="m:vector">
+ <!-- A vector becomes a one-column array in \left( ... \right): each child element is one row, with "\\ " written between rows but not after the last. -->
940
+ <xsl:text>\left(\begin{array}{c}</xsl:text>
941
+ <xsl:for-each select="*">
942
+ <xsl:apply-templates select="."/>
943
+ <xsl:if test="position()!=last()"><xsl:text>\\ </xsl:text></xsl:if>
944
+ </xsl:for-each>
945
+ <xsl:text>\end{array}\right)</xsl:text>
946
+ </xsl:template>
947
+
948
+ <!-- 4.4.10.2 matrix -->
949
+ <xsl:template match="m:matrix">
+ <!-- Wraps the rendered children in a pmatrix environment. apply-templates has no select here, so all child nodes are processed, text nodes included. -->
950
+ <xsl:text>\begin{pmatrix}</xsl:text>
951
+ <xsl:apply-templates/>
952
+ <xsl:text>\end{pmatrix}</xsl:text>
953
+ </xsl:template>
954
+
955
+ <!-- 4.4.10.3 matrixrow -->
956
+ <xsl:template match="m:matrixrow">
+ <!-- One matrix row: the child elements are rendered in order and separated by " & ". -->
957
+ <xsl:for-each select="*">
958
+ <xsl:apply-templates select="."/>
959
+ <xsl:if test="position()!=last()"><xsl:text> &amp; </xsl:text></xsl:if>
960
+ </xsl:for-each>
+ <!-- The row break is decided from the document (is there another matrixrow after this one?), not from position()/last() of the caller's node list. The caller may select whitespace text nodes too, which made the last row look non-final and left a trailing "\\". -->
961
+ <xsl:if test="following-sibling::m:matrixrow"><xsl:text>\\ </xsl:text></xsl:if>
962
+ </xsl:template>
963
+
964
+ <!-- 4.4.10.4 determinant -->
965
+ <xsl:template match="m:apply[*[1][self::m:determinant]]">
+ <!-- Generic determinant: \det followed by the first operand rendered with p = 7. -->
966
+ <xsl:text>\det </xsl:text>
967
+ <xsl:apply-templates select="*[2]">
968
+ <xsl:with-param name="p" select="7"/>
969
+ </xsl:apply-templates>
970
+ </xsl:template>
971
+
972
+ <xsl:template match="m:apply[*[1][self::m:determinant]][*[2][self::m:matrix]]" priority="2">
+ <!-- Determinant whose operand is a literal m:matrix (priority 2): the child elements of the matrix are rendered directly inside a vmatrix environment, bypassing the m:matrix template. -->
973
+ <xsl:text>\begin{vmatrix}</xsl:text>
974
+ <xsl:apply-templates select="m:matrix/*"/>
975
+ <xsl:text>\end{vmatrix}</xsl:text>
976
+ </xsl:template>
977
+
978
+ <!-- 4.4.10.5 transpose -->
979
+ <xsl:template match="m:apply[*[1][self::m:transpose]]">
+ <!-- Transpose: the first operand rendered with p = 7, followed by the superscript ^T. -->
980
+ <xsl:apply-templates select="*[2]">
981
+ <xsl:with-param name="p" select="7"/>
982
+ </xsl:apply-templates>
983
+ <xsl:text>^T</xsl:text>
984
+ </xsl:template>
985
+
986
+ <!-- 4.4.10.6 selector -->
987
+ <xsl:template match="m:apply[*[1][self::m:selector]]">
+ <!-- Selector: the first operand (p = 7) followed by a subscript that lists every remaining operand, separated by ", ". -->
988
+ <xsl:apply-templates select="*[2]">
989
+ <xsl:with-param name="p" select="7"/>
990
+ </xsl:apply-templates>
991
+ <xsl:text>_{</xsl:text>
+ <!-- position()&gt;2 skips the operator element and the operand already written above. -->
992
+ <xsl:for-each select="*[position()&gt;2]">
993
+ <xsl:apply-templates select="."/>
994
+ <xsl:if test="position() !=last()"><xsl:text>, </xsl:text></xsl:if>
995
+ </xsl:for-each>
996
+ <xsl:text>}</xsl:text>
997
+ </xsl:template>
998
+
999
+ <!-- 4.4.10.7 scalarproduct 4.4.10.8 outerproduct -->
1000
+ <xsl:template match="m:apply[*[1][self::m:scalarproduct or self::m:outerproduct]]">
+ <!-- Joins the operands of a scalarproduct or outerproduct application through the "infix" template (this-p = 2; the incoming $p is forwarded unchanged). -->
1001
+ <xsl:param name="p" select="0"/>
1002
+ <xsl:call-template name="infix">
1003
+ <xsl:with-param name="this-p" select="2"/>
1004
+ <xsl:with-param name="p" select="$p"/>
+ <!-- \dot is a LaTeX accent that consumes the following token, so it cannot serve as an infix operator. scalarproduct uses \cdot and outerproduct uses \otimes. -->
1005
+ <xsl:with-param name="mo"><xsl:choose><xsl:when test="*[1][self::m:outerproduct]">\otimes </xsl:when><xsl:otherwise>\cdot </xsl:otherwise></xsl:choose></xsl:with-param>
1006
+ </xsl:call-template>
1007
+ </xsl:template>
1008
+
1009
+ <!-- 4.4.11.2 semantics -->
1010
+ <!-- Default for semantics: render only its first child element; the remaining children are not output. -->
+ <xsl:template match="m:semantics"><xsl:apply-templates select="*[1]"/></xsl:template>
1011
+
1012
+ <xsl:template match="m:semantics[m:annotation/@encoding='TeX']">
+ <!-- When an annotation with encoding="TeX" is present, its content is processed instead of the first child of semantics. -->
1013
+ <xsl:apply-templates select="m:annotation[@encoding='TeX']/node()"/>
1014
+ </xsl:template>
1015
+
1016
+ <!-- 4.4.12.1 integers -->
1017
+ <!-- Constant and symbol elements (sections 4.4.12.1 to 4.4.12.15): each template below matches one empty content element and writes a fixed LaTeX token via xsl:text. -->
+ <xsl:template match="m:integers"><xsl:text>\mathbb{Z}</xsl:text></xsl:template>
1018
+
1019
+ <!-- 4.4.12.2 reals -->
1020
+ <xsl:template match="m:reals"><xsl:text>\mathbb{R}</xsl:text></xsl:template>
1021
+
1022
+ <!-- 4.4.12.3 rationals -->
1023
+ <xsl:template match="m:rationals"><xsl:text>\mathbb{Q}</xsl:text></xsl:template>
1024
+
1025
+ <!-- 4.4.12.4 naturalnumbers -->
1026
+ <xsl:template match="m:naturalnumbers"><xsl:text>\mathbb{N}</xsl:text></xsl:template>
1027
+
1028
+ <!-- 4.4.12.5 complexes -->
1029
+ <xsl:template match="m:complexes"><xsl:text>\mathbb{C}</xsl:text></xsl:template>
1030
+
1031
+ <!-- 4.4.12.6 primes -->
1032
+ <xsl:template match="m:primes"><xsl:text>\mathbb{P}</xsl:text></xsl:template>
1033
+
1034
+ <!-- 4.4.12.7 exponentiale -->
1035
+ <xsl:template match="m:exponentiale"><xsl:text>e</xsl:text></xsl:template>
1036
+
1037
+ <!-- 4.4.12.8 imaginaryi -->
1038
+ <xsl:template match="m:imaginaryi"><xsl:text>i</xsl:text></xsl:template>
1039
+
1040
+ <!-- 4.4.12.9 notanumber -->
1041
+ <xsl:template match="m:notanumber"><xsl:text>NaN</xsl:text></xsl:template>
1042
+
1043
+ <!-- 4.4.12.10 true -->
1044
+ <xsl:template match="m:true"><xsl:text>\mbox{true}</xsl:text></xsl:template>
1045
+
1046
+ <!-- 4.4.12.11 false -->
1047
+ <xsl:template match="m:false"><xsl:text>\mbox{false}</xsl:text></xsl:template>
1048
+
1049
+ <!-- 4.4.12.12 emptyset -->
1050
+ <xsl:template match="m:emptyset"><xsl:text>\emptyset </xsl:text></xsl:template>
1051
+
1052
+ <!-- 4.4.12.13 pi -->
1053
+ <xsl:template match="m:pi"><xsl:text>\pi </xsl:text></xsl:template>
1054
+
1055
+ <!-- 4.4.12.14 eulergamma -->
1056
+ <xsl:template match="m:eulergamma"><xsl:text>\gamma </xsl:text></xsl:template>
1057
+
1058
+ <!-- 4.4.12.15 infinity -->
1059
+ <xsl:template match="m:infinity"><xsl:text>\infty </xsl:text></xsl:template>
1060
+
1061
+ <!-- ****************************** -->
1062
+ <xsl:template name="infix" >
+ <!-- Shared helper for n-ary infix operators; the context node is the calling m:apply. Parameters: mo is the operator text written between operands, p is the value handed down by the caller, this-p is the value of this operator. The output is wrapped in parentheses when this-p is less than p (presumably these are operator precedences; confirm against the callers). -->
1063
+ <xsl:param name="mo"/>
1064
+ <xsl:param name="p" select="0"/>
1065
+ <xsl:param name="this-p" select="0"/>
1066
+ <xsl:if test="$this-p &lt; $p"><xsl:text>(</xsl:text></xsl:if>
+ <!-- Operands are all children after the operator element; $mo is copied before every operand except the first. Each operand receives this-p as its own p. -->
1067
+ <xsl:for-each select="*[position()&gt;1]">
1068
+ <xsl:if test="position() &gt; 1">
1069
+ <xsl:copy-of select="$mo"/>
1070
+ </xsl:if>
1071
+ <xsl:apply-templates select=".">
1072
+ <xsl:with-param name="p" select="$this-p"/>
1073
+ </xsl:apply-templates>
1074
+ </xsl:for-each>
1075
+ <xsl:if test="$this-p &lt; $p"><xsl:text>)</xsl:text></xsl:if>
1076
+ </xsl:template>
1077
+
1078
+ <xsl:template name="binary" >
+ <!-- Shared helper for binary operators; the context node is the calling m:apply. Writes the second child, the string value of $mo, then the third child; any further children are not rendered. Parentheses are added when this-p is less than p, and both operands receive this-p as their own p. -->
1079
+ <xsl:param name="mo"/>
1080
+ <xsl:param name="p" select="0"/>
1081
+ <xsl:param name="this-p" select="0"/>
1082
+ <xsl:if test="$this-p &lt; $p"><xsl:text>(</xsl:text></xsl:if>
1083
+ <xsl:apply-templates select="*[2]">
1084
+ <xsl:with-param name="p" select="$this-p"/>
1085
+ </xsl:apply-templates>
1086
+ <xsl:value-of select="$mo"/>
1087
+ <xsl:apply-templates select="*[3]">
1088
+ <xsl:with-param name="p" select="$this-p"/>
1089
+ </xsl:apply-templates>
1090
+ <xsl:if test="$this-p &lt; $p"><xsl:text>)</xsl:text></xsl:if>
1091
+ </xsl:template>
1092
+
1093
+ </xsl:stylesheet>
ultradata_math_parser/mmltex/entities.xsl ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
+ xmlns:m="http://www.w3.org/1998/Math/MathML"
4
+ version='1.0'>
5
+
6
+ <!-- ====================================================================== -->
7
+ <!-- $id: entities.xsl, 2002/22/11 Exp $
8
+ This file is part of the XSLT MathML Library distribution.
9
+ See ./README or http://www.raleigh.ru/MathML/mmltex for
10
+ copyright and other information -->
11
+ <!-- ====================================================================== -->
12
+
13
+ <xsl:template name="replaceEntities">
14
+ <xsl:param name="content"/>
15
+ <xsl:if test="string-length($content)>0">
16
+ <xsl:choose>
17
+ <xsl:when test="starts-with($content,'&#x0025B;')"><xsl:value-of select="'\varepsilon '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0025B;')"/></xsl:call-template></xsl:when> <!--/varepsilon -->
18
+
19
+ <!-- ====================================================================== -->
20
+ <!-- Unicode 3.2
21
+ Greek
22
+ Range: 0370-03FF
23
+ http://www.unicode.org/charts/PDF/U0370.pdf -->
24
+ <!-- ====================================================================== -->
25
+ <xsl:when test="starts-with($content,'&#x00393;')"><xsl:value-of select="'\Gamma '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x00393;')"/></xsl:call-template></xsl:when> <!--/Gamma capital Gamma, Greek -->
26
+ <xsl:when test="starts-with($content,'&#x00394;')"><xsl:value-of select="'\Delta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x00394;')"/></xsl:call-template></xsl:when> <!--/Delta capital Delta, Greek -->
27
+ <xsl:when test="starts-with($content,'&#x00398;')"><xsl:value-of select="'\Theta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x00398;')"/></xsl:call-template></xsl:when> <!--/Theta capital Theta, Greek -->
28
+ <xsl:when test="starts-with($content,'&#x0039B;')"><xsl:value-of select="'\Lambda '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0039B;')"/></xsl:call-template></xsl:when> <!--/Lambda capital Lambda, Greek -->
29
+ <xsl:when test="starts-with($content,'&#x0039E;')"><xsl:value-of select="'\Xi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0039E;')"/></xsl:call-template></xsl:when> <!--/Xi capital Xi, Greek -->
30
+ <xsl:when test="starts-with($content,'&#x003A0;')"><xsl:value-of select="'\Pi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003A0;')"/></xsl:call-template></xsl:when> <!--/Pi capital Pi, Greek -->
31
+ <xsl:when test="starts-with($content,'&#x003A3;')"><xsl:value-of select="'\Sigma '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003A3;')"/></xsl:call-template></xsl:when> <!--/Sigma capital Sigma, Greek -->
32
+ <xsl:when test="starts-with($content,'&#x003A6;')"><xsl:value-of select="'\Phi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003A6;')"/></xsl:call-template></xsl:when> <!--/Phi capital Phi, Greek -->
33
+ <xsl:when test="starts-with($content,'&#x003A8;')"><xsl:value-of select="'\Psi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003A8;')"/></xsl:call-template></xsl:when> <!--/Psi capital Psi, Greek -->
34
+ <xsl:when test="starts-with($content,'&#x003A9;')"><xsl:value-of select="'\Omega '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003A9;')"/></xsl:call-template></xsl:when> <!--/Omega capital Omega, Greek -->
35
+ <xsl:when test="starts-with($content,'&#x003B1;')"><xsl:value-of select="'\alpha '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B1;')"/></xsl:call-template></xsl:when> <!--/alpha small alpha, Greek -->
36
+ <xsl:when test="starts-with($content,'&#x003B2;')"><xsl:value-of select="'\beta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B2;')"/></xsl:call-template></xsl:when> <!--/beta small beta, Greek -->
37
+ <xsl:when test="starts-with($content,'&#x003B3;')"><xsl:value-of select="'\gamma '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B3;')"/></xsl:call-template></xsl:when> <!--/gamma small gamma, Greek -->
38
+ <xsl:when test="starts-with($content,'&#x003B4;')"><xsl:value-of select="'\delta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B4;')"/></xsl:call-template></xsl:when> <!--/delta small delta, Greek -->
39
+ <xsl:when test="starts-with($content,'&#x003B5;')"><xsl:value-of select="'\epsilon '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B5;')"/></xsl:call-template></xsl:when> <!--/straightepsilon, small epsilon, Greek -->
40
+ <xsl:when test="starts-with($content,'&#x003B6;')"><xsl:value-of select="'\zeta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B6;')"/></xsl:call-template></xsl:when> <!--/zeta small zeta, Greek -->
41
+ <xsl:when test="starts-with($content,'&#x003B7;')"><xsl:value-of select="'\eta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B7;')"/></xsl:call-template></xsl:when> <!--/eta small eta, Greek -->
42
+ <xsl:when test="starts-with($content,'&#x003B8;')"><xsl:value-of select="'\theta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B8;')"/></xsl:call-template></xsl:when> <!--/theta straight theta, small theta, Greek -->
43
+ <xsl:when test="starts-with($content,'&#x003B9;')"><xsl:value-of select="'\iota '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003B9;')"/></xsl:call-template></xsl:when> <!--/iota small iota, Greek -->
44
+ <xsl:when test="starts-with($content,'&#x003BA;')"><xsl:value-of select="'\kappa '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003BA;')"/></xsl:call-template></xsl:when> <!--/kappa small kappa, Greek -->
45
+ <xsl:when test="starts-with($content,'&#x003BB;')"><xsl:value-of select="'\lambda '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003BB;')"/></xsl:call-template></xsl:when> <!--/lambda small lambda, Greek -->
46
+ <xsl:when test="starts-with($content,'&#x003BC;')"><xsl:value-of select="'\mu '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003BC;')"/></xsl:call-template></xsl:when> <!--/mu small mu, Greek -->
47
+ <xsl:when test="starts-with($content,'&#x003BD;')"><xsl:value-of select="'\nu '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003BD;')"/></xsl:call-template></xsl:when> <!--/nu small nu, Greek -->
48
+ <xsl:when test="starts-with($content,'&#x003BE;')"><xsl:value-of select="'\xi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003BE;')"/></xsl:call-template></xsl:when> <!--/xi small xi, Greek -->
49
+ <xsl:when test="starts-with($content,'&#x003C0;')"><xsl:value-of select="'\pi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C0;')"/></xsl:call-template></xsl:when> <!--/pi small pi, Greek -->
50
+ <xsl:when test="starts-with($content,'&#x003C1;')"><xsl:value-of select="'\rho '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C1;')"/></xsl:call-template></xsl:when> <!--/rho small rho, Greek -->
51
+ <xsl:when test="starts-with($content,'&#x003C2;')"><xsl:value-of select="'\varsigma '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C2;')"/></xsl:call-template></xsl:when> <!--/varsigma -->
52
+ <xsl:when test="starts-with($content,'&#x003C3;')"><xsl:value-of select="'\sigma '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C3;')"/></xsl:call-template></xsl:when> <!--/sigma small sigma, Greek -->
53
+ <xsl:when test="starts-with($content,'&#x003C4;')"><xsl:value-of select="'\tau '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C4;')"/></xsl:call-template></xsl:when> <!--/tau small tau, Greek -->
54
+ <xsl:when test="starts-with($content,'&#x003C5;')"><xsl:value-of select="'\upsilon '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C5;')"/></xsl:call-template></xsl:when> <!--/upsilon small upsilon, Greek -->
55
+ <xsl:when test="starts-with($content,'&#x003C6;')"><xsl:value-of select="'\phi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C6;')"/></xsl:call-template></xsl:when> <!--/straightphi - small phi, Greek -->
56
+ <xsl:when test="starts-with($content,'&#x003C7;')"><xsl:value-of select="'\chi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C7;')"/></xsl:call-template></xsl:when> <!--/chi small chi, Greek -->
57
+ <xsl:when test="starts-with($content,'&#x003C8;')"><xsl:value-of select="'\psi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C8;')"/></xsl:call-template></xsl:when> <!--/psi small psi, Greek -->
58
+ <xsl:when test="starts-with($content,'&#x003C9;')"><xsl:value-of select="'\omega '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003C9;')"/></xsl:call-template></xsl:when> <!--/omega small omega, Greek -->
59
+ <xsl:when test="starts-with($content,'&#x003D1;')"><xsl:value-of select="'\vartheta '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003D1;')"/></xsl:call-template></xsl:when> <!--/vartheta - curly or open theta -->
60
+ <xsl:when test="starts-with($content,'&#x003D2;')"><xsl:value-of select="'\Upsilon '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003D2;')"/></xsl:call-template></xsl:when> <!--/Upsilon capital Upsilon, Greek -->
61
+ <xsl:when test="starts-with($content,'&#x003D5;')"><xsl:value-of select="'\varphi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003D5;')"/></xsl:call-template></xsl:when> <!--/varphi - curly or open phi -->
62
+ <xsl:when test="starts-with($content,'&#x003D6;')"><xsl:value-of select="'\varpi '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003D6;')"/></xsl:call-template></xsl:when> <!--/varpi -->
63
+ <xsl:when test="starts-with($content,'&#x003F0;')"><xsl:value-of select="'\varkappa '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003F0;')"/></xsl:call-template></xsl:when> <!--/varkappa -->
64
+ <xsl:when test="starts-with($content,'&#x003F1;')"><xsl:value-of select="'\varrho '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x003F1;')"/></xsl:call-template></xsl:when> <!--/varrho -->
65
+
66
+ <!-- ====================================================================== -->
67
+ <xsl:when test="starts-with($content,'&#x0200B;')"><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0200B;')"/></xsl:call-template></xsl:when> <!--short form of &InvisibleComma; -->
68
+ <xsl:when test="starts-with($content,'&#x02026;')"><xsl:value-of select="'\dots '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02026;')"/></xsl:call-template></xsl:when>
69
+ <xsl:when test="starts-with($content,'&#x02032;')"><xsl:value-of select="'\prime '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02032;')"/></xsl:call-template></xsl:when> <!--/prime prime or minute -->
70
+ <xsl:when test="starts-with($content,'&#x02061;')"><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02061;')"/></xsl:call-template></xsl:when> <!-- ApplyFunction -->
71
+ <xsl:when test="starts-with($content,'&#x02062;')"><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02062;')"/></xsl:call-template></xsl:when> <!-- InvisibleTimes -->
72
+ <!-- ====================================================================== -->
73
+ <!-- Unicode 3.2
74
+ Letterlike Symbols
75
+ Range: 2100-214F
76
+ http://www.unicode.org/charts/PDF/U2100.pdf -->
77
+ <!-- ====================================================================== -->
78
+ <xsl:when test="starts-with($content,'&#x0210F;&#x0FE00;')"><xsl:value-of select="'\hbar '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0210F;&#x0FE00;')"/></xsl:call-template></xsl:when> <!--/hbar - Planck's over 2pi -->
79
+ <xsl:when test="starts-with($content,'&#x0210F;')"><xsl:value-of select="'\hslash '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0210F;')"/></xsl:call-template></xsl:when> <!--/hslash - variant Planck's over 2pi --> <!-- Required amssymb -->
80
+ <xsl:when test="starts-with($content,'&#x02111;')"><xsl:value-of select="'\Im '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02111;')"/></xsl:call-template></xsl:when> <!--/Im - imaginary -->
81
+ <xsl:when test="starts-with($content,'&#x02113;')"><xsl:value-of select="'\ell '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02113;')"/></xsl:call-template></xsl:when> <!--/ell - cursive small l -->
82
+ <xsl:when test="starts-with($content,'&#x02118;')"><xsl:value-of select="'\wp '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02118;')"/></xsl:call-template></xsl:when> <!--/wp - Weierstrass p -->
83
+ <xsl:when test="starts-with($content,'&#x0211C;')"><xsl:value-of select="'\Re '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0211C;')"/></xsl:call-template></xsl:when> <!--/Re - real -->
84
+ <xsl:when test="starts-with($content,'&#x02127;')"><xsl:value-of select="'\mho '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02127;')"/></xsl:call-template></xsl:when> <!--/mho - conductance -->
85
+ <xsl:when test="starts-with($content,'&#x02135;')"><xsl:value-of select="'\aleph '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02135;')"/></xsl:call-template></xsl:when> <!--/aleph aleph, Hebrew -->
86
+ <xsl:when test="starts-with($content,'&#x02136;')"><xsl:value-of select="'\beth '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02136;')"/></xsl:call-template></xsl:when> <!--/beth - beth, Hebrew --> <!-- Required amssymb -->
87
+ <xsl:when test="starts-with($content,'&#x02137;')"><xsl:value-of select="'\gimel '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02137;')"/></xsl:call-template></xsl:when> <!--/gimel - gimel, Hebrew --> <!-- Required amssymb -->
88
+ <xsl:when test="starts-with($content,'&#x02138;')"><xsl:value-of select="'\daleth '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02138;')"/></xsl:call-template></xsl:when> <!--/daleth - daleth, Hebrew --> <!-- Required amssymb -->
89
+ <xsl:when test="starts-with($content,'&#x02145;')"><xsl:value-of select="'D'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02145;')"/></xsl:call-template></xsl:when> <!--D for use in differentials, e.g., within integrals -->
90
+ <xsl:when test="starts-with($content,'&#x02146;')"><xsl:value-of select="'d'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02146;')"/></xsl:call-template></xsl:when> <!--d for use in differentials, e.g., within integrals -->
91
+ <xsl:when test="starts-with($content,'&#x02147;')"><xsl:value-of select="'e'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02147;')"/></xsl:call-template></xsl:when> <!--e use for the exponential base of the natural logarithms -->
92
+ <xsl:when test="starts-with($content,'&#x02148;')"><xsl:value-of select="'i'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02148;')"/></xsl:call-template></xsl:when> <!--i for use as a square root of -1 -->
93
+
94
+ <!-- ====================================================================== -->
95
+ <xsl:when test="starts-with($content,'&#x02192;')"><xsl:value-of select="'\to '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02192;')"/></xsl:call-template></xsl:when> <!--/rightarrow /to A: =rightward arrow -->
96
+
97
+ <!-- ====================================================================== -->
98
+ <!-- Unicode 3.2
99
+ Mathematical Operators
100
+ Range: 2200-22FF
101
+ http://www.unicode.org/charts/PDF/U2200.pdf -->
102
+ <!-- ====================================================================== -->
103
+ <xsl:when test="starts-with($content,'&#x02200;')"><xsl:value-of select="'\forall '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02200;')"/></xsl:call-template></xsl:when> <!--/forall for all -->
104
+ <xsl:when test="starts-with($content,'&#x02201;')"><xsl:value-of select="'\complement '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02201;')"/></xsl:call-template></xsl:when> <!--/complement - complement sign --> <!-- Required amssymb -->
105
+ <xsl:when test="starts-with($content,'&#x02202;')"><xsl:value-of select="'\partial '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02202;')"/></xsl:call-template></xsl:when> <!--/partial partial differential -->
106
+ <xsl:when test="starts-with($content,'&#x02203;')"><xsl:value-of select="'\exists '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02203;')"/></xsl:call-template></xsl:when> <!--/exists at least one exists -->
107
+ <xsl:when test="starts-with($content,'&#x02204;')"><xsl:value-of select="'\nexists '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02204;')"/></xsl:call-template></xsl:when> <!--/nexists - negated exists --> <!-- Required amssymb -->
108
+ <xsl:when test="starts-with($content,'&#x02205;&#x0FE00;')"><xsl:value-of select="'\emptyset '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02205;&#x0FE00;')"/></xsl:call-template></xsl:when> <!--/emptyset - zero, slash -->
109
+ <xsl:when test="starts-with($content,'&#x02205;')"><xsl:value-of select="'\varnothing '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02205;')"/></xsl:call-template></xsl:when> <!--/varnothing - circle, slash --> <!-- Required amssymb -->
110
+ <!-- <xsl:when test="starts-with($content,'&#x02206;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02206;')"/></xsl:call-template></xsl:when>-->
111
+ <xsl:when test="starts-with($content,'&#x02207;')"><xsl:value-of select="'\nabla '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02207;')"/></xsl:call-template></xsl:when> <!--/nabla del, Hamilton operator -->
112
+ <xsl:when test="starts-with($content,'&#x02208;')"><xsl:value-of select="'\in '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02208;')"/></xsl:call-template></xsl:when> <!--/in R: set membership -->
113
+ <xsl:when test="starts-with($content,'&#x02209;')"><xsl:value-of select="'\notin '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02209;')"/></xsl:call-template></xsl:when> <!--/notin N: negated set membership -->
114
+ <xsl:when test="starts-with($content,'&#x0220B;')"><xsl:value-of select="'\ni '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0220B;')"/></xsl:call-template></xsl:when> <!--/ni /owns R: contains -->
115
+ <xsl:when test="starts-with($content,'&#x0220C;')"><xsl:value-of select="'\not\ni '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0220C;')"/></xsl:call-template></xsl:when> <!--negated contains -->
116
+ <xsl:when test="starts-with($content,'&#x0220F;')"><xsl:value-of select="'\prod '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0220F;')"/></xsl:call-template></xsl:when> <!--/prod L: product operator -->
117
+ <xsl:when test="starts-with($content,'&#x02210;')"><xsl:value-of select="'\coprod '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02210;')"/></xsl:call-template></xsl:when> <!--/coprod L: coproduct operator -->
118
+ <xsl:when test="starts-with($content,'&#x02211;')"><xsl:value-of select="'\sum '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02211;')"/></xsl:call-template></xsl:when> <!--/sum L: summation operator -->
119
+ <xsl:when test="starts-with($content,'&#x02212;')"><xsl:value-of select="'-'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02212;')"/></xsl:call-template></xsl:when> <!--B: minus sign -->
120
+ <xsl:when test="starts-with($content,'&#x02213;')"><xsl:value-of select="'\mp '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02213;')"/></xsl:call-template></xsl:when> <!--/mp B: minus-or-plus sign -->
121
+ <xsl:when test="starts-with($content,'&#x02214;')"><xsl:value-of select="'\dotplus '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02214;')"/></xsl:call-template></xsl:when> <!--/dotplus B: plus sign, dot above --> <!-- Required amssymb -->
122
+ <!-- <xsl:when test="starts-with($content,'&#x02215;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02215;')"/></xsl:call-template></xsl:when>-->
123
+ <xsl:when test="starts-with($content,'&#x02216;')"><xsl:value-of select="'\setminus '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02216;')"/></xsl:call-template></xsl:when> <!--/setminus B: reverse solidus -->
124
+ <xsl:when test="starts-with($content,'&#x02217;')"><xsl:value-of select="'\ast '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02217;')"/></xsl:call-template></xsl:when> <!--low asterisk -->
125
+ <xsl:when test="starts-with($content,'&#x02218;')"><xsl:value-of select="'\circ '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02218;')"/></xsl:call-template></xsl:when> <!--/circ B: composite function (small circle) -->
126
+ <xsl:when test="starts-with($content,'&#x02219;')"><xsl:value-of select="'\bullet '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02219;')"/></xsl:call-template></xsl:when>
127
+ <xsl:when test="starts-with($content,'&#x0221A;')"><xsl:value-of select="'\surd '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0221A;')"/></xsl:call-template></xsl:when> <!--/surd radical -->
128
+ <xsl:when test="starts-with($content,'&#x0221D;')"><xsl:value-of select="'\propto '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0221D;')"/></xsl:call-template></xsl:when> <!--/propto R: is proportional to -->
129
+ <xsl:when test="starts-with($content,'&#x0221E;')"><xsl:value-of select="'\infty '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0221E;')"/></xsl:call-template></xsl:when> <!--/infty infinity -->
130
+ <!-- <xsl:when test="starts-with($content,'&#x0221F;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0221F;')"/></xsl:call-template></xsl:when> right (90 degree) angle -->
131
+ <xsl:when test="starts-with($content,'&#x02220;')"><xsl:value-of select="'\angle '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02220;')"/></xsl:call-template></xsl:when> <!--/angle - angle -->
132
+ <xsl:when test="starts-with($content,'&#x02221;')"><xsl:value-of select="'\measuredangle '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02221;')"/></xsl:call-template></xsl:when> <!--/measuredangle - angle-measured --> <!-- Required amssymb -->
133
+ <xsl:when test="starts-with($content,'&#x02222;')"><xsl:value-of select="'\sphericalangle '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02222;')"/></xsl:call-template></xsl:when><!--/sphericalangle angle-spherical --> <!-- Required amssymb -->
134
+ <xsl:when test="starts-with($content,'&#x02223;')"><xsl:value-of select="'\mid '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02223;')"/></xsl:call-template></xsl:when> <!--/mid R: divides -->
135
+ <xsl:when test="starts-with($content,'&#x02224;&#x0FE00;')"><xsl:value-of select="'\nshortmid '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02224;&#x0FE00;')"/></xsl:call-template></xsl:when> <!--/nshortmid --> <!-- Required amssymb -->
136
+ <xsl:when test="starts-with($content,'&#x02224;')"><xsl:value-of select="'\nmid '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02224;')"/></xsl:call-template></xsl:when> <!--/nmid --> <!-- Required amssymb -->
137
+ <xsl:when test="starts-with($content,'&#x02225;')"><xsl:value-of select="'\parallel '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02225;')"/></xsl:call-template></xsl:when> <!--/parallel R: parallel -->
138
+ <xsl:when test="starts-with($content,'&#x02226;&#x0FE00;')"><xsl:value-of select="'\nshortparallel '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02226;&#x0FE00;')"/></xsl:call-template></xsl:when> <!--/nshortparallel N: not short par --> <!-- Required amssymb -->
139
+ <xsl:when test="starts-with($content,'&#x02226;')"><xsl:value-of select="'\nparallel '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02226;')"/></xsl:call-template></xsl:when> <!--/nparallel N: not parallel --> <!-- Required amssymb -->
140
+ <xsl:when test="starts-with($content,'&#x02227;')"><xsl:value-of select="'\wedge '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02227;')"/></xsl:call-template></xsl:when> <!--/wedge /land B: logical and -->
141
+ <xsl:when test="starts-with($content,'&#x02228;')"><xsl:value-of select="'\vee '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02228;')"/></xsl:call-template></xsl:when> <!--/vee /lor B: logical or -->
142
+ <xsl:when test="starts-with($content,'&#x02229;')"><xsl:value-of select="'\cap '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02229;')"/></xsl:call-template></xsl:when> <!--/cap B: intersection -->
143
+ <xsl:when test="starts-with($content,'&#x0222A;')"><xsl:value-of select="'\cup '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0222A;')"/></xsl:call-template></xsl:when> <!--/cup B: union or logical sum -->
144
+ <xsl:when test="starts-with($content,'&#x0222B;')"><xsl:value-of select="'\int '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0222B;')"/></xsl:call-template></xsl:when> <!--/int L: integral operator -->
145
+ <xsl:when test="starts-with($content,'&#x0222C;')"><xsl:value-of select="'\iint '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0222C;')"/></xsl:call-template></xsl:when> <!--double integral operator --> <!-- Required amsmath -->
146
+ <xsl:when test="starts-with($content,'&#x0222D;')"><xsl:value-of select="'\iiint '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0222D;')"/></xsl:call-template></xsl:when> <!--/iiint triple integral operator --> <!-- Required amsmath -->
147
+ <xsl:when test="starts-with($content,'&#x0222E;')"><xsl:value-of select="'\oint '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0222E;')"/></xsl:call-template></xsl:when> <!--/oint L: contour integral operator -->
148
+ <!-- <xsl:when test="starts-with($content,'&#x0222F;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0222F;')"/></xsl:call-template></xsl:when>-->
149
+ <!-- <xsl:when test="starts-with($content,'&#x02230;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02230;')"/></xsl:call-template></xsl:when>-->
150
+ <!-- <xsl:when test="starts-with($content,'&#x02231;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02231;')"/></xsl:call-template></xsl:when>-->
151
+ <!-- <xsl:when test="starts-with($content,'&#x02232;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02232;')"/></xsl:call-template></xsl:when>-->
152
+ <!-- <xsl:when test="starts-with($content,'&#x02233;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02233;')"/></xsl:call-template></xsl:when>-->
153
+ <xsl:when test="starts-with($content,'&#x02234;')"><xsl:value-of select="'\therefore '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02234;')"/></xsl:call-template></xsl:when> <!--/therefore R: therefore --> <!-- Required amssymb -->
154
+ <xsl:when test="starts-with($content,'&#x02235;')"><xsl:value-of select="'\because '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02235;')"/></xsl:call-template></xsl:when> <!--/because R: because --> <!-- Required amssymb -->
155
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x02236;')"><xsl:value-of select="':'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02236;')"/></xsl:call-template></xsl:when> <!--/ratio -->
156
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x02237;')"><xsl:value-of select="'\colon\colon '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02237;')"/></xsl:call-template></xsl:when> <!--/Colon, two colons -->
157
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x02238;')"><xsl:value-of select="'\dot{-}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02238;')"/></xsl:call-template></xsl:when> <!--/dotminus B: minus sign, dot above -->
158
+ <!-- <xsl:when test="starts-with($content,'&#x02239;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02239;')"/></xsl:call-template></xsl:when> -->
159
+ <!-- <xsl:when test="starts-with($content,'&#x0223A;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0223A;')"/></xsl:call-template></xsl:when> minus with four dots, geometric properties -->
160
+ <!-- <xsl:when test="starts-with($content,'&#x0223B;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0223B;')"/></xsl:call-template></xsl:when> homothetic -->
161
+ <xsl:when test="starts-with($content,'&#x0223C;')"><xsl:value-of select="'\sim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0223C;')"/></xsl:call-template></xsl:when> <!--/sim R: similar -->
162
+ <xsl:when test="starts-with($content,'&#x0223D;')"><xsl:value-of select="'\backsim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0223D;')"/></xsl:call-template></xsl:when> <!--/backsim R: reverse similar --> <!-- Required amssymb -->
163
+ <!-- <xsl:when test="starts-with($content,'&#x0223E;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0223E;')"/></xsl:call-template></xsl:when> most positive -->
164
+ <!-- <xsl:when test="starts-with($content,'&#x0223F;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0223F;')"/></xsl:call-template></xsl:when> ac current -->
165
+ <xsl:when test="starts-with($content,'&#x02240;')"><xsl:value-of select="'\wr '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02240;')"/></xsl:call-template></xsl:when> <!--/wr B: wreath product -->
166
+ <xsl:when test="starts-with($content,'&#x02241;')"><xsl:value-of select="'\nsim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02241;')"/></xsl:call-template></xsl:when> <!--/nsim N: not similar --> <!-- Required amssymb -->
167
+ <xsl:when test="starts-with($content,'&#x02242;')"><xsl:value-of select="'\eqsim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02242;')"/></xsl:call-template></xsl:when> <!--/esim R: equals, similar --> <!-- Required amssymb -->
168
+ <xsl:when test="starts-with($content,'&#x02243;')"><xsl:value-of select="'\simeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02243;')"/></xsl:call-template></xsl:when> <!--/simeq R: similar, equals -->
169
+ <xsl:when test="starts-with($content,'&#x02244;')"><xsl:value-of select="'\not\simeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02244;')"/></xsl:call-template></xsl:when> <!--/nsimeq N: not similar, equals -->
170
+ <xsl:when test="starts-with($content,'&#x02245;')"><xsl:value-of select="'\cong '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02245;')"/></xsl:call-template></xsl:when> <!--/cong R: congruent with -->
171
+ <!-- <xsl:when test="starts-with($content,'&#x02246;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02246;')"/></xsl:call-template></xsl:when> similar, not equals -->
172
+ <xsl:when test="starts-with($content,'&#x02247;')"><xsl:value-of select="'\ncong '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02247;')"/></xsl:call-template></xsl:when> <!--/ncong N: not congruent with --> <!-- Required amssymb -->
173
+ <xsl:when test="starts-with($content,'&#x02248;')"><xsl:value-of select="'\approx '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02248;')"/></xsl:call-template></xsl:when> <!--/approx R: approximate -->
174
+ <!-- <xsl:when test="starts-with($content,'&#x02249;&#x00338;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02249;&#x00338;')"/></xsl:call-template></xsl:when> not, vert, approximate -->
175
+ <xsl:when test="starts-with($content,'&#x02249;')"><xsl:value-of select="'\not\approx '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02249;')"/></xsl:call-template></xsl:when> <!--/napprox N: not approximate -->
176
+ <xsl:when test="starts-with($content,'&#x0224A;')"><xsl:value-of select="'\approxeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0224A;')"/></xsl:call-template></xsl:when> <!--/approxeq R: approximate, equals --> <!-- Required amssymb -->
177
+ <!-- <xsl:when test="starts-with($content,'&#x0224B;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0224B;')"/></xsl:call-template></xsl:when> approximately identical to -->
178
+ <!-- <xsl:when test="starts-with($content,'&#x0224C;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0224C;')"/></xsl:call-template></xsl:when> /backcong R: reverse congruent -->
179
+ <xsl:when test="starts-with($content,'&#x0224D;')"><xsl:value-of select="'\asymp '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0224D;')"/></xsl:call-template></xsl:when> <!--/asymp R: asymptotically equal to -->
180
+ <xsl:when test="starts-with($content,'&#x0224E;')"><xsl:value-of select="'\Bumpeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0224E;')"/></xsl:call-template></xsl:when> <!--/Bumpeq R: bumpy equals --> <!-- Required amssymb -->
181
+ <xsl:when test="starts-with($content,'&#x0224F;')"><xsl:value-of select="'\bumpeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0224F;')"/></xsl:call-template></xsl:when> <!--/bumpeq R: bumpy equals, equals --> <!-- Required amssymb -->
182
+ <xsl:when test="starts-with($content,'&#x02250;')"><xsl:value-of select="'\doteq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02250;')"/></xsl:call-template></xsl:when> <!--/doteq R: equals, single dot above -->
183
+ <xsl:when test="starts-with($content,'&#x02251;')"><xsl:value-of select="'\doteqdot '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02251;')"/></xsl:call-template></xsl:when> <!--/doteqdot /Doteq R: eq, even dots --> <!-- Required amssymb -->
184
+ <xsl:when test="starts-with($content,'&#x02252;')"><xsl:value-of select="'\fallingdotseq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02252;')"/></xsl:call-template></xsl:when> <!--/fallingdotseq R: eq, falling dots --> <!-- Required amssymb -->
185
+ <xsl:when test="starts-with($content,'&#x02253;')"><xsl:value-of select="'\risingdotseq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02253;')"/></xsl:call-template></xsl:when> <!--/risingdotseq R: eq, rising dots --> <!-- Required amssymb -->
186
+ <!-- <xsl:when test="starts-with($content,'&#x02254;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02254;')"/></xsl:call-template></xsl:when> /coloneq R: colon, equals -->
187
+ <!-- <xsl:when test="starts-with($content,'&#x02255;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02255;')"/></xsl:call-template></xsl:when> /eqcolon R: equals, colon -->
188
+ <xsl:when test="starts-with($content,'&#x02256;')"><xsl:value-of select="'\eqcirc '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02256;')"/></xsl:call-template></xsl:when> <!--/eqcirc R: circle on equals sign --> <!-- Required amssymb -->
189
+ <xsl:when test="starts-with($content,'&#x02257;')"><xsl:value-of select="'\circeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02257;')"/></xsl:call-template></xsl:when> <!--/circeq R: circle, equals --> <!-- Required amssymb -->
190
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x02258;')"><xsl:value-of select="'\stackrel{\frown}{=}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02258;')"/></xsl:call-template></xsl:when>
191
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x02259;')"><xsl:value-of select="'\stackrel{\wedge}{=}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02259;')"/></xsl:call-template></xsl:when> <!--/wedgeq R: corresponds to (wedge, equals) -->
192
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x0225A;')"><xsl:value-of select="'\stackrel{\vee}{=}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0225A;')"/></xsl:call-template></xsl:when> <!--logical or, equals -->
193
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x0225B;')"><xsl:value-of select="'\stackrel{\star}{=}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0225B;')"/></xsl:call-template></xsl:when> <!--equal, asterisk above -->
194
+ <xsl:when test="starts-with($content,'&#x0225C;')"><xsl:value-of select="'\triangleq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0225C;')"/></xsl:call-template></xsl:when> <!--/triangleq R: triangle, equals --> <!-- Required amssymb -->
195
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x0225D;')"><xsl:value-of select="'\stackrel{\scriptscriptstyle\mathrm{def}}{=}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0225D;')"/></xsl:call-template></xsl:when>
196
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x0225E;')"><xsl:value-of select="'\stackrel{\scriptscriptstyle\mathrm{m}}{=}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0225E;')"/></xsl:call-template></xsl:when>
197
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x0225F;')"><xsl:value-of select="'\stackrel{?}{=}'" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0225F;')"/></xsl:call-template></xsl:when> <!--/questeq R: equal with questionmark -->
198
+ <!-- <xsl:when test="starts-with($content,'&#x02260;&#x0FE00;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02260;&#x0FE00;')"/></xsl:call-template></xsl:when> not equal, dot -->
199
+ <xsl:when test="starts-with($content,'&#x02260;')"><xsl:value-of select="'\ne '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02260;')"/></xsl:call-template></xsl:when> <!--/ne /neq R: not equal -->
200
+ <!-- <xsl:when test="starts-with($content,'&#x02261;&#x020E5;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02261;&#x020E5;')"/></xsl:call-template></xsl:when> reverse not equivalent -->
201
+ <xsl:when test="starts-with($content,'&#x02261;')"><xsl:value-of select="'\equiv '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02261;')"/></xsl:call-template></xsl:when> <!--/equiv R: identical with -->
202
+ <xsl:when test="starts-with($content,'&#x02262;')"><xsl:value-of select="'\not\equiv '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02262;')"/></xsl:call-template></xsl:when> <!--/nequiv N: not identical with -->
203
+ <!-- <xsl:when test="starts-with($content,'&#x02263;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02263;')"/></xsl:call-template></xsl:when> -->
204
+ <xsl:when test="starts-with($content,'&#x02264;')"><xsl:value-of select="'\le '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02264;')"/></xsl:call-template></xsl:when> <!--/leq /le R: less-than-or-equal -->
205
+ <xsl:when test="starts-with($content,'&#x02265;')"><xsl:value-of select="'\ge '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02265;')"/></xsl:call-template></xsl:when> <!--/geq /ge R: greater-than-or-equal -->
206
+ <xsl:when test="starts-with($content,'&#x02266;')"><xsl:value-of select="'\leqq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02266;')"/></xsl:call-template></xsl:when> <!--/leqq R: less, double equals --> <!-- Required amssymb -->
207
+ <xsl:when test="starts-with($content,'&#x02267;')"><xsl:value-of select="'\geqq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02267;')"/></xsl:call-template></xsl:when> <!--/geqq R: greater, double equals --> <!-- Required amssymb -->
208
+ <xsl:when test="starts-with($content,'&#x02268;')"><xsl:value-of select="'\lneqq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02268;')"/></xsl:call-template></xsl:when> <!--/lneqq N: less, not double equals --> <!-- Required amssymb -->
209
+ <xsl:when test="starts-with($content,'&#x02269;')"><xsl:value-of select="'\gneqq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02269;')"/></xsl:call-template></xsl:when> <!--/gneqq N: greater, not dbl equals --> <!-- Required amssymb -->
210
+ <!-- <xsl:when test="starts-with($content,'&#x0226A;&#x00338;&#x0FE00;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226A;&#x00338;&#x0FE00;')"/></xsl:call-template></xsl:when> not much less than, variant -->
211
+ <!-- <xsl:when test="starts-with($content,'&#x0226A;&#x00338;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226A;&#x00338;')"/></xsl:call-template></xsl:when> not, vert, much less than -->
212
+ <xsl:when test="starts-with($content,'&#x0226A;')"><xsl:value-of select="'\ll '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226A;')"/></xsl:call-template></xsl:when> <!--/ll R: double less-than sign -->
213
+ <!-- <xsl:when test="starts-with($content,'&#x0226B;&#x00338;&#x0FE00;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226B;&#x00338;&#x0FE00;')"/></xsl:call-template></xsl:when> not much greater than, variant -->
214
+ <!-- <xsl:when test="starts-with($content,'&#x0226B;&#x00338;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226B;&#x00338;')"/></xsl:call-template></xsl:when> not, vert, much greater than -->
215
+ <xsl:when test="starts-with($content,'&#x0226B;')"><xsl:value-of select="'\gg '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226B;')"/></xsl:call-template></xsl:when> <!--/gg R: dbl greater-than sign -->
216
+ <xsl:when test="starts-with($content,'&#x0226C;')"><xsl:value-of select="'\between '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226C;')"/></xsl:call-template></xsl:when> <!--/between R: between --> <!-- Required amssymb -->
217
+ <xsl:when test="starts-with($content,'&#x0226D;')"><xsl:value-of select="'\not\asymp '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226D;')"/></xsl:call-template></xsl:when>
218
+ <xsl:when test="starts-with($content,'&#x0226E;')"><xsl:value-of select="'\nless '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226E;')"/></xsl:call-template></xsl:when> <!--/nless N: not less-than --> <!-- Required amssymb -->
219
+ <xsl:when test="starts-with($content,'&#x0226F;')"><xsl:value-of select="'\ngtr '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0226F;')"/></xsl:call-template></xsl:when> <!--/ngtr N: not greater-than --> <!-- Required amssymb -->
220
+ <xsl:when test="starts-with($content,'&#x02270;&#x020E5;')"><xsl:value-of select="'\nleq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02270;&#x020E5;')"/></xsl:call-template></xsl:when> <!--/nleq N: not less-than-or-equal --> <!-- Required amssymb -->
221
+ <xsl:when test="starts-with($content,'&#x02270;')"><xsl:value-of select="'\nleqq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02270;')"/></xsl:call-template></xsl:when> <!--/nleqq N: not less, dbl equals --> <!-- Required amssymb -->
222
+ <xsl:when test="starts-with($content,'&#x02271;&#x020E5;')"><xsl:value-of select="'\ngeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02271;&#x020E5;')"/></xsl:call-template></xsl:when> <!--/ngeq N: not greater-than-or-equal --> <!-- Required amssymb -->
223
+ <xsl:when test="starts-with($content,'&#x02271;')"><xsl:value-of select="'\ngeqq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02271;')"/></xsl:call-template></xsl:when> <!--/ngeqq N: not greater, dbl equals --> <!-- Required amssymb -->
224
+ <xsl:when test="starts-with($content,'&#x02272;')"><xsl:value-of select="'\lesssim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02272;')"/></xsl:call-template></xsl:when> <!--/lesssim R: less, similar --> <!-- Required amssymb -->
225
+ <xsl:when test="starts-with($content,'&#x02273;')"><xsl:value-of select="'\gtrsim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02273;')"/></xsl:call-template></xsl:when> <!--/gtrsim R: greater, similar --> <!-- Required amssymb -->
226
+ <xsl:when test="starts-with($content,'&#x02274;')"><xsl:value-of select="'\not\lesssim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02274;')"/></xsl:call-template></xsl:when> <!--not less, similar --> <!-- Required amssymb -->
227
+ <xsl:when test="starts-with($content,'&#x02275;')"><xsl:value-of select="'\not\gtrsim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02275;')"/></xsl:call-template></xsl:when> <!--not greater, similar --> <!-- Required amssymb -->
228
+ <xsl:when test="starts-with($content,'&#x02276;')"><xsl:value-of select="'\lessgtr '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02276;')"/></xsl:call-template></xsl:when> <!--/lessgtr R: less, greater --> <!-- Required amssymb -->
229
+ <xsl:when test="starts-with($content,'&#x02277;')"><xsl:value-of select="'\gtrless '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02277;')"/></xsl:call-template></xsl:when> <!--/gtrless R: greater, less --> <!-- Required amssymb -->
230
+ <xsl:when test="starts-with($content,'&#x02278;')"><xsl:value-of select="'\not\lessgtr '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02278;')"/></xsl:call-template></xsl:when> <!--not less, greater --> <!-- Required amssymb -->
231
+ <xsl:when test="starts-with($content,'&#x02279;')"><xsl:value-of select="'\not\gtrless '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02279;')"/></xsl:call-template></xsl:when> <!--not greater, less --> <!-- Required amssymb -->
232
+ <xsl:when test="starts-with($content,'&#x0227A;')"><xsl:value-of select="'\prec '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0227A;')"/></xsl:call-template></xsl:when> <!--/prec R: precedes -->
233
+ <xsl:when test="starts-with($content,'&#x0227B;')"><xsl:value-of select="'\succ '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0227B;')"/></xsl:call-template></xsl:when> <!--/succ R: succeeds -->
234
+ <xsl:when test="starts-with($content,'&#x0227C;')"><xsl:value-of select="'\preccurlyeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0227C;')"/></xsl:call-template></xsl:when> <!--/preccurlyeq R: precedes, curly eq --> <!-- Required amssymb -->
235
+ <xsl:when test="starts-with($content,'&#x0227D;')"><xsl:value-of select="'\succcurlyeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0227D;')"/></xsl:call-template></xsl:when> <!--/succcurlyeq R: succeeds, curly eq --> <!-- Required amssymb -->
236
+ <xsl:when test="starts-with($content,'&#x0227E;')"><xsl:value-of select="'\precsim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0227E;')"/></xsl:call-template></xsl:when> <!--/precsim R: precedes, similar --> <!-- Required amssymb -->
237
+ <xsl:when test="starts-with($content,'&#x0227F;')"><xsl:value-of select="'\succsim '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0227F;')"/></xsl:call-template></xsl:when> <!--/succsim R: succeeds, similar --> <!-- Required amssymb -->
238
+ <xsl:when test="starts-with($content,'&#x02280;')"><xsl:value-of select="'\nprec '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02280;')"/></xsl:call-template></xsl:when> <!--/nprec N: not precedes --> <!-- Required amssymb -->
239
+ <xsl:when test="starts-with($content,'&#x02281;')"><xsl:value-of select="'\nsucc '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02281;')"/></xsl:call-template></xsl:when> <!--/nsucc N: not succeeds --> <!-- Required amssymb -->
240
+ <xsl:when test="starts-with($content,'&#x02282;')"><xsl:value-of select="'\subset '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02282;')"/></xsl:call-template></xsl:when> <!--/subset R: subset or is implied by -->
241
+ <xsl:when test="starts-with($content,'&#x02283;')"><xsl:value-of select="'\supset '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02283;')"/></xsl:call-template></xsl:when> <!--/supset R: superset or implies -->
242
+ <xsl:when test="starts-with($content,'&#x02284;')"><xsl:value-of select="'\not\subset '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02284;')"/></xsl:call-template></xsl:when> <!--not subset -->
243
+ <xsl:when test="starts-with($content,'&#x02285;')"><xsl:value-of select="'\not\supset '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02285;')"/></xsl:call-template></xsl:when> <!--not superset -->
244
+ <xsl:when test="starts-with($content,'&#x02286;')"><xsl:value-of select="'\subseteq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02286;')"/></xsl:call-template></xsl:when> <!--/subseteq R: subset, equals -->
245
+ <xsl:when test="starts-with($content,'&#x02287;')"><xsl:value-of select="'\supseteq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02287;')"/></xsl:call-template></xsl:when> <!--/supseteq R: superset, equals -->
246
+ <xsl:when test="starts-with($content,'&#x0228E;')"><xsl:value-of select="'\uplus '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0228E;')"/></xsl:call-template></xsl:when> <!--/uplus B: plus sign in union -->
247
+ <xsl:when test="starts-with($content,'&#x02293;')"><xsl:value-of select="'\sqcap '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02293;')"/></xsl:call-template></xsl:when> <!--/sqcap B: square intersection -->
248
+ <xsl:when test="starts-with($content,'&#x02294;')"><xsl:value-of select="'\bigsqcup '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02294;')"/></xsl:call-template></xsl:when> <!--/sqcup B: square union -->
249
+ <xsl:when test="starts-with($content,'&#x02295;')"><xsl:value-of select="'\oplus '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02295;')"/></xsl:call-template></xsl:when> <!--/oplus B: plus sign in circle -->
250
+ <xsl:when test="starts-with($content,'&#x02296;')"><xsl:value-of select="'\ominus '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02296;')"/></xsl:call-template></xsl:when> <!--/ominus B: minus sign in circle -->
251
+ <xsl:when test="starts-with($content,'&#x02297;')"><xsl:value-of select="'\otimes '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02297;')"/></xsl:call-template></xsl:when> <!--/otimes B: multiply sign in circle -->
252
+ <xsl:when test="starts-with($content,'&#x02298;')"><xsl:value-of select="'\oslash '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02298;')"/></xsl:call-template></xsl:when> <!--/oslash B: solidus in circle -->
253
+ <!-- ? --> <xsl:when test="starts-with($content,'&#x02299;')"><xsl:value-of select="'\odot '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x02299;')"/></xsl:call-template></xsl:when> <!--/odot B: middle dot in circle --> <!--/bigodot L: circle dot operator -->
254
+ <xsl:when test="starts-with($content,'&#x0229F;')"><xsl:value-of select="'\boxminus '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x0229F;')"/></xsl:call-template></xsl:when> <!--/boxminus B: minus sign in box --> <!-- Required amssymb -->
255
+ <xsl:when test="starts-with($content,'&#x022A4;')"><xsl:value-of select="'\top '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022A4;')"/></xsl:call-template></xsl:when> <!--/top top -->
256
+ <xsl:when test="starts-with($content,'&#x022A5;')"><xsl:value-of select="'\perp '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022A5;')"/></xsl:call-template></xsl:when> <!--/perp R: perpendicular --><!--/bot bottom -->
257
+ <xsl:when test="starts-with($content,'&#x022A6;')"><xsl:value-of select="'\vdash '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022A6;')"/></xsl:call-template></xsl:when> <!--/vdash R: vertical, dash -->
258
+ <xsl:when test="starts-with($content,'&#x022A7;')"><xsl:value-of select="'\vDash '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022A7;')"/></xsl:call-template></xsl:when> <!--/vDash R: vertical, dbl dash --> <!-- Required amssymb -->
259
+ <xsl:when test="starts-with($content,'&#x022A8;')"><xsl:value-of select="'\models '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022A8;')"/></xsl:call-template></xsl:when> <!--/models R: -->
260
+ <xsl:when test="starts-with($content,'&#x022AA;')"><xsl:value-of select="'\Vvdash '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022AA;')"/></xsl:call-template></xsl:when> <!--/Vvdash R: triple vertical, dash --> <!-- Required amssymb -->
261
+ <xsl:when test="starts-with($content,'&#x022C0;')"><xsl:value-of select="'\bigwedge '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C0;')"/></xsl:call-template></xsl:when> <!--/bigwedge L: logical or operator -->
262
+ <xsl:when test="starts-with($content,'&#x022C1;')"><xsl:value-of select="'\bigvee '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C1;')"/></xsl:call-template></xsl:when> <!--/bigcap L: intersection operator -->
263
+ <xsl:when test="starts-with($content,'&#x022C2;')"><xsl:value-of select="'\bigcap '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C2;')"/></xsl:call-template></xsl:when> <!--/bigvee L: logical and operator -->
264
+ <xsl:when test="starts-with($content,'&#x022C3;')"><xsl:value-of select="'\bigcup '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C3;')"/></xsl:call-template></xsl:when> <!--/bigcup L: union operator -->
265
+ <xsl:when test="starts-with($content,'&#x022C4;')"><xsl:value-of select="'\diamond '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C4;')"/></xsl:call-template></xsl:when> <!--/diamond B: open diamond -->
266
+ <xsl:when test="starts-with($content,'&#x022C5;')"><xsl:value-of select="'\cdot '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C5;')"/></xsl:call-template></xsl:when> <!--/cdot B: small middle dot -->
267
+ <xsl:when test="starts-with($content,'&#x022C6;')"><xsl:value-of select="'\star '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C6;')"/></xsl:call-template></xsl:when> <!--/star B: small star, filled -->
268
+ <xsl:when test="starts-with($content,'&#x022C7;')"><xsl:value-of select="'\divideontimes '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C7;')"/></xsl:call-template></xsl:when> <!--/divideontimes B: division on times --> <!-- Required amssymb -->
269
+ <xsl:when test="starts-with($content,'&#x022C8;')"><xsl:value-of select="'\bowtie '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022C8;')"/></xsl:call-template></xsl:when> <!--/bowtie R: -->
270
+ <xsl:when test="starts-with($content,'&#x022CD;')"><xsl:value-of select="'\backsimeq '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022CD;')"/></xsl:call-template></xsl:when> <!--/backsimeq R: reverse similar, eq --> <!-- Required amssymb -->
271
+ <xsl:when test="starts-with($content,'&#x022EF;')"><xsl:value-of select="'\cdots '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022EF;')"/></xsl:call-template></xsl:when> <!--/cdots, three dots, centered -->
272
+ <!-- <xsl:when test="starts-with($content,'&#x022F0;')"><xsl:value-of select="' '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022F0;')"/></xsl:call-template></xsl:when> three dots, ascending -->
273
+ <xsl:when test="starts-with($content,'&#x022F1;')"><xsl:value-of select="'\ddots '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x022F1;')"/></xsl:call-template></xsl:when> <!--/ddots, three dots, descending -->
274
+
275
+ <!-- ====================================================================== -->
276
+ <xsl:when test="starts-with($content,'&#x025A1;')"><xsl:value-of select="'\square '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x025A1;')"/></xsl:call-template></xsl:when> <!--/square, square --> <!-- Required amssymb -->
277
+ <xsl:when test="starts-with($content,'&#x025AA;')"><xsl:value-of select="'\blacksquare '" /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '&#x025AA;')"/></xsl:call-template></xsl:when> <!--/blacksquare, square, filled --> <!-- Required amssymb -->
278
+
279
+ <xsl:when test='starts-with($content,"&apos;")'><xsl:value-of select='"\text{&apos;}"' /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select='substring-after($content, "&apos;")'/></xsl:call-template></xsl:when><!-- \text required amslatex -->
280
+ <xsl:when test='starts-with($content,"(")'><xsl:value-of select='"\left("' /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '(')"/></xsl:call-template></xsl:when>
281
+ <xsl:when test='starts-with($content,")")'><xsl:value-of select='"\right)"' /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, ')')"/></xsl:call-template></xsl:when>
282
+ <xsl:when test='starts-with($content,"[")'><xsl:value-of select='"\left["' /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '[')"/></xsl:call-template></xsl:when>
283
+ <xsl:when test='starts-with($content,"]")'><xsl:value-of select='"\right]"' /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, ']')"/></xsl:call-template></xsl:when>
284
+ <xsl:when test='starts-with($content,"{")'><xsl:value-of select='"\left\{"' /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '{')"/></xsl:call-template></xsl:when>
285
+ <xsl:when test='starts-with($content,"}")'><xsl:value-of select='"\right\}"' /><xsl:call-template name="replaceEntities"><xsl:with-param name="content" select="substring-after($content, '}')"/></xsl:call-template></xsl:when>
286
+
287
+
288
+ <xsl:otherwise>
289
+ <xsl:value-of select="substring($content,1,1)"/>
290
+ <xsl:call-template name="replaceEntities">
291
+ <xsl:with-param name="content" select="substring($content, 2)"/>
292
+ </xsl:call-template>
293
+ </xsl:otherwise>
294
+ </xsl:choose></xsl:if>
295
+ </xsl:template>
296
+
297
+ <!-- replaceMtextEntities: rewrites spacing characters found in $content as LaTeX \hspace commands. Each call replaces the first occurrence only and recurses on the result; once no spacing character is left, the whitespace-normalized string is emitted. The three-character thick-space sequence is tested first because it begins with the same U+2009 character that the thin-space branch matches. -->
+ <xsl:template name="replaceMtextEntities">
298
+ <xsl:param name="content"/>
299
+ <xsl:choose>
300
+ <xsl:when test="contains($content,'&#x02009;&#x0200A;&#x0200A;')"> <!-- ThickSpace - space of width 5/18 em -->
301
+ <xsl:call-template name="replaceMtextEntities">
302
+ <xsl:with-param name="content" select="concat(substring-before($content,'&#x02009;&#x0200A;&#x0200A;'),'\hspace{0.28em}',substring-after($content,'&#x02009;&#x0200A;&#x0200A;'))"/>
303
+ </xsl:call-template>
304
+ </xsl:when>
305
+ <xsl:when test="contains($content,'&#x02009;')"> <!-- ThinSpace - space of width 3/18 em -->
306
+ <xsl:call-template name="replaceMtextEntities">
307
+ <xsl:with-param name="content" select="concat(substring-before($content,'&#x02009;'),'\hspace{0.17em}',substring-after($content,'&#x02009;'))"/>
308
+ </xsl:call-template>
309
+ </xsl:when>
310
+ <xsl:otherwise>
311
+ <xsl:value-of select="normalize-space($content)"/>
312
+ </xsl:otherwise>
313
+ </xsl:choose>
314
+ </xsl:template>
315
+
316
+ </xsl:stylesheet>
ultradata_math_parser/mmltex/glayout.xsl ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
+ xmlns:m="http://www.w3.org/1998/Math/MathML"
4
+ version='1.0'>
5
+
6
+ <!-- ====================================================================== -->
7
+ <!-- $id: glayout.xsl, 2002/17/05 Exp $
8
+ This file is part of the XSLT MathML Library distribution.
9
+ See ./README or http://www.raleigh.ru/MathML/mmltex for
10
+ copyright and other information -->
11
+ <!-- ====================================================================== -->
12
+
13
+ <!-- m:mfrac: converts a MathML fraction to LaTeX.
+      bevelled="true"      gives {num}/{denom}
+      linethickness given  gives \genfrac{}{}{thickness}{}{num}{denom}
+      anything else        gives \frac{num}{denom}
+      bevelled takes priority over linethickness. numalign and denomalign
+      values 'left' and 'right' are rendered by placing \hfill on the
+      opposite side of the numerator or denominator. -->
+ <xsl:template match="m:mfrac">
+   <xsl:choose>
+     <xsl:when test="@bevelled='true'">
+       <!-- The \raisebox rendering that used to live here was commented out,
+            which left the branch empty and produced the unbalanced output
+            "num}{denom}". Open a plain group instead; the separator emitted
+            further down becomes "}/{" for bevelled fractions. -->
+       <xsl:text>{</xsl:text>
+     </xsl:when>
+     <xsl:when test="@linethickness">
+       <xsl:text>\genfrac{}{}{</xsl:text>
+       <xsl:choose>
+         <!-- Compare with NaN instead of testing number() for truth: a
+              thickness of 0 is numeric but converts to false, so it used to
+              fall through to the otherwise branch and lose its unit. -->
+         <xsl:when test="string(number(@linethickness))!='NaN'">
+           <xsl:value-of select="@linethickness div 10"/>
+           <xsl:text>ex</xsl:text>
+         </xsl:when>
+         <xsl:when test="@linethickness='thin'">
+           <xsl:text>.05ex</xsl:text>
+         </xsl:when>
+         <!-- medium: leave the argument empty so the default rule is used -->
+         <xsl:when test="@linethickness='medium'"/>
+         <xsl:when test="@linethickness='thick'">
+           <xsl:text>.2ex</xsl:text>
+         </xsl:when>
+         <xsl:otherwise>
+           <!-- any other value (for example a length with a unit) is copied as is -->
+           <xsl:value-of select="@linethickness"/>
+         </xsl:otherwise>
+       </xsl:choose>
+       <xsl:text>}{}{</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>\frac{</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+   <xsl:if test="@numalign='right'">
+     <xsl:text>\hfill </xsl:text>
+   </xsl:if>
+   <xsl:apply-templates select="./*[1]"/>
+   <xsl:if test="@numalign='left'">
+     <xsl:text>\hfill </xsl:text>
+   </xsl:if>
+   <!-- separator between numerator and denominator -->
+   <xsl:choose>
+     <xsl:when test="@bevelled='true'">
+       <xsl:text>}/{</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>}{</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+   <xsl:if test="@denomalign='right'">
+     <xsl:text>\hfill </xsl:text>
+   </xsl:if>
+   <xsl:apply-templates select="./*[2]"/>
+   <xsl:if test="@denomalign='left'">
+     <xsl:text>\hfill </xsl:text>
+   </xsl:if>
+   <xsl:text>}</xsl:text>
+ </xsl:template>
63
+
64
+ <!-- m:mroot: with exactly two children, emits \sqrt[index]{radicand}, where the second child is the index and the first child is the radicand. Any other child count is reported as "exception 25" both through xsl:message and in the generated text. -->
+ <xsl:template match="m:mroot">
65
+ <xsl:choose>
66
+ <xsl:when test="count(./*)=2">
67
+ <xsl:text>\sqrt[</xsl:text>
68
+ <xsl:apply-templates select="./*[2]"/>
69
+ <xsl:text>]{</xsl:text>
70
+ <xsl:apply-templates select="./*[1]"/>
71
+ <xsl:text>}</xsl:text>
72
+ </xsl:when>
73
+ <xsl:otherwise>
74
+ <!-- number of arguments is not 2 - code 25 -->
75
+ <xsl:message>exception 25:</xsl:message>
76
+ <xsl:text>\text{exception 25:}</xsl:text>
77
+ </xsl:otherwise>
78
+ </xsl:choose>
79
+ </xsl:template>
80
+
81
+ <!-- m:msqrt: emits \sqrt{...} around the result of applying templates to all children. -->
+ <xsl:template match="m:msqrt">
82
+ <xsl:text>\sqrt{</xsl:text>
83
+ <xsl:apply-templates/>
84
+ <xsl:text>}</xsl:text>
85
+ </xsl:template>
86
+
87
+ <!-- m:mfenced: emits the children between an opening and a closing fence.
+      Opening fence: "(" when @open is absent, otherwise the value of @open.
+      Closing fence: ")" when @close is absent, otherwise the value of @close.
+      A fence that is a single character from {}[]()| (or the default
+      parenthesis) is sized with \left, and the closing side then always gets
+      the matching \right so the pair stays balanced. Braces are escaped.
+      Children are joined with the characters of @separators (spaces removed
+      by the startspace template), repeating the last one when there are more
+      children than separators; the separator is "," when @separators is absent. -->
+ <xsl:template match="m:mfenced">
+   <!-- true when the opening fence below is written with \left -->
+   <xsl:variable name="sized" select="not(@open) or translate(@open,'{}[]()|','{{{{{{{')='{'"/>
+   <xsl:choose>
+     <xsl:when test="@open">
+       <xsl:if test="translate(@open,'{}[]()|','{{{{{{{')='{'">
+         <xsl:text>\left</xsl:text>
+       </xsl:if>
+       <xsl:if test="@open='{' or @open='}'">
+         <xsl:text>\</xsl:text>
+       </xsl:if>
+       <xsl:value-of select="@open"/>
+     </xsl:when>
+     <xsl:otherwise><xsl:text>\left(</xsl:text></xsl:otherwise>
+   </xsl:choose>
+   <xsl:choose>
+     <xsl:when test="count(./*)>1">
+       <xsl:variable name="symbol">
+         <xsl:choose>
+           <xsl:when test="@separators">
+             <xsl:call-template name="startspace">
+               <xsl:with-param name="symbol" select="@separators"/>
+             </xsl:call-template>
+           </xsl:when>
+           <xsl:otherwise>,</xsl:otherwise>
+         </xsl:choose>
+       </xsl:variable>
+       <xsl:for-each select="./*">
+         <xsl:apply-templates select="."/>
+         <xsl:if test="not(position()=last())">
+           <xsl:choose>
+             <!-- ran out of separator characters: keep using the last one -->
+             <xsl:when test="position()>string-length($symbol)">
+               <xsl:value-of select="substring($symbol,string-length($symbol))"/>
+             </xsl:when>
+             <xsl:otherwise>
+               <xsl:value-of select="substring($symbol,position(),1)"/>
+             </xsl:otherwise>
+           </xsl:choose>
+         </xsl:if>
+       </xsl:for-each>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:apply-templates/>
+     </xsl:otherwise>
+   </xsl:choose>
+   <!-- \right is tied to whether \left was written, not to the shape of
+        @close, so the output never has one without the other. -->
+   <xsl:if test="$sized">
+     <xsl:text>\right</xsl:text>
+   </xsl:if>
+   <xsl:choose>
+     <xsl:when test="not(@close)">
+       <xsl:text>)</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <!-- \right needs a delimiter right after it. When @close is empty
+            (as in open="{" close="" for case distinctions) or longer than
+            one character, supply the null delimiter "." first. -->
+       <xsl:if test="$sized and string-length(@close)!=1">
+         <xsl:text>.</xsl:text>
+       </xsl:if>
+       <!-- escape decision is based on the closing fence itself; the
+            previous test looked at @open and produced "\right\)" or "\right}" -->
+       <xsl:if test="@close='{' or @close='}'">
+         <xsl:text>\</xsl:text>
+       </xsl:if>
+       <xsl:value-of select="@close"/>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
143
+
144
+ <!-- m:mphantom: emits \phantom{...} around the result of applying templates to all children. -->
+ <xsl:template match="m:mphantom">
145
+ <xsl:text>\phantom{</xsl:text>
146
+ <xsl:apply-templates/>
147
+ <xsl:text>}</xsl:text>
148
+ </xsl:template>
149
+
150
+ <!-- m:menclose: chooses the LaTeX wrapper from @notation. 'actuarial' gives \overline{...} closed with a trailing "\hspace{.2em}|"; 'radical' gives \sqrt{...}; every other value, including a missing attribute, gives \overline{)...}. -->
+ <xsl:template match="m:menclose">
151
+ <xsl:choose>
152
+ <xsl:when test="@notation = 'actuarial'">
153
+ <xsl:text>\overline{</xsl:text>
154
+ <xsl:apply-templates/>
155
+ <xsl:text>\hspace{.2em}|}</xsl:text>
156
+ </xsl:when>
157
+ <xsl:when test="@notation = 'radical'">
158
+ <xsl:text>\sqrt{</xsl:text>
159
+ <xsl:apply-templates/>
160
+ <xsl:text>}</xsl:text>
161
+ </xsl:when>
162
+ <xsl:otherwise>
163
+ <xsl:text>\overline{)</xsl:text>
164
+ <xsl:apply-templates/>
165
+ <xsl:text>}</xsl:text>
166
+ </xsl:otherwise>
167
+ </xsl:choose>
168
+ </xsl:template>
169
+
170
+ <!-- m:mrow: adds no markup of its own; it only applies templates to its children. -->
+ <xsl:template match="m:mrow">
171
+ <xsl:apply-templates/>
172
+ </xsl:template>
173
+
174
+ <!-- m:mstyle: wraps the children in \colorbox[rgb]{...}{$...$} when @background is present and in \textcolor[rgb]{...}{...} when @color is present; with both, the colour box is the outer wrapper. The colour argument is produced by the named template "color", which is defined outside this file (presumably it converts the attribute value to rgb components; confirm there). Other mstyle attributes are not handled by this template. -->
+ <xsl:template match="m:mstyle">
175
+ <xsl:if test="@background">
176
+ <xsl:text>\colorbox[rgb]{</xsl:text>
177
+ <xsl:call-template name="color">
178
+ <xsl:with-param name="color" select="@background"/>
179
+ </xsl:call-template>
180
+ <xsl:text>}{$</xsl:text>
181
+ </xsl:if>
182
+ <xsl:if test="@color">
183
+ <xsl:text>\textcolor[rgb]{</xsl:text>
184
+ <xsl:call-template name="color">
185
+ <xsl:with-param name="color" select="@color"/>
186
+ </xsl:call-template>
187
+ <xsl:text>}{</xsl:text>
188
+ </xsl:if>
189
+ <xsl:apply-templates/>
190
+ <xsl:if test="@color">
191
+ <xsl:text>}</xsl:text>
192
+ </xsl:if>
193
+ <xsl:if test="@background">
194
+ <xsl:text>$}</xsl:text>
195
+ </xsl:if>
196
+ </xsl:template>
197
+ <!--
198
+
199
+ <xsl:template match="m:mstyle">
200
+ <xsl:if test="@displaystyle='true'">
201
+ <xsl:text>{\displaystyle</xsl:text>
202
+ </xsl:if>
203
+ <xsl:if test="@scriptlevel=2">
204
+ <xsl:text>{\scriptscriptstyle</xsl:text>
205
+ </xsl:if>
206
+ <xsl:apply-templates/>
207
+ <xsl:if test="@scriptlevel=2">
208
+ <xsl:text>}</xsl:text>
209
+ </xsl:if>
210
+ <xsl:if test="@displaystyle='true'">
211
+ <xsl:text>}</xsl:text>
212
+ </xsl:if>
213
+ </xsl:template>
214
+ -->
215
+
216
+ <!-- m:merror: adds no error markup; the children are processed like ordinary content. -->
+ <xsl:template match="m:merror">
217
+ <xsl:apply-templates/>
218
+ </xsl:template>
219
+
220
+ </xsl:stylesheet>
ultradata_math_parser/mmltex/mmltex.xsl ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
+ xmlns:m="http://www.w3.org/1998/Math/MathML"
4
+ version='1.0'>
5
+
6
+ <xsl:output method="text" indent="no" encoding="UTF-8"/>
7
+
8
+ <!-- ====================================================================== -->
9
+ <!-- $id: mmltex.xsl, 2002/22/11 Exp $
10
+ This file is part of the XSLT MathML Library distribution.
11
+ See ./README or http://www.raleigh.ru/MathML/mmltex for
12
+ copyright and other information -->
13
+ <!-- ====================================================================== -->
14
+
15
+ <xsl:include href="tokens.xsl"/>
16
+ <xsl:include href="glayout.xsl"/>
17
+ <xsl:include href="scripts.xsl"/>
18
+ <xsl:include href="tables.xsl"/>
19
+ <xsl:include href="entities.xsl"/>
20
+ <xsl:include href="cmarkup.xsl"/>
21
+
22
+ <!-- Note: variable colora (template color) exists only for Sablotron -->
+
+ <!-- startspace: outputs $symbol with every space character (U+0020) removed.
+      $symbol may be a string or a node-set; it is converted to its string
+      value first. Uses the XPath translate() function, so the work is done
+      in a single call instead of one recursive template call per space. -->
+ <xsl:template name="startspace">
+   <xsl:param name="symbol"/>
+   <xsl:value-of select="translate($symbol,' ','')"/>
+ </xsl:template>
36
+
37
+ <xsl:strip-space elements="m:*"/>
38
+
39
+ <!-- m:math: entry point for a MathML expression; the converted children are emitted between two dollar signs (written as the character reference &#x00024;). The same delimiters are used whatever attributes the math element carries. -->
+ <xsl:template match="m:math">
40
+ <xsl:text>&#x00024;</xsl:text>
41
+ <xsl:apply-templates/>
42
+ <xsl:text>&#x00024;</xsl:text>
43
+ </xsl:template>
44
+
45
+ </xsl:stylesheet>
ultradata_math_parser/mmltex/scripts.xsl ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
+ xmlns:m="http://www.w3.org/1998/Math/MathML"
4
+ version='1.0'>
5
+
6
+ <!-- ====================================================================== -->
7
+ <!-- $Id: scripts.xsl,v 1.1.1.1 2002/10/26 14:20:06 shade33 Exp $
8
+ This file is part of the XSLT MathML Library distribution.
9
+ See ./README or http://www.raleigh.ru/MathML/mmltex for
10
+ copyright and other information -->
11
+ <!-- ====================================================================== -->
12
+
13
<!--
  m:munderover: render a base (child 1) with an underscript (child 2) and an
  overscript (child 3).
  The three variables hold the space-stripped string values of the children
  and are used only to choose a branch; the output itself always comes from
  applying templates to the children.
  Branch order matters: over bar/brace first, then under bar/brace, then
  large operators, then the generic \underset/\overset form.
  Both the legacy vertical-form brace code points (U+FE37, U+FE38) and the
  standard horizontal braces (U+23DE TOP CURLY BRACKET, U+23DF BOTTOM CURLY
  BRACKET) are recognised.
-->
<xsl:template match="m:munderover">
  <xsl:variable name="base">
    <xsl:call-template name="startspace">
      <xsl:with-param name="symbol" select="./*[1]"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="under">
    <xsl:call-template name="startspace">
      <xsl:with-param name="symbol" select="./*[2]"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="over">
    <xsl:call-template name="startspace">
      <xsl:with-param name="symbol" select="./*[3]"/>
    </xsl:call-template>
  </xsl:variable>

  <xsl:choose>
    <xsl:when test="$over='&#x000AF;'"> <!-- OverBar - over bar -->
      <xsl:text>\overline{</xsl:text>
      <!-- The remaining base/underscript pair is rendered by the named munder template. -->
      <xsl:call-template name="munder">
        <xsl:with-param name="base" select="$base"/>
        <xsl:with-param name="under" select="$under"/>
      </xsl:call-template>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="$over='&#x0FE37;' or $over='&#x023DE;'"> <!-- OverBrace - over brace -->
      <xsl:text>\overbrace{</xsl:text>
      <xsl:call-template name="munder">
        <xsl:with-param name="base" select="$base"/>
        <xsl:with-param name="under" select="$under"/>
      </xsl:call-template>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="$under='&#x00332;'"> <!-- UnderBar - combining low line -->
      <xsl:text>\underline{</xsl:text>
      <!-- pos_over=3: inside munderover the overscript is the third child. -->
      <xsl:call-template name="mover">
        <xsl:with-param name="base" select="$base"/>
        <xsl:with-param name="over" select="$over"/>
        <xsl:with-param name="pos_over" select="3"/>
      </xsl:call-template>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="$under='&#x0FE38;' or $under='&#x023DF;'"> <!-- UnderBrace - under brace -->
      <xsl:text>\underbrace{</xsl:text>
      <xsl:call-template name="mover">
        <xsl:with-param name="base" select="$base"/>
        <xsl:with-param name="over" select="$over"/>
        <xsl:with-param name="pos_over" select="3"/>
      </xsl:call-template>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="translate($base,'&#x0220F;&#x02210;&#x022c2;&#x022c3;&#x02294;',
                    '&#x02211;&#x02211;&#x02211;&#x02211;&#x02211;')='&#x02211;'">
      <!-- The base is exactly one large operator: sum (U+2211), prod (U+220F),
           coprod (U+2210), bigcap (U+22C2), bigcup (U+22C3) or bigsqcup (U+2294).
           The translate() call folds the other five onto the summation sign so
           that a single comparison covers all six. -->
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>_{</xsl:text>
      <xsl:apply-templates select="./*[2]"/>
      <xsl:text>}^{</xsl:text>
      <xsl:apply-templates select="./*[3]"/>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>\underset{</xsl:text>
      <xsl:apply-templates select="./*[2]"/>
      <xsl:text>}{\overset{</xsl:text>
      <xsl:apply-templates select="./*[3]"/>
      <xsl:text>}{</xsl:text>
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>}}</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
93
+
94
<xsl:template match="m:mover">
  <!-- Space-stripped text of the base (child 1) and the overscript (child 2);
       the named mover template uses them to pick the TeX construct. -->
  <xsl:variable name="baseText">
    <xsl:call-template name="startspace">
      <xsl:with-param name="symbol" select="*[1]"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="overText">
    <xsl:call-template name="startspace">
      <xsl:with-param name="symbol" select="*[2]"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:call-template name="mover">
    <xsl:with-param name="base" select="$baseText"/>
    <xsl:with-param name="over" select="$overText"/>
  </xsl:call-template>
</xsl:template>
108
+
109
<xsl:template match="m:munder">
  <!-- Space-stripped text of the base (child 1) and the underscript (child 2);
       the named munder template uses them to pick the TeX construct. -->
  <xsl:variable name="baseText">
    <xsl:call-template name="startspace">
      <xsl:with-param name="symbol" select="*[1]"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="underText">
    <xsl:call-template name="startspace">
      <xsl:with-param name="symbol" select="*[2]"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:call-template name="munder">
    <xsl:with-param name="base" select="$baseText"/>
    <xsl:with-param name="under" select="$underText"/>
  </xsl:call-template>
</xsl:template>
123
+
124
<!--
  mover (named): render the current element's base with an overscript.
  Params:
    base      space-stripped text of the base, used only for branch selection
    over      space-stripped text of the overscript, used only for branch selection
    pos_over  child position of the overscript on the current element
              (2 for m:mover, 3 when called from m:munderover)
  The over brace is recognised both as the legacy U+FE37 and as the standard
  U+23DE TOP CURLY BRACKET.
-->
<xsl:template name="mover">
  <xsl:param name="base"/>
  <xsl:param name="over"/>
  <xsl:param name="pos_over" select="2"/>
  <xsl:choose>
    <xsl:when test="$over='&#x000AF;'"> <!-- OverBar - over bar -->
      <xsl:text>\overline{</xsl:text>
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="$over='&#x0FE37;' or $over='&#x023DE;'"> <!-- OverBrace - over brace -->
      <xsl:text>\overbrace{</xsl:text>
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="translate($base,'&#x0220F;&#x02210;&#x022c2;&#x022c3;&#x02294;',
                    '&#x02211;&#x02211;&#x02211;&#x02211;&#x02211;')='&#x02211;'">
      <!-- The base is exactly one large operator: sum (U+2211), prod (U+220F),
           coprod (U+2210), bigcap (U+22C2), bigcup (U+22C3) or bigsqcup (U+2294);
           the overscript becomes an upper limit. -->
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>^{</xsl:text>
      <xsl:apply-templates select="./*[$pos_over]"/>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <!-- Generic case: \stackrel{over}{base}. -->
      <xsl:text>\stackrel{</xsl:text>
      <xsl:apply-templates select="./*[$pos_over]"/>
      <xsl:text>}{</xsl:text>
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>}</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
169
+
170
<!--
  munder (named): render the current element's base (child 1) with an
  underscript (child 2).
  Params:
    base   space-stripped text of the base, used only for branch selection
    under  space-stripped text of the underscript, used only for branch selection
  The under brace is recognised both as the legacy U+FE38 and as the standard
  U+23DF BOTTOM CURLY BRACKET.
-->
<xsl:template name="munder">
  <xsl:param name="base"/>
  <xsl:param name="under"/>
  <xsl:choose>
    <xsl:when test="$under='&#x00332;'"> <!-- UnderBar - combining low line -->
      <xsl:text>\underline{</xsl:text>
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="$under='&#x0FE38;' or $under='&#x023DF;'"> <!-- UnderBrace - under brace -->
      <xsl:text>\underbrace{</xsl:text>
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:when test="translate($base,'&#x0220F;&#x02210;&#x022c2;&#x022c3;&#x02294;',
                    '&#x02211;&#x02211;&#x02211;&#x02211;&#x02211;')='&#x02211;'">
      <!-- The base is exactly one large operator: sum (U+2211), prod (U+220F),
           coprod (U+2210), bigcap (U+22C2), bigcup (U+22C3) or bigsqcup (U+2294);
           the underscript becomes a lower limit. -->
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>_{</xsl:text>
      <xsl:apply-templates select="./*[2]"/>
      <xsl:text>}</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>\underset{</xsl:text> <!-- Required AmsMath package -->
      <xsl:apply-templates select="./*[2]"/>
      <xsl:text>}{</xsl:text>
      <xsl:apply-templates select="./*[1]"/>
      <xsl:text>}</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
208
+
209
<xsl:template match="m:msubsup">
  <!-- Output shape: {base}_{subscript}^{superscript}, children taken in document order. -->
  <xsl:text>{</xsl:text>
  <xsl:apply-templates select="*[1]"/>
  <xsl:text>}_{</xsl:text>
  <xsl:apply-templates select="*[2]"/>
  <xsl:text>}^{</xsl:text>
  <xsl:apply-templates select="*[3]"/>
  <xsl:text>}</xsl:text>
</xsl:template>
218
+
219
<xsl:template match="m:msup">
  <!-- Output shape: {base}^{superscript}. -->
  <xsl:text>{</xsl:text>
  <xsl:apply-templates select="*[1]"/>
  <xsl:text>}^{</xsl:text>
  <xsl:apply-templates select="*[2]"/>
  <xsl:text>}</xsl:text>
</xsl:template>
226
+
227
<xsl:template match="m:msub">
  <!-- Output shape: {base}_{subscript}. -->
  <xsl:text>{</xsl:text>
  <xsl:apply-templates select="*[1]"/>
  <xsl:text>}_{</xsl:text>
  <xsl:apply-templates select="*[2]"/>
  <xsl:text>}</xsl:text>
</xsl:template>
234
+
235
<xsl:template match="m:mmultiscripts" mode="mprescripts">
  <!-- Prescripts: the siblings after m:mprescripts alternate subscript (odd
       position) and superscript (even position). Each is attached to an empty
       group so it is typeset in front of the base; m:none entries emit nothing. -->
  <xsl:for-each select="m:mprescripts/following-sibling::*">
    <xsl:if test="local-name(.)!='none'">
      <xsl:choose>
        <xsl:when test="position() mod 2">
          <xsl:text>{}_{</xsl:text>
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>{}^{</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
      <xsl:apply-templates select="."/>
      <xsl:text>}</xsl:text>
    </xsl:if>
  </xsl:for-each>

  <!-- The base itself. -->
  <xsl:apply-templates select="*[1]"/>

  <!-- Postscripts: everything before m:mprescripts except the base. The
       predicate is evaluated on the reverse preceding-sibling axis, so last()
       there is the first child, i.e. the base. -->
  <xsl:for-each select="m:mprescripts/preceding-sibling::*[position()!=last()]">
    <xsl:if test="local-name(.)!='none'">
      <!-- From the second sub/sup pair onwards an empty group separates the pairs. -->
      <xsl:if test="position()>2">
        <xsl:text>{}</xsl:text>
      </xsl:if>
      <xsl:choose>
        <xsl:when test="position() mod 2">
          <xsl:text>_{</xsl:text>
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>^{</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
      <xsl:apply-templates select="."/>
      <xsl:text>}</xsl:text>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
265
+
266
<xsl:template match="m:mmultiscripts">
  <xsl:choose>
    <xsl:when test="m:mprescripts">
      <!-- Prescripts present: handled by the mprescripts-mode template. -->
      <xsl:apply-templates select="." mode="mprescripts"/>
    </xsl:when>
    <xsl:otherwise>
      <!-- Base first, then the remaining children as alternating
           subscript (odd position) / superscript (even position). -->
      <xsl:apply-templates select="*[1]"/>
      <xsl:for-each select="*[position()>1]">
        <xsl:if test="local-name(.)!='none'">
          <!-- From the second sub/sup pair onwards an empty group separates the pairs. -->
          <xsl:if test="position()>2">
            <xsl:text>{}</xsl:text>
          </xsl:if>
          <xsl:choose>
            <xsl:when test="position() mod 2">
              <xsl:text>_{</xsl:text>
            </xsl:when>
            <xsl:otherwise>
              <xsl:text>^{</xsl:text>
            </xsl:otherwise>
          </xsl:choose>
          <xsl:apply-templates select="."/>
          <xsl:text>}</xsl:text>
        </xsl:if>
      </xsl:for-each>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
291
+
292
+ </xsl:stylesheet>
ultradata_math_parser/mmltex/tables.xsl ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
+ xmlns:m="http://www.w3.org/1998/Math/MathML"
4
+ version='1.0'>
5
+
6
+ <!-- ====================================================================== -->
7
+ <!-- $id: tables.xsl, 2002/17/05 Exp $
8
+ This file is part of the XSLT MathML Library distribution.
9
+ See ./README or http://www.raleigh.ru/MathML/mmltex for
10
+ copyright and other information -->
11
+ <!-- ====================================================================== -->
12
+
13
<xsl:template match="m:mtd[@columnspan]">
  <!-- A spanning cell becomes a centred \multicolumn of @columnspan columns. -->
  <xsl:text>\multicolumn{</xsl:text>
  <xsl:value-of select="@columnspan"/>
  <xsl:text>}{c}{</xsl:text>
  <xsl:apply-templates select="node()"/>
  <xsl:text>}</xsl:text>
  <!-- Column separator after every cell that has a following sibling element. -->
  <xsl:if test="following-sibling::*">
    <xsl:text>&amp; </xsl:text>
  </xsl:if>
</xsl:template>
23
+
24
+
25
<xsl:template match="m:mtd">
  <!-- Per-cell alignment is emulated with \hfill: before the content for
       right/center, after the content for left/center. -->
  <xsl:variable name="align" select="string(@columnalign)"/>
  <xsl:if test="$align='right' or $align='center'">
    <xsl:text>\hfill </xsl:text>
  </xsl:if>
  <xsl:apply-templates select="node()"/>
  <xsl:if test="$align='left' or $align='center'">
    <xsl:text>\hfill </xsl:text>
  </xsl:if>
  <!-- Column separator after every cell that has a following sibling element.
       The sibling test is used instead of position()/last(), as the original
       notes for Sablotron; the same test is used for m:mtd[@columnspan] and m:mtr. -->
  <xsl:if test="following-sibling::*">
    <xsl:text>&amp; </xsl:text>
  </xsl:if>
</xsl:template>
39
+
40
<xsl:template match="m:mtr">
  <!-- Render the row's cells, then a row break unless this is the last sibling. -->
  <xsl:apply-templates select="node()"/>
  <xsl:if test="following-sibling::*">
    <xsl:text>\\ </xsl:text>
  </xsl:if>
</xsl:template>
46
+
47
<xsl:template match="m:mtable">
  <!-- frame='solid' adds vertical rules in the column spec and \hline at top and bottom. -->
  <xsl:variable name="framed" select="@frame='solid'"/>
  <!-- Column count is taken from the first row only: plain cells plus the sum of column spans. -->
  <xsl:variable name="numbercols" select="count(./m:mtr[1]/m:mtd[not(@columnspan)])+sum(./m:mtr[1]/m:mtd/@columnspan)"/>

  <xsl:text>\begin{array}{</xsl:text>
  <xsl:if test="$framed">
    <xsl:text>|</xsl:text>
  </xsl:if>
  <xsl:choose>
    <xsl:when test="@columnalign">
      <!-- One letter per listed alignment keyword (first letter of each word). -->
      <xsl:variable name="spec">
        <xsl:call-template name="colalign">
          <xsl:with-param name="colalign" select="@columnalign"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="specLength" select="string-length($spec)"/>
      <xsl:choose>
        <!-- More alignments than columns: truncate. -->
        <xsl:when test="$specLength &gt; $numbercols">
          <xsl:value-of select="substring($spec,1,$numbercols)"/>
        </xsl:when>
        <!-- Fewer alignments than columns: repeat the last one for the rest. -->
        <xsl:when test="$specLength &lt; $numbercols">
          <xsl:value-of select="$spec"/>
          <xsl:call-template name="generate-string">
            <xsl:with-param name="text" select="substring($spec,$specLength)"/>
            <xsl:with-param name="count" select="$numbercols - $specLength"/>
          </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
          <xsl:value-of select="$spec"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:otherwise>
      <!-- No columnalign attribute: every column is centred. -->
      <xsl:call-template name="generate-string">
        <xsl:with-param name="text" select="'c'"/>
        <xsl:with-param name="count" select="$numbercols"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:if test="$framed">
    <xsl:text>|</xsl:text>
  </xsl:if>
  <xsl:text>}</xsl:text>
  <xsl:if test="$framed">
    <xsl:text>\hline </xsl:text>
  </xsl:if>
  <xsl:apply-templates select="node()"/>
  <xsl:if test="$framed">
    <xsl:text>\\ \hline</xsl:text>
  </xsl:if>
  <xsl:text>\end{array}</xsl:text>
</xsl:template>
96
+
97
<xsl:template name="colalign">
  <!-- Reduce a space-separated list of alignment keywords to the first
       character of each entry, one entry per recursion step. -->
  <xsl:param name="colalign"/>
  <xsl:value-of select="substring($colalign,1,1)"/>
  <xsl:if test="contains($colalign,' ')">
    <xsl:call-template name="colalign">
      <xsl:with-param name="colalign" select="substring-after($colalign,' ')"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
111
+
112
<!--
  generate-string: emit $text repeated $count times (template from the XSLT
  Standard Library v1.1).
  Params:
    text   string to repeat; an empty string produces no output
    count  number of repetitions; anything that is not greater than zero
           produces no output
  The stop condition is written as not($count > 0) rather than $count <= 0
  because every comparison with NaN is false in XPath 1.0: with the original
  test a NaN count (for example from a non-numeric columnspan summed by the
  m:mtable template) never reached the stop branch and recursed until the
  processor gave up.
-->
<xsl:template name="generate-string">
  <xsl:param name="text"/>
  <xsl:param name="count"/>

  <xsl:choose>
    <xsl:when test="string-length($text) = 0 or not($count &gt; 0)"/>

    <xsl:otherwise>
      <xsl:value-of select="$text"/>
      <xsl:call-template name="generate-string">
        <xsl:with-param name="text" select="$text"/>
        <xsl:with-param name="count" select="$count - 1"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
129
+
130
+ </xsl:stylesheet>
ultradata_math_parser/mmltex/tokens.xsl ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version='1.0' encoding="UTF-8"?>
2
+ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3
+ xmlns:m="http://www.w3.org/1998/Math/MathML"
4
+ version='1.0'>
5
+
6
+ <!-- ====================================================================== -->
7
+ <!-- $id: tokens.xsl, 2002/22/11 Exp $
8
+ This file is part of the XSLT MathML Library distribution.
9
+ See ./README or http://www.raleigh.ru/MathML/mmltex for
10
+ copyright and other information -->
11
+ <!-- ====================================================================== -->
12
+
13
+ <xsl:template match="m:mi|m:mn|m:mo|m:mtext|m:ms">
+ <!-- All five token elements go through CommonTokenAtr, which handles the shared colour/variant attributes and then calls selectTemplate for the element-specific template. -->
14
+ <xsl:call-template name="CommonTokenAtr"/>
15
+ </xsl:template>
16
+
17
<xsl:template name="mi">
  <!-- An identifier longer than one character (after whitespace normalisation)
       and without an explicit mathvariant is set upright via \mathrm. -->
  <xsl:variable name="upright" select="string-length(normalize-space(.)) &gt; 1 and not(@mathvariant)"/>
  <xsl:if test="$upright">
    <xsl:text>\mathrm{</xsl:text>
  </xsl:if>
  <xsl:apply-templates select="node()"/>
  <xsl:if test="$upright">
    <xsl:text>}</xsl:text>
  </xsl:if>
</xsl:template>
29
+
30
<xsl:template name="mn">
  <!-- Numbers are emitted as their processed child nodes, without extra markup. -->
  <xsl:apply-templates select="node()"/>
</xsl:template>
33
+
34
<xsl:template name="mo">
  <!-- Operators are emitted as their processed child nodes, without extra markup. -->
  <xsl:apply-templates select="node()"/>
</xsl:template>
37
+
38
<xsl:template name="mtext">
  <!-- The element's text is passed through replaceMtextEntities (defined in
       another stylesheet) and the resulting string is wrapped in \text{...}. -->
  <xsl:variable name="escaped">
    <xsl:call-template name="replaceMtextEntities">
      <xsl:with-param name="content" select="."/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:value-of select="concat('\text{', $escaped, '}')"/>
</xsl:template>
48
+
49
<xsl:template match="m:mspace">
  <!-- Space is an invisible rule: \phantom{\rule[-depth]{width}{height}}.
       The depth part is optional; a missing width or height becomes 0ex. -->
  <xsl:text>\phantom{\rule</xsl:text>
  <xsl:if test="@depth">
    <xsl:text>[-</xsl:text>
    <xsl:value-of select="@depth"/>
    <xsl:text>]</xsl:text>
  </xsl:if>
  <xsl:text>{</xsl:text>
  <xsl:choose>
    <xsl:when test="@width">
      <xsl:value-of select="@width"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>0ex</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:text>}{</xsl:text>
  <xsl:choose>
    <xsl:when test="@height">
      <xsl:value-of select="@height"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>0ex</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:text>}}</xsl:text>
</xsl:template>
68
+
69
<xsl:template name="ms">
  <!-- String literal: content between @lquote and @rquote, each defaulting
       to a double quote when the attribute is absent. -->
  <xsl:if test="@lquote">
    <xsl:value-of select="@lquote"/>
  </xsl:if>
  <xsl:if test="not(@lquote)">
    <xsl:text>"</xsl:text>
  </xsl:if>
  <xsl:apply-templates select="node()"/>
  <xsl:if test="@rquote">
    <xsl:value-of select="@rquote"/>
  </xsl:if>
  <xsl:if test="not(@rquote)">
    <xsl:text>"</xsl:text>
  </xsl:if>
</xsl:template>
78
+
79
<!--
  CommonTokenAtr: wrap the current token element in the TeX constructs for
  its common presentation attributes, then delegate the content to
  selectTemplate. Nesting, outermost first:
    @mathbackground     \colorbox[rgb]{r,g,b}{$ ... $}
    @color/@mathcolor   \textcolor[rgb]{r,g,b}{ ... }
    @mathvariant        font command, or a plain group for unknown values
  The closing braces at the end mirror the openings in reverse order.
  bold-fraktur and bold-script are emitted as \boldsymbol{\mathfrak{...}} and
  \boldsymbol{\mathcal{...}} (amsmath). They previously produced a bare group
  and the undefined macro \mathsc, both marked as errors in the source. These
  two variants open two groups, so they also close two.
-->
<xsl:template name="CommonTokenAtr">
  <xsl:if test="@mathbackground">
    <xsl:text>\colorbox[rgb]{</xsl:text>
    <xsl:call-template name="color">
      <xsl:with-param name="color" select="@mathbackground"/>
    </xsl:call-template>
    <xsl:text>}{$</xsl:text>
  </xsl:if>
  <xsl:if test="@color or @mathcolor"> <!-- Note: @color is deprecated in MathML 2.0 -->
    <xsl:text>\textcolor[rgb]{</xsl:text>
    <xsl:call-template name="color">
      <xsl:with-param name="color" select="@color|@mathcolor"/>
    </xsl:call-template>
    <xsl:text>}{</xsl:text>
  </xsl:if>
  <xsl:if test="@mathvariant">
    <xsl:choose>
      <xsl:when test="@mathvariant='normal'">
        <xsl:text>\mathrm{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='bold'">
        <xsl:text>\mathbf{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='italic'">
        <xsl:text>\mathit{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='bold-italic'"> <!-- Required definition -->
        <xsl:text>\mathbit{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='double-struck'"> <!-- Required amsfonts -->
        <xsl:text>\mathbb{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='bold-fraktur'"> <!-- Required amsmath and amsfonts; opens two groups -->
        <xsl:text>\boldsymbol{\mathfrak{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='script'">
        <xsl:text>\mathcal{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='bold-script'"> <!-- Required amsmath; opens two groups -->
        <xsl:text>\boldsymbol{\mathcal{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='fraktur'"> <!-- Required amsfonts -->
        <xsl:text>\mathfrak{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='sans-serif'">
        <xsl:text>\mathsf{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='bold-sans-serif'"> <!-- Required definition -->
        <xsl:text>\mathbsf{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='sans-serif-italic'"> <!-- Required definition -->
        <xsl:text>\mathsfit{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='sans-serif-bold-italic'"> <!-- Error -->
        <xsl:text>\mathbsfit{</xsl:text>
      </xsl:when>
      <xsl:when test="@mathvariant='monospace'">
        <xsl:text>\mathtt{</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <!-- Unknown variant: plain group so the closing brace below stays balanced. -->
        <xsl:text>{</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
  <xsl:call-template name="selectTemplate"/>
  <xsl:if test="@mathvariant">
    <xsl:text>}</xsl:text>
    <!-- Second closing brace for the two variants that opened \boldsymbol{\math...{ -->
    <xsl:if test="@mathvariant='bold-fraktur' or @mathvariant='bold-script'">
      <xsl:text>}</xsl:text>
    </xsl:if>
  </xsl:if>
  <xsl:if test="@color or @mathcolor">
    <xsl:text>}</xsl:text>
  </xsl:if>
  <xsl:if test="@mathbackground">
    <xsl:text>$}</xsl:text>
  </xsl:if>
</xsl:template>
154
+
155
<xsl:template name="selectTemplate">
  <!-- Dispatch to the named template that matches the current element's local
       name. The source keeps a commented-out call-template with a computed
       name as the intended one-liner; this explicit choose does the same job.
       Any other element name produces no output. -->
  <xsl:variable name="kind" select="local-name(.)"/>
  <xsl:choose>
    <xsl:when test="$kind='mi'">
      <xsl:call-template name="mi"/>
    </xsl:when>
    <xsl:when test="$kind='mn'">
      <xsl:call-template name="mn"/>
    </xsl:when>
    <xsl:when test="$kind='mo'">
      <xsl:call-template name="mo"/>
    </xsl:when>
    <xsl:when test="$kind='mtext'">
      <xsl:call-template name="mtext"/>
    </xsl:when>
    <xsl:when test="$kind='ms'">
      <xsl:call-template name="ms"/>
    </xsl:when>
  </xsl:choose>
</xsl:template>
176
+
177
<xsl:template name="color">
  <!-- Convert a colour value into the "r,g,b" triple used by the [rgb] colour
       model (components between 0 and 1). Accepted forms: #rgb, #rrggbb and
       the sixteen HTML colour names, case-insensitively. Any other value
       raises an xsl:message and contributes no output. -->
  <!-- NB: Variables colora and the per-digit variables only for Sablotron -->
  <xsl:param name="color"/>
  <xsl:variable name="colora" select="translate($color,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')"/>
  <xsl:choose>
    <!-- #rgb: one hex digit per channel, scaled by 15. -->
    <xsl:when test="starts-with($colora,'#') and string-length($colora)=4">
      <xsl:variable name="red">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,2,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="green">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,3,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="blue">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,4,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:value-of select="$red div 15"/>
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$green div 15"/>
      <xsl:text>,</xsl:text>
      <xsl:value-of select="$blue div 15"/>
    </xsl:when>
    <!-- #rrggbb: two hex digits per channel, scaled by 255. -->
    <xsl:when test="starts-with($colora,'#') and string-length($colora)=7">
      <xsl:variable name="redHigh">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,2,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="redLow">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,3,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="greenHigh">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,4,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="greenLow">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,5,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="blueHigh">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,6,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:variable name="blueLow">
        <xsl:call-template name="Hex2Decimal">
          <xsl:with-param name="arg" select="substring($colora,7,1)"/>
        </xsl:call-template>
      </xsl:variable>
      <xsl:value-of select="($redHigh*16 + $redLow) div 255"/>
      <xsl:text>,</xsl:text>
      <xsl:value-of select="($greenHigh*16 + $greenLow) div 255"/>
      <xsl:text>,</xsl:text>
      <xsl:value-of select="($blueHigh*16 + $blueLow) div 255"/>
    </xsl:when>
    <!-- HTML colour names. -->
    <xsl:when test="$colora='aqua'"><xsl:text>0,1,1</xsl:text></xsl:when>
    <xsl:when test="$colora='black'"><xsl:text>0,0,0</xsl:text></xsl:when>
    <xsl:when test="$colora='blue'"><xsl:text>0,0,1</xsl:text></xsl:when>
    <xsl:when test="$colora='fuchsia'"><xsl:text>1,0,1</xsl:text></xsl:when>
    <xsl:when test="$colora='gray'"><xsl:text>.5,.5,.5</xsl:text></xsl:when>
    <xsl:when test="$colora='green'"><xsl:text>0,.5,0</xsl:text></xsl:when>
    <xsl:when test="$colora='lime'"><xsl:text>0,1,0</xsl:text></xsl:when>
    <xsl:when test="$colora='maroon'"><xsl:text>.5,0,0</xsl:text></xsl:when>
    <xsl:when test="$colora='navy'"><xsl:text>0,0,.5</xsl:text></xsl:when>
    <xsl:when test="$colora='olive'"><xsl:text>.5,.5,0</xsl:text></xsl:when>
    <xsl:when test="$colora='purple'"><xsl:text>.5,0,.5</xsl:text></xsl:when>
    <xsl:when test="$colora='red'"><xsl:text>1,0,0</xsl:text></xsl:when>
    <xsl:when test="$colora='silver'"><xsl:text>.75,.75,.75</xsl:text></xsl:when>
    <xsl:when test="$colora='teal'"><xsl:text>0,.5,.5</xsl:text></xsl:when>
    <xsl:when test="$colora='white'"><xsl:text>1,1,1</xsl:text></xsl:when>
    <xsl:when test="$colora='yellow'"><xsl:text>1,1,0</xsl:text></xsl:when>
    <xsl:otherwise>
      <!-- NOTE(review): nothing is written here, so the caller ends up with an
           empty colour argument. Confirm whether a fallback colour is wanted. -->
      <xsl:message>Exception at color template</xsl:message>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
259
+
260
<xsl:template name="Hex2Decimal">
  <!-- Convert one lower-case hexadecimal digit to its decimal value (0 to 15).
       Anything else raises an xsl:message and produces no output. -->
  <xsl:param name="arg"/>
  <xsl:choose>
    <!-- A single decimal digit maps to itself. -->
    <xsl:when test="translate($arg, '0123456789', '9999999999')='9'">
      <xsl:value-of select="$arg"/>
    </xsl:when>
    <!-- a to f: 10 plus the letter's offset within 'abcdef'. The length check
         keeps the empty string out, since contains() accepts it. -->
    <xsl:when test="string-length($arg)=1 and contains('abcdef',$arg)">
      <xsl:value-of select="10 + string-length(substring-before('abcdef',$arg))"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message>Exception at Hex2Decimal template</xsl:message>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
289
+
290
<xsl:template match="m:*/text()">
  <!-- Text directly inside any MathML element is whitespace-normalised and
       then passed to replaceEntities (defined in another stylesheet). -->
  <xsl:call-template name="replaceEntities">
    <xsl:with-param name="content" select="normalize-space(.)"/>
  </xsl:call-template>
</xsl:template>
295
+
296
+ </xsl:stylesheet>
ultradata_math_parser/parsers/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+
3
+ from ultradata_math_parser.parsers.article_parser import ArticleParser
4
+ from ultradata_math_parser.parsers.forum_parser import ForumParser
5
+ from ultradata_math_parser.parsers.custom_parser import CustomParser
6
+ from ultradata_math_parser.parsers.unified_parser import UnifiedParser
7
+ from ultradata_math_parser.parsers.title_parser import TitleParser
8
+
9
+ __all__ = [
10
+ "ArticleParser",
11
+ "ForumParser",
12
+ "CustomParser",
13
+ "UnifiedParser",
14
+ "TitleParser",
15
+ ]
ultradata_math_parser/parsers/article_parser.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+
3
+ from copy import deepcopy
4
+
5
+ from ultradata_math_parser.utils import *
6
+ from ultradata_math_parser.parsers.base_parser import BaseParser
7
+ from ultradata_math_parser.parsers.title_parser import TitleParser
8
+
9
+
10
class ArticleParser(BaseParser):
    """Extract the main content of an article-style HTML page.

    All heavy lifting (tag conversion, cleaning, content selection and
    fallbacks) is delegated to methods inherited from ``BaseParser``.
    """

    def __init__(self) -> None:
        super().__init__()

    def extract(self, html="", **kwargs) -> dict:
        """Parse ``html`` and return the extracted body together with metadata.

        Args:
            html: Raw HTML text of the page.
            **kwargs: Optional overrides. ``base_url`` (default ``""``) is
                used for this call only; ``process_math``,
                ``preserve_math_containers``, ``include_tables`` and
                ``include_images`` overwrite the instance attributes of the
                same name when given.

        Returns:
            A dict with the keys ``xp_num``, ``drop_list``, ``html``,
            ``title``, ``base_url``, ``fallback_strategy`` and
            ``text_length``.

        Raises:
            ValueError: If ``load_html`` returns ``None`` for the input.
        """
        base_url = kwargs.get("base_url", "")

        # Per-call options are stored on the instance, so they stay in effect
        # for later calls unless overridden again.
        for option in (
            "process_math",
            "preserve_math_containers",
            "include_tables",
            "include_images",
        ):
            setattr(self, option, kwargs.get(option, getattr(self, option)))

        # Turn non-breaking-space entities into plain spaces before parsing.
        cleaned_html = html.replace("&nbsp;", " ").replace("&#160;", " ")
        tree = load_html(cleaned_html)
        if tree is None:
            raise ValueError

        title = TitleParser().process(tree)

        # A <base href> in the document overrides the caller's base_url when
        # it contains "http".
        base_hrefs = tree.xpath("//base/@href")
        if base_hrefs and "http" in base_hrefs[0]:
            base_url = base_hrefs[0]

        # Site-specific cleanup for CSDN blogs: remove the "pre-numbering"
        # lists found inside the content view.
        if "://blog.csdn.net/" in base_url:
            numbering_lists = tree.xpath(
                '//div[@id="content_views"]//ul[@class="pre-numbering"]'
            )
            for numbering_list in numbering_lists:
                self.remove_node(numbering_list)

        # Independent copies: one kept as-is for the fallbacks, one to transform.
        raw_tree = deepcopy(tree)
        working_tree = deepcopy(tree)

        # Tag conversion, including the math tag handling.
        format_tree = self.convert_tags(working_tree, base_url=base_url)
        format_tree = self._remove_tables_from_tree(format_tree)
        format_tree = self._remove_images_from_tree(format_tree)

        # Remove script, style and similar tags together with their content.
        # NOTE(review): the table/image removers are applied a second time
        # here; presumably they honour include_tables / include_images, which
        # should be confirmed in BaseParser.
        normal_tree = self.clean_tags(format_tree)
        normal_tree = self._remove_tables_from_tree(normal_tree)
        normal_tree = self._remove_images_from_tree(normal_tree)
        # Snapshot taken before content selection runs on normal_tree.
        fallback_tree = deepcopy(normal_tree)

        subtree, xp_num, drop_list = self.xp_1_5(normal_tree)
        if xp_num == "others":
            subtree, drop_list = self.prune_unwanted_sections(normal_tree)
        primary_html = self.get_content_html(subtree, xp_num, base_url)

        body_html, fallback_strategy = self.apply_fallbacks(
            primary_html=primary_html,
            base_url=base_url,
            normal_tree=fallback_tree,
            raw_tree=raw_tree,
        )

        # Final pass over the serialized HTML.
        body_html = self._strip_tables_from_html(body_html)
        body_html = self._strip_images_from_html(body_html)

        result = {
            "xp_num": xp_num,
            "drop_list": drop_list,
            "html": body_html,
            "title": title,
            "base_url": base_url,
            "fallback_strategy": fallback_strategy,
            "text_length": self._text_length_from_html(body_html),
        }
        return result
ultradata_math_parser/parsers/base_parser.py ADDED
@@ -0,0 +1,1059 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+
3
+ import html
4
+ import logging
5
+ from collections import defaultdict
6
+ from copy import deepcopy
7
+ from urllib.parse import unquote, urljoin
8
+ from lxml.etree import Comment, strip_elements
9
+ from ultradata_math_parser.config import *
10
+ from ultradata_math_parser.readability_plus import Document as DocumentPlus
11
+ from ultradata_math_parser.utils import *
12
+
13
+
14
+ class BaseParser:
15
+ def __init__(self):
16
+ self.drop_ids = []
17
+ self.need_comment = False
18
+ self.process_math = True
19
+ self.preserve_math_containers = True
20
+ self.include_tables = True
21
+ self.include_images = False
22
+ self.fallback_min_length = 250
23
+ self.enable_wild_text_fallback = True
24
+ self.enable_readability_fallback = True
25
+ self._logger = logging.getLogger(__name__)
26
+
27
def xp_1_5(self, tree: HtmlElement):
    """Select the main-content subtree using the ordered ``BODY_XPATH`` rules.

    The rules are tried in order.  The first match of a rule is pruned with
    ``prune_unwanted_sections`` and accepted only if it still has children and
    enough text outside of links; otherwise the next rule is tried.

    Args:
        tree: Parsed document to search.

    Returns:
        A ``(body, xp_num, drop_list)`` tuple.  ``body`` is a new ``<body>``
        element holding the accepted subtree (empty if no rule qualified),
        ``xp_num`` is the 1-based index of the last matching rule as a string
        (reset to ``"others"`` in the rejection cases below) and ``drop_list``
        is the flag returned by the last pruning call.
    """
    body = Element("body")
    xp_num = "others"
    drop_list = False

    for position, xpath_expr in enumerate(BODY_XPATH, start=1):
        try:
            candidate = tree.xpath(xpath_expr)[0]
        except IndexError:
            continue
        xp_num = str(position)

        candidate, drop_list = self.prune_unwanted_sections(candidate)
        if len(candidate) == 0:
            xp_num = "others"
            continue

        candidate_len = text_len(
            "".join(candidate.xpath(".//text()[not(ancestor::a)]"))
        )
        page_len = text_len("".join(tree.xpath("//p//text()[not(ancestor::a)]")))

        # A stricter minimum applies when pruning reported a dropped list.
        minimum = 50 if drop_list else 20
        if candidate_len <= minimum:
            if page_len > 100:
                xp_num = "others"
            continue

        body.append(candidate)
        break

    return body, xp_num, drop_list
64
+
65
def get_content_html(self, cleaned_tree_backup, xp_num="others", base_url=""):
    """Summarise a tree with the readability_plus ``Document``.

    Args:
        cleaned_tree_backup: Tree handed to the readability_plus document.
        xp_num: Rule label forwarded to the document.
        base_url: URL forwarded to the document.

    Returns:
        The result of ``summary(html_partial=True)``.
    """
    return DocumentPlus(
        cleaned_tree_backup,
        url=base_url,
        xp_num=xp_num,
        need_comment=self.need_comment,
    ).summary(html_partial=True)
76
+
77
+ def _text_length_from_html(self, html_fragment):
78
+ if not html_fragment:
79
+ return 0
80
+ # 使用 lxml.html.fromstring 解析后提取 text_content
81
+ # 不再依赖 w3m
82
+ try:
83
+ tree = fromstring(html_fragment)
84
+ text = tree.text_content()
85
+ return len(text or "")
86
+ except Exception:
87
+ return 0
88
+
89
+ def _is_content_sufficient(self, html_fragment):
90
+ return self._text_length_from_html(html_fragment) >= self.fallback_min_length
91
+
92
def _remove_tables_from_tree(self, tree: HtmlElement) -> HtmlElement:
    """Remove every ``<table>`` below ``tree`` unless tables are enabled.

    The tree is modified in place.  Text that directly follows a removed table
    (its lxml ``tail``) is kept by moving it onto the previous sibling or the
    parent before the table is detached.

    Args:
        tree: Element whose descendant tables are removed.

    Returns:
        The same ``tree`` object.
    """
    if self.include_tables:
        return tree
    for table in list(tree.xpath(".//table")):
        parent = table.getparent()
        if parent is None:
            continue
        # parent.remove() discards the element together with its tail text,
        # so relocate the tail first to avoid losing document text.
        if table.tail:
            previous = table.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or "") + table.tail
            else:
                parent.text = (parent.text or "") + table.tail
        parent.remove(table)
    return tree
100
+
101
+ def _strip_tables_from_html(self, html_fragment: str) -> str:
102
+ if self.include_tables or not html_fragment:
103
+ return html_fragment
104
+ try:
105
+ wrapper = fromstring(f"<div>{html_fragment}</div>")
106
+ except Exception:
107
+ return html_fragment
108
+ self._remove_tables_from_tree(wrapper)
109
+ return "".join(tostring(child, encoding=str) for child in wrapper)
110
+
111
def _remove_images_from_tree(self, tree: HtmlElement) -> HtmlElement:
    """Remove ``<img>``, ``<picture>``, ``<source>`` and ``<map>`` nodes in place.

    Nothing is removed when ``self.include_images`` is true, mirroring how
    ``_remove_tables_from_tree`` honours ``include_tables``.  An ``<img>`` whose
    ``alt`` text looks like LaTeX is replaced by a ``<span>`` holding the
    formula so the math survives the removal.  Text that follows a removed
    node (its lxml ``tail``) is preserved.

    Args:
        tree: Element whose descendants are cleaned.

    Returns:
        The same ``tree`` object.
    """
    if self.include_images:
        return tree

    def latex_from_alt(img):
        """Return the formula text carried by ``img``'s alt, or None."""
        alt = img.get("alt", "")
        if not alt:
            return None
        # Undo the entity-style and URL encodings seen in formula alts.
        decoded = unquote(alt.replace('&space;', ' ').replace('&#92;', '\\'))
        stripped = decoded.strip()
        src = img.get("src", "").lower()
        looks_like_latex = (
            # 1. starts with a $ delimiter
            (stripped.startswith('$') and len(stripped) > 2)
            # 2. display-math brackets
            or stripped.startswith('\\[')
            or stripped.endswith('\\]')
            # 3. LaTeX commands such as \frac, \sum, \alpha
            or re.search(r'\\[a-zA-Z]+', decoded) is not None
            # 4. superscripts / subscripts
            or re.search(r'\^|_\{|_[a-zA-Z0-9]', decoded) is not None
            # 5. src hints at a formula renderer (supporting evidence only)
            or (
                len(stripped) > 1
                and any(
                    kw in src
                    for kw in ['latex', 'codecogs', 'math', 'tex', 'equation']
                )
            )
        )
        if not looks_like_latex:
            return None
        # Already-delimited formulas are kept verbatim; others get wrapped.
        if stripped.startswith('$') or stripped.startswith('\\['):
            return decoded
        return wrap_math(decoded)

    def detach(node, replacement=None):
        """Unlink ``node``; its tail goes to ``replacement`` or stays in place."""
        parent = node.getparent()
        if parent is None:
            return
        tail = node.tail
        if replacement is not None:
            node.addprevious(replacement)
            if tail:
                replacement.tail = tail
        elif tail:
            # parent.remove() would silently drop the tail text with the node.
            previous = node.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or "") + tail
            else:
                parent.text = (parent.text or "") + tail
        parent.remove(node)

    for node in list(tree.xpath(".//img|.//picture|.//source")):
        math_span = None
        if node.tag == "img":
            latex = latex_from_alt(node)
            if latex is not None:
                math_span = Element("span")
                math_span.text = latex
        detach(node, math_span)

    for html_map in list(tree.xpath(".//map")):
        detach(html_map)
    return tree

def _strip_images_from_html(self, html_fragment: str) -> str:
    """Return ``html_fragment`` with image markup removed.

    The fragment is returned untouched when images are enabled, when it is
    empty, or when it cannot be parsed.
    """
    if self.include_images or not html_fragment:
        return html_fragment
    try:
        wrapper = fromstring(f"<div>{html_fragment}</div>")
    except Exception:
        return html_fragment
    self._remove_images_from_tree(wrapper)
    # Serialise the children only, so the temporary wrapper div is not emitted.
    return "".join(tostring(child, encoding=str) for child in wrapper)
175
+
176
def recover_wild_text(self, tree, base_url="", aggressive=False):
    """Collect text-bearing blocks from a pruned copy of ``tree``.

    Args:
        tree: Source tree; it is deep-copied and never modified.
        base_url: Accepted for signature parity; not used here.
        aggressive: Also harvest ``div``, ``section``, ``article`` and ``li``.

    Returns:
        A serialised ``<div>`` with one ``<p>`` per distinct text block (and
        copies of tables when tables are enabled), or ``None`` if ``tree`` is
        ``None`` or nothing usable was found.
    """
    if tree is None:
        return None

    scratch = deepcopy(tree)
    try:
        pruned, _ = self.prune_unwanted_sections(scratch)
    except Exception:
        pruned = scratch

    tags = ["p", "pre", "code", "blockquote", "q", "quote"]
    if self.include_tables:
        tags.append("table")
    if aggressive:
        tags.extend(["div", "section", "article", "li"])
    search_expr = "|".join(f".//{tag}" for tag in tags)

    try:
        candidates = pruned.xpath(search_expr)
    except Exception:
        candidates = []
    if not candidates:
        return None

    container = Element("div")
    emitted = set()
    for candidate in candidates:
        try:
            content = trim(candidate.text_content())
        except Exception:
            content = None
        # Skip empty, very short, or already emitted text.
        if not content or text_len(content) < 10 or content in emitted:
            continue
        emitted.add(content)
        if candidate.tag == "table":
            if self.include_tables:
                container.append(deepcopy(candidate))
        else:
            paragraph = Element("p")
            paragraph.text = content
            container.append(paragraph)

    if len(container) == 0:
        return None
    return tostring(container, encoding=str)
220
+
221
def readability_fallback(self, tree, base_url=""):
    """Summarise a copy of ``tree`` with readability_plus as a last resort.

    Returns:
        The partial-HTML summary, or ``None`` when ``tree`` is ``None`` or the
        summariser raises.
    """
    if tree is None:
        return None
    try:
        summary = DocumentPlus(
            deepcopy(tree),
            url=base_url,
            xp_num="others",
            need_comment=self.need_comment,
        ).summary(html_partial=True)
    except Exception:
        summary = None
    return summary
234
+
235
+ def apply_fallbacks(self, primary_html, base_url, normal_tree, raw_tree):
236
+ if self._is_content_sufficient(primary_html):
237
+ return primary_html, "primary"
238
+
239
+ wild_html = None
240
+ if self.enable_wild_text_fallback:
241
+ wild_html = self.recover_wild_text(normal_tree, base_url)
242
+ if self._is_content_sufficient(wild_html):
243
+ return wild_html, "wild_text"
244
+
245
+ readability_html = None
246
+ if self.enable_readability_fallback:
247
+ readability_html = self.readability_fallback(raw_tree, base_url)
248
+ if self._is_content_sufficient(readability_html):
249
+ return readability_html, "readability"
250
+
251
+ for candidate, name in (
252
+ (primary_html, "primary"),
253
+ (wild_html, "wild_text"),
254
+ (readability_html, "readability"),
255
+ ):
256
+ if candidate:
257
+ return candidate, name
258
+ return "", "primary"
259
+
260
def prune_unwanted_nodes(self, tree, nodelist, with_backup=False):
    """Remove the nodes matched by each XPath in ``nodelist`` from ``tree``.

    A match is kept when it contains ``<math>`` (if math containers are
    preserved), when a ``"caption"`` rule matched something holding an image,
    when a ``hidden`` rule matched a scroll container or a tall clipped box,
    or when it sits inside ``<code>``/``<pre>``.

    Args:
        tree: Element to prune in place.
        nodelist: Iterable of XPath expressions.
        with_backup: When true, a copy taken before pruning is returned
            instead if pruning removed too much text (one seventh or less of
            the original length remains).

    Returns:
        The pruned ``tree`` or, with ``with_backup``, possibly the backup copy.
    """
    keep_backup = bool(with_backup)
    if keep_backup:
        old_len = len(tree.text_content())
        backup = deepcopy(tree)

    for expr in nodelist:
        for subtree in tree.xpath(expr):
            if self.preserve_math_containers and subtree.xpath(".//math"):
                continue

            # DISCARD_IMAGE_ELEMENTS needs a special check: keep captions
            # that actually contain an image.
            if '"caption"' in expr and subtree.xpath(".//img"):
                continue

            # "hidden" rules also match overflow styling that does not hide
            # the content.
            if "hidden" in expr:
                style = subtree.get("style") or ""
                if re.search(r"overflow-[xy]:\s*hidden", style):
                    continue
                if re.search(r"overflow:\s*hidden", style):
                    height = re.search(r"height:\s*(\d+)", style)
                    if height is not None and int(height.group(1)) >= 800:
                        continue

            if ancestor_node_check(subtree, ['code', 'pre']):
                continue
            self.remove_node(subtree)

    if not keep_backup:
        return tree
    new_len = len(tree.text_content())
    if new_len > old_len / 7:
        return tree
    return backup
302
+
303
def prune_html(self, tree):
    """Remove elements without any child node whose tag is in ``CUT_EMPTY_ELEMS``."""
    empties = [
        element
        for element in tree.xpath(".//*[not(node())]")
        if element.tag in CUT_EMPTY_ELEMS
    ]
    for element in empties:
        self.remove_node(element)
    return tree
309
+
310
def remove_node(self, node: HtmlElement):
    """Detach ``node`` from its parent while keeping its tail text.

    A non-blank tail is moved to the previous sibling's tail or, for a first
    child, to the parent's text.  It is appended when the target already holds
    non-blank text and otherwise replaces it.  If the node carries a
    ``Unique_ID`` attribute, that id is recorded in ``self.drop_ids``.
    """
    parent = node.getparent()
    tail = node.tail

    if text_strip(tail):
        previous = node.getprevious()
        if previous is not None:
            if text_strip(previous.tail):
                previous.tail = "".join([previous.tail, tail])
            else:
                previous.tail = tail
        elif parent is not None:
            if text_strip(parent.text):
                parent.text = "".join([parent.text, tail])
            else:
                parent.text = tail

    if parent is None:
        return
    unique_id = node.attrib.get(Unique_ID, "")
    parent.remove(node)
    if unique_id:
        self.drop_ids.append(int(unique_id))
331
+
332
def clean_tags(self, tree):
    """Strip comments, noise sections and unwanted tags from ``tree``.

    Steps, in order: drop XML/HTML comments, prune the comment-section and
    noise XPaths, turn ``<figure>`` elements that wrap a table into ``<div>``,
    remove the manually cleaned tags (short ``<form>`` elements included), and
    finally run ``HTML_CLEANER`` over the pruned tree.

    Args:
        tree: Element to clean in place.

    Returns:
        The tree returned by ``HTML_CLEANER.clean_html``.
    """
    strip_elements(tree, Comment)

    xp_lists = []
    if not self.need_comment:
        xp_lists.append(REMOVE_COMMENTS_XPATH)
    xp_lists.append(CONTENT_EXTRACTOR_NOISE_XPATHS)
    for xp_list in xp_lists:
        tree = self.prune_unwanted_nodes(tree, xp_list)

    cleaning_list, stripping_list = (
        MANUALLY_CLEANED.copy(),
        MANUALLY_STRIPPED.copy(),
    )

    for elem in tree.xpath(".//figure[descendant::table]"):
        elem.tag = "div"

    for expression in cleaning_list + ["form"]:
        # iter() is the supported spelling of the deprecated getiterator().
        for element in tree.iter(expression):
            if self.preserve_math_containers and element.xpath('.//math'):
                continue
            # Forms get special treatment: only short ones are removed.
            if element.tag == "form":
                ptest = element.xpath(".//text()[not(ancestor::a)]")
                if text_len("".join(ptest)) <= 60:  # 50
                    self.remove_node(element)
            else:
                self.remove_node(element)

    HTML_CLEANER.kill_tags, HTML_CLEANER.remove_tags = cleaning_list, stripping_list
    cleaned_tree = HTML_CLEANER.clean_html(self.prune_html(tree))

    return cleaned_tree
366
+
367
def generate_unique_id(self, element):
    """Tag every node except ``html``/``body`` with a sequential ``Unique_ID``."""
    counter = 0
    for node in iter_node(element):
        if node.tag.lower() in ["html", "body"]:
            continue
        node.attrib[Unique_ID] = str(counter)
        counter += 1
374
+
375
def clean_unique_id(self, raw_element, content_html):
    """Build the "dropped" HTML and strip ``Unique_ID`` markers.

    For each id found in ``content_html`` (in increasing order), the matching
    node in ``raw_element`` is replaced by a ``<div>`` that holds only those of
    its descendants whose ids were recorded in ``self.drop_ids``.

    Args:
        raw_element: Tree carrying ``Unique_ID`` attributes; modified in place.
        content_html: Extracted HTML that still contains the id attributes.

    Returns:
        ``(content_html, drop_html)``, both with the id attributes removed.
    """
    # Raw strings keep \d a regex escape instead of an invalid string escape.
    id_attr = re.compile(rf' {Unique_ID}="\d+"')
    ids = re.findall(rf' {Unique_ID}="(\d+)"', content_html)
    self.drop_ids = list(set(self.drop_ids))
    self.drop_ids.sort()
    skip_ids = [-1]
    for x in ids:
        if int(x) > int(skip_ids[-1]):
            skip_ids.append(int(x))
            drop_node = raw_element.xpath(
                f"//*[@{Unique_ID}='{x}']"
            )
            if drop_node:
                new_div = Element("div")
                for j in self.drop_ids:
                    if int(j) > int(skip_ids[-1]):
                        append_element = drop_node[0].xpath(
                            f".//*[@{Unique_ID}='{j}']"
                        )
                        if append_element:
                            skip_ids.append(j)
                            if len(append_element[0]) > 0:
                                # Descendants move with their ancestor, so
                                # mark them as handled too.
                                skip_ids.extend(
                                    [
                                        int(pjid)
                                        for pjid in append_element[0].xpath(
                                            f".//*/@{Unique_ID}"
                                        )
                                    ]
                                )
                            append_element[0].tail = None
                            new_div.append(append_element[0])

                # Best effort: a node that cannot be swapped is left in place.
                try:
                    drop_node[0].addnext(new_div)
                    parent = drop_node[0].getparent()
                    if parent is not None:
                        parent.remove(drop_node[0])
                except Exception:
                    pass

    content_html = id_attr.sub("", content_html)
    drop_html = id_attr.sub("", tostring(raw_element, encoding=str))
    return content_html, drop_html
423
+
424
def math_latex_processing(self, node):
    """Convert the math markup carried by ``node`` into LaTeX text.

    Handled sources, in order: ``align``/``equation`` environments in the
    node's text and tail, formula images, ``math-container`` and
    ``wp-katex-eq`` elements, MathJax ``<script>`` tags, ``tex`` elements with
    ``data-expr``, KaTeX/MathJax rendering leftovers, CK-12 math-editor spans,
    ``<math>`` (MathML) and ``<mathjax>`` elements.  Converted elements are
    replaced by a ``<span>`` whose text is the formula.  Conversions are best
    effort: a failing step leaves the node as it was, unless noted below.

    Args:
        node: Element to inspect; the surrounding tree is modified in place.
    """
    # 1. \begin{align} / \begin{equation} environments in text and tail.
    if node.tag not in ["script", "style"]:
        if text_strip(node.text):
            node.text = self._rewrite_latex_environments(node.text)
        if text_strip(node.tail):
            node.tail = self._rewrite_latex_environments(node.tail)

    node_class = node.get("class")
    parent = node.getparent()

    # 3. LaTeX carried by images.
    if node.tag == "img":
        self._convert_latex_image(node, node_class)

    # 4. class "math-container": the text is a display formula.
    if node_class == "math-container":
        try:
            if text_strip(node.text):
                self._replace_with_span(
                    node, parent, wrap_math(node.text, display=True)
                )
        except Exception:
            pass

    # 5. class "wp-katex-eq": data-display="true" selects display mode.
    if node_class == "wp-katex-eq":
        try:
            if text_strip(node.text):
                display = node.get("data-display") == "true"
                self._replace_with_span(
                    node, parent, wrap_math(node.text, display=display)
                )
        except Exception:
            pass

    # 6. script[type="math/tex"]
    if node.tag == "script" and node.get("type") == "math/tex":
        try:
            if text_strip(node.text):
                self._replace_with_span(node, parent, wrap_math(node.text))
        except Exception:
            pass

    # 7. script[type="math/asciimath"]
    if node.tag == "script" and node.get("type") == "math/asciimath":
        try:
            if text_strip(node.text):
                self._replace_with_span(
                    node, parent, wrap_math(extract_asciimath(node.text))
                )
        except Exception:
            # The script cannot be converted: delete it.
            self.remove_node(node)

    # 8. class "tex" with a data-expr attribute.
    if node_class == "tex":
        try:
            expr = node.get("data-expr")
            if text_strip(expr):
                self._replace_with_span(node, parent, wrap_math(expr))
        except Exception:
            pass

    # 9. span.katex: drop the rendered "katex-html" children.
    if node.tag == "span" and node_class == "katex":
        for katex_html_span in node.xpath('.//span[@class="katex-html"]'):
            self.remove_node(katex_html_span)

    # 10. Remove MathJax_Preview spans.
    if node.tag == "span" and node_class == "MathJax_Preview":
        self.remove_node(node)

    # CK-12 math editor spans keep their source in data-tex.
    if node.tag == "span" and node_class and "x-ck12-mathEditor" in node_class:
        try:
            expr = node.get("data-tex")
            if text_strip(expr):
                expr = unquote(expr).replace("\\&quot;", "").replace("&quot;", "")
                self._replace_with_span(node, parent, wrap_math(expr))
        except Exception:
            pass

    # 11. MathML <math> elements.
    if node.tag == "math":
        annotation_tags = node.xpath('.//annotation[@encoding="application/x-tex"]')
        if len(annotation_tags) > 0:
            text = annotation_tags[0].text
            if text_strip(text):
                self._replace_with_span(node, parent, wrap_math(text))
                if parent is not None:
                    style_value = parent.get("style")
                    if style_value:
                        normalized_style_value = (
                            style_value.lower()
                            .strip()
                            .replace(" ", "")
                            .replace(";", "")
                        )
                        # Un-hide the container so the converted formula is
                        # not treated as hidden content later on.
                        if "display:none" in normalized_style_value:
                            parent.attrib.pop("style", None)
        elif text_strip(node.get("alttext")):
            self._replace_with_span(node, parent, wrap_math(node.get("alttext")))
        else:
            try:
                # Try translating the MathML to LaTeX.
                tmp_node = deepcopy(node)
                tmp_node.tail = None
                mathml = tostring(tmp_node, encoding=str)
                # Drop the mml: namespace prefix and its declaration.
                if "xmlns:mml" in mathml:
                    mathml = mathml.replace("mml:", "")
                    mathml = re.sub(r'xmlns:mml=".*?"', "", mathml)
                latex = mml_to_latex(mathml)
                self._replace_with_span(node, parent, wrap_math(latex))
            except Exception:
                self.remove_node(node)

    # <mathjax> elements: AsciiMath wrapped in hashes inside the text.
    if node.tag == "mathjax":
        try:
            text = node.text
            if text_strip(text):
                text = html.unescape(text)
                for match in re.findall(r"#(.+?)#", text):
                    try:
                        latex = extract_asciimath(match)
                        text = text.replace(f"#{match}#", latex)
                    except Exception:
                        pass
                self._replace_with_span(node, parent, text)
        except Exception:
            pass

def _rewrite_latex_environments(self, text):
    """Strip ``align`` tags and turn ``equation`` bodies into display math."""
    if re.search(r"\\begin{align}(.*?)\\end{align}", text, re.DOTALL):
        text = text.replace("\\begin{align}", "").replace("\\end{align}", "")
    if not text_strip(text):
        return text

    def wrap_equation(found):
        body = found.group(1)
        body = body.replace("\\begin{equation}", "").replace("\\end{equation}", "")
        if not body.strip():
            return ""
        return wrap_math(body, display=True)

    # Substitute each environment where it stands.  Replacing the body text
    # globally would also rewrite identical text outside the environment.
    text, count = re.subn(
        r"\\begin{equation}(.*?)\\end{equation}", wrap_equation, text, flags=re.DOTALL
    )
    if count:
        # Remove any unbalanced leftover environment tags.
        text = text.replace("\\begin{equation}", "").replace("\\end{equation}", "")
    return text

def _replace_with_span(self, node, parent, content):
    """Swap ``node`` for a ``<span>`` holding ``content``, keeping a non-blank tail."""
    new_span = Element("span")
    new_span.text = content
    if parent is not None:
        if text_strip(node.tail):
            new_span.tail = node.tail
        parent.replace(node, new_span)

def _convert_latex_image(self, node, node_class):
    """Replace a formula ``<img>`` by a ``<span>`` holding its LaTeX source."""

    def insert_before(latex, remove=True):
        new_span = Element("span")
        new_span.text = wrap_math(latex)
        node.addprevious(new_span)
        if remove:
            self.remove_node(node)

    if node_class:
        class_list = node_class.split(" ")
        if any(img_class in class_list for img_class in latex_image_class_names):
            alt = node.get("alt")
            if text_strip(alt):
                insert_before(alt)
                return

    src = node.get("src")
    if src:
        # (marker in src, where the LaTeX lives): "query" means everything
        # after the first "?", "alt" means the alt attribute.
        renderers = [
            ("codecogs.com", "query"),
            ("latex.php", "alt"),
            ("/images/math/codecogs", "alt"),
            ("mimetex.cgi", "query"),
            ("mathtex.cgi", "query"),
        ]
        for marker, location in renderers:
            if marker not in src:
                continue
            try:
                if location == "query":
                    # Re-join in case the LaTeX itself contains "?".
                    latex = unquote("?".join(src.split("?")[1:]))
                else:
                    # e.g. alt='-i u_t + &#92;Delta u = |u|^2 u'
                    alt = node.get("alt")
                    if not text_strip(alt):
                        continue
                    latex = unquote(alt)
                insert_before(latex)
                return
            except Exception:
                pass

    if node_class and "x-ck12" in node_class:
        try:
            latex = node.get("alt")
            if text_strip(latex):
                # NOTE(review): the image is kept here, unlike the cases above;
                # confirm that this is intended.
                insert_before(unquote(latex), remove=False)
        except Exception:
            pass
786
+
787
def convert_tags(self, element, base_url=""):
    """Normalise every node of ``element`` in place and return it.

    Per node: run the math conversion (if enabled), promote ``data-src`` to
    ``src`` when ``src`` is missing, make ``src`` absolute against
    ``base_url``, turn childless ``<div>`` elements into ``<p>``, and remove
    nodes whose whole ``class`` value is listed as useless.
    """
    if self.need_comment:
        useless_classes = USELESS_ATTR
    else:
        useless_classes = USELESS_ATTR + ["comment"]

    for node in iter_node(element):
        if self.process_math:
            # Math tag conversion.
            self.math_latex_processing(node)

        attrs = node.attrib
        if "src" not in attrs and "data-src" in attrs:
            attrs["src"] = attrs["data-src"]
        if base_url and attrs.get("src"):
            attrs["src"] = urljoin(base_url, attrs["src"])

        if node.tag.lower() == "div" and len(node) == 0:
            node.tag = "p"

        class_name = node.get("class")
        if class_name and class_name.lower() in useless_classes:
            self.remove_node(node)

    return element
813
+
814
def delete_by_link_density(
    self, subtree, tagname, backtracking=False, favor_precision=False
):
    """Remove link-heavy or list-like ``tagname`` elements from ``subtree``.

    Two passes are made.  The first walks groups of same-tag siblings and
    removes groups that look like article/link listings.  The second removes
    individual elements that fail the link-density tests and, with
    ``backtracking``, short texts repeated in three or more elements.

    Args:
        subtree: Element to prune in place.
        tagname: Tag to examine (e.g. ``"div"``, ``"li"``, ``"p"``).
        backtracking: Enable the repeated-text pass.
        favor_precision: Forwarded to ``link_density_test``; also raises the
            repeated-text length threshold from 100 to 200.

    Returns:
        ``(subtree, drop_list)`` where ``drop_list`` is true when a sibling
        group was removed by the first pass.
    """
    need_del_par = []
    skip_par = []
    drop_list = False
    for descendant in subtree.iter(tagname):
        pparent = descendant.getparent()
        # Each parent is examined once.
        if pparent in need_del_par or pparent in skip_par:
            continue
        siblings = descendant.xpath(f"following-sibling::{tagname}")

        # An element classed as a list with five or more direct links is
        # dropped together with its siblings.
        if 'list' in descendant.get("class", "") and len(descendant.xpath('./a')) >= 5:
            need_del_par.append(descendant)
            need_del_par.extend(siblings)
            continue

        nn = [descendant]
        nn.extend(siblings)
        txt_max_num = 0
        if len(siblings) + 1 >= 4:
            pass
        else:
            # Small groups only qualify when at least three members end with
            # the same teaser word ("read", "more", ...).
            txt_max_dict = {
                "read": 0,
                "more": 0,
                "...": 0,
                "阅读": 0,
                "更多": 0,
                "详细": 0,
                "detail": 0,
                "article": 0,
                "blog": 0,
                "news": 0,
            }
            if tagname == "div" or tagname == "article" or tagname == "section":
                for j in nn:
                    txt = "".join(j.xpath(".//text()")).strip()
                    for x in [
                        "read",
                        "more",
                        "...",
                        "阅读",
                        "更多",
                        "详细",
                        "detail",
                        "article",
                        "blog",
                        "news",
                    ]:
                        if txt.lower().endswith(x):
                            txt_max_dict[x] += 1
                    txt_num = max(txt_max_dict.values())
                    if txt_max_num < txt_num:
                        txt_max_num = txt_num
                    if txt_max_num >= 3:
                        break
            if txt_max_num >= 3:
                pass
            else:
                continue
        skip_par.append(pparent)
        # Count the siblings that look like link items.
        a_num = 0
        for j in siblings:
            if j.xpath(".//a"):
                if tagname == "p":
                    if density_of_a_text(j, pre=0.8):
                        a_num += 1
                elif tagname in ["div", "section", "article"]:
                    if density_of_a_text(j, pre=0.2):
                        a_num += 1
                else:
                    if self.need_comment:
                        # When comments are kept, skip siblings that match a
                        # forum/comment XPath before counting them.
                        break_flg = False
                        for c_xpath in Forum_XPATH[:-1]:
                            if j.xpath(c_xpath.replace(".//*", "self::*")):
                                break_flg = True
                                break
                        if break_flg:
                            continue
                    if tagname == "li":
                        if text_len("".join(j.xpath(".//text()[not(ancestor::a)]"))) > 50:
                            continue
                    a_num += 1

        if a_num < len(siblings):
            if a_num >= 15 and (
                tagname == "div" or tagname == "article" or tagname == "section"
            ):
                pass
            else:
                continue

        similarity_with_siblings_nums = similarity_with_siblings(
            descendant, siblings
        )
        if tagname == "article" or tagname == "item":  # or tagname == "section"
            similarity_with_siblings_nums = similarity_with_siblings_nums * 1.5
        # Lists have a special case: descendant and siblings may all contain
        # title / h1 | h2 headings.
        if tagname == "div" or tagname == "article" or tagname == "section":
            title_max_num = 0
            for ll in [".//head[@rend='h2']", ".//head[@rend='h1']", "./article"]:
                title_num = 0
                for jj in nn:
                    if jj.xpath(ll):
                        title_num += 1
                if title_max_num < title_num:
                    title_max_num = title_num
            if title_max_num >= 4:
                similarity_with_siblings_nums = similarity_with_siblings_nums * 1.5

        if txt_max_num >= 3:
            pass
        elif similarity_with_siblings_nums < 0.84:
            if len(siblings) >= 15 and (
                tagname == "div" or tagname == "article" or tagname == "section"
            ):
                pass
            else:
                continue
        # When the parent holds several sibling divs whose class looks like a
        # post, remove the other nodes and keep a single article.
        # NOTE(review): "aricle" below looks like a typo for "article" -
        # confirm before changing, since it alters which classes match.
        class_attr = descendant.get("class") if descendant.get("class") else ""
        if (
            re.findall("post-", class_attr, re.I)
            or re.findall("-post", class_attr, re.I)
            or re.findall("blog|aricle", class_attr, re.I)
        ):
            drop_list = True
            sk_flg = True
            for dl in siblings:
                # The first sibling more than twice as long as descendant
                # replaces it; every other sibling is removed.
                if (
                    text_len("".join(descendant.xpath(".//text()"))) * 2
                    < text_len("".join(dl.xpath(".//text()")))
                    and sk_flg
                ):
                    self.remove_node(descendant)
                    sk_flg = False
                else:
                    self.remove_node(dl)
        else:
            need_del_par.append(descendant)
            need_del_par.extend(siblings)
    for node in need_del_par:
        drop_list = True
        try:
            self.remove_node(node)
        except Exception as e:
            pass

    myelems, deletions = defaultdict(list), []

    if tagname == "div":
        for elem in subtree.iter(tagname):
            if density_of_a_text(elem, pre=0.8) and img_div_check(elem):
                deletions.append(elem)

    for elem in subtree.iter(tagname):
        elemtext = trim(elem.text_content())
        result, templist = link_density_test(elem, elemtext, favor_precision)
        if result is True and img_div_check(elem):
            # Keep links that live inside a table cell.
            if tagname in ['ul', 'li', 'div', 'p'] and ancestor_node_check(elem, ['td']):
                continue
            deletions.append(elem)
        elif backtracking is True and len(templist) > 0:  # if?
            myelems[elemtext].append(elem)
    if backtracking is True:
        if favor_precision is False:
            threshold = 100
        else:
            threshold = 200
        # Short texts repeated in three or more elements are dropped.
        for text, elem in myelems.items():
            if 0 < len(text) < threshold and len(elem) >= 3:
                deletions.extend(elem)

    for elem in uniquify_list(deletions):
        try:
            if self.need_comment:
                # When comments are kept, do not delete elements that match a
                # forum/comment XPath.
                break_flg = False
                for c_xpath in Forum_XPATH[:-1]:
                    if elem.xpath(c_xpath):
                        break_flg = True
                        break
                if break_flg:
                    continue
            self.remove_node(elem)
        except AttributeError:
            pass
    return subtree, drop_list
1005
+
1006
def prune_unwanted_sections(self, tree):
    """Run the discard XPaths and the link-density passes over ``tree``.

    Returns:
        ``(tree, drop_list)`` where ``drop_list`` is true if any link-density
        pass reported a dropped list.
    """
    # When comments are wanted, the last overall discard rule is skipped.
    if self.need_comment:
        overall_xpaths = OVERALL_DISCARD_XPATH[:-1]
    else:
        overall_xpaths = OVERALL_DISCARD_XPATH
    tree = self.prune_unwanted_nodes(tree, overall_xpaths, with_backup=True)

    for xpaths in (
        PAYWALL_DISCARD_XPATH,
        TEASER_DISCARD_XPATH,
        DISCARD_IMAGE_ELEMENTS,
    ):
        tree = self.prune_unwanted_nodes(tree, xpaths)

    # Remove elements by link density; only the div pass uses backtracking.
    passes = (
        ("div", True),
        ("article", False),
        ("section", False),
        ("ul", False),
        ("li", False),
        ("dl", False),
        ("dt", False),
        ("dd", False),
        ("p", False),
    )
    dropped = False
    for tag, backtrack in passes:
        tree, flag = self.delete_by_link_density(
            tree, tag, backtracking=backtrack, favor_precision=False
        )
        dropped = dropped or flag

    return tree, dropped
ultradata_math_parser/parsers/custom_parser.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+ import re
3
+
4
+ from ultradata_math_parser.utils import *
5
+ from ultradata_math_parser.parsers.base_parser import BaseParser
6
+ from ultradata_math_parser.parsers.title_parser import TitleParser
7
+
8
+
9
class CustomParser(BaseParser):
    """Parser driven by caller-supplied XPath rules rather than heuristics."""

    def __init__(self) -> None:
        super().__init__()

    def use_clean_rule(self, tree, clean_rules):
        """Remove every node matched by any XPath in ``clean_rules``; return ``tree``."""
        for clean_rule in clean_rules:
            for node in tree.xpath(clean_rule):
                self.remove_node(node)
        return tree

    def use_extract_rule(self, tree, extract_rule):
        """Evaluate ``extract_rule["value"]`` against ``tree``.

        Returns the joined, stripped text when the XPath selects text nodes
        (contains ``/text()``); otherwise returns the first matched node.

        Raises:
            IndexError: if a node-selecting XPath matches nothing.
        """
        xpath = extract_rule["value"]
        if "/text()" in xpath:
            return "".join(tree.xpath(xpath)).strip()
        return tree.xpath(xpath)[0]

    def extract(self, html="", base_url="", rule=None, **kwargs) -> dict:
        """Extract title and body from ``html`` according to ``rule``.

        Args:
            html: Raw HTML string.
            base_url: Fallback base URL; overridden by an absolute ``<base href>``.
            rule: Mapping with a required ``"content"`` entry and optional
                ``"clean"`` (list of XPaths to delete) and ``"title"`` entries.
                ``None`` is treated as an empty mapping.
            **kwargs: ``include_images`` (default ``False``).

        Raises:
            ValueError: if the HTML cannot be loaded or the content rule
                is missing or fails to match.
        """
        # A fresh dict per call instead of a shared mutable default.
        rule = {} if rule is None else rule

        self.include_images = kwargs.get("include_images", False)
        tree = load_html(html)
        if tree is None:
            raise ValueError("failed to load HTML")

        # An absolute <base href> in the document overrides the caller's base_url.
        base_href = tree.xpath("//base/@href")
        if base_href and "http" in base_href[0]:
            base_url = base_href[0]

        if "clean" in rule:
            tree = self.use_clean_rule(tree, rule["clean"])

        # Title: explicit rule if given, otherwise the generic title heuristics.
        if "title" not in rule:
            title = TitleParser().process(tree)
        else:
            title = self.use_extract_rule(tree, rule["title"])

        # Article body.
        try:
            body_tree = self.use_extract_rule(tree, rule["content"])
        except Exception as exc:
            # Keep the ValueError contract, but no longer swallow
            # KeyboardInterrupt/SystemExit and keep the cause attached.
            raise ValueError("content rule is missing or matched nothing") from exc

        if not self.include_images:
            self._remove_images_from_tree(body_tree)
        body_html = tostring(body_tree, encoding=str)
        body_html = self._strip_images_from_html(body_html)

        text_length = self._text_length_from_html(body_html)

        return {
            "xp_num": "custom",
            "drop_list": False,
            "html": body_html,
            "title": title,
            "base_url": base_url,
            "text_length": text_length,
        }
ultradata_math_parser/parsers/forum_parser.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+ import re
3
+
4
+ from ultradata_math_parser.config import Forum_XPATH, Unique_ID
5
+ from ultradata_math_parser.utils import *
6
+ from ultradata_math_parser.parsers.base_parser import BaseParser
7
+ from ultradata_math_parser.parsers.title_parser import TitleParser
8
+
9
+
10
class ForumParser(BaseParser):
    """Parser for forum-style pages: main post plus surrounding replies."""

    def __init__(self) -> None:
        super().__init__()

    def extract(self, html="", base_url="", **kwargs) -> dict:
        """Extract the main content of a forum page and append its posts.

        The main body is located first; nodes matched by ``Forum_XPATH`` in
        the remaining tree are then attached before or after it according to
        their ``Unique_ID`` ordering.

        Args:
            html: Raw HTML string.
            base_url: Fallback base URL; overridden by an absolute ``<base href>``.
            **kwargs: Optional overrides for ``preserve_math_containers``,
                ``process_math``, ``include_tables`` and ``include_images``.

        Returns:
            Dict with ``xp_num``, ``drop_list``, ``html``, ``title``,
            ``base_url`` and ``text_length``.

        Raises:
            ValueError: if the HTML cannot be loaded.
        """
        self.preserve_math_containers = kwargs.get(
            "preserve_math_containers", self.preserve_math_containers
        )
        self.process_math = kwargs.get("process_math", self.process_math)
        self.include_tables = kwargs.get("include_tables", self.include_tables)
        self.include_images = kwargs.get("include_images", self.include_images)
        self.need_comment = True

        html = html.replace("&nbsp;", " ").replace("&#160;", " ")
        tree = load_html(html)
        if tree is None:
            raise ValueError("failed to load HTML")

        title = TitleParser().process(tree)

        # An absolute <base href> in the document overrides the caller's base_url.
        base_href = tree.xpath("//base/@href")
        if base_href and "http" in base_href[0]:
            base_url = base_href[0]
        self.generate_unique_id(tree)

        format_tree = self.convert_tags(tree, base_url=base_url)
        format_tree = self._remove_tables_from_tree(format_tree)
        format_tree = self._remove_images_from_tree(format_tree)

        normal_tree = self.clean_tags(format_tree)
        normal_tree = self._remove_tables_from_tree(normal_tree)
        normal_tree = self._remove_images_from_tree(normal_tree)

        subtree, xp_num, drop_list = self.xp_1_5(normal_tree)
        if xp_num == "others":
            subtree, drop_list = self.prune_unwanted_sections(normal_tree)
        body_html = self.get_content_html(subtree, xp_num, base_url)
        body_html = self._strip_tables_from_html(body_html)
        body_html = self._strip_images_from_html(body_html)

        # Forum-specific assembly starts here.
        body_html_tree = fromstring(body_html)
        try:
            body_tree = body_html_tree.body
        except Exception:
            # No <body> available: wrap the parsed content in one.
            body_tree = Element("body")
            body_tree.extend(body_html_tree)
        main_ids = body_tree.xpath(f".//@{Unique_ID}")

        # Drop everything already present in the main body from the full tree.
        for main_id in main_ids:
            main_tree = normal_tree.xpath(f".//*[@{Unique_ID}={main_id}]")
            if main_tree:
                self.remove_node(main_tree[0])
        if not main_ids:
            main_ids = [-1]

        if xp_num != "others":
            normal_tree, _ = self.prune_unwanted_sections(normal_tree)

        post_id_re = re.compile(r"post[-_]\d+")
        for c_xpath in Forum_XPATH:
            while True:
                matches = normal_tree.xpath(c_xpath)
                if not matches:
                    break
                x = matches[0]
                # Detach first so the loop always makes progress.
                self.remove_node(x)

                if "'post-'" in c_xpath:
                    if not post_id_re.search(x.attrib.get("id", "").lower()):
                        continue
                if (
                    "header" in x.attrib.get("class", "").lower()
                    or "header" in x.attrib.get("id", "").lower()
                ):
                    continue

                try:
                    last_main_id = int(main_ids[-1])
                    if int(x.attrib.get(Unique_ID, "0")) > last_main_id:
                        body_tree.append(x)
                    else:
                        # The node straddles the main body: split its
                        # descendants into those after and those before it.
                        prefix_div = Element("div")
                        suffix_div = Element("div")
                        need_prefix = False
                        need_suffix = False

                        after_xpath = f".//*[number(@{Unique_ID}) > {last_main_id}]"
                        before_xpath = f".//*[number(@{Unique_ID}) < {last_main_id}]"

                        found = x.xpath(after_xpath)
                        while found:
                            self.remove_node(found[0])
                            suffix_div.append(found[0])
                            need_suffix = True
                            found = x.xpath(after_xpath)

                        found = x.xpath(before_xpath)
                        while found:
                            self.remove_node(found[0])
                            prefix_div.append(found[0])
                            need_prefix = True
                            found = x.xpath(before_xpath)

                        if need_prefix:
                            body_tree.insert(0, prefix_div)
                        if need_suffix:
                            body_tree.append(suffix_div)
                except Exception:
                    # Best effort: a post that cannot be placed is skipped.
                    pass

        # Strip the internal ordering attribute from the serialised output.
        body_html = re.sub(
            rf' {Unique_ID}="\d+"',
            "",
            tostring(body_tree, encoding=str),
        )

        text_length = self._text_length_from_html(body_html)

        return {
            "xp_num": xp_num,
            "drop_list": drop_list,
            "html": body_html,
            "title": title,
            "base_url": base_url,
            "text_length": text_length,
        }
ultradata_math_parser/parsers/title_parser.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+
3
+ from ultradata_math_parser.utils import *
4
+ from ultradata_math_parser.config import *
5
+
6
+
7
class TitleParser:
    """Derive a page title from meta tags, ``<title>`` and headings."""

    def extract_by_meta(self, element: HtmlElement):
        """Return the joined result of the first ``METAS`` XPath that matches, else ``None``."""
        for meta_xpath in METAS:
            hits = element.xpath(meta_xpath)
            if hits:
                return "".join(hits)
        return None

    def extract_by_title(self, element: HtmlElement):
        """Return the stripped text content of ``<title>``."""
        title_parts = element.xpath("//title//text()")
        return "".join(title_parts).strip()

    def extract_by_hs(self, element: HtmlElement):
        """Return every text node found under ``<h1>``/``<h2>``/``<h3>`` (possibly empty)."""
        heading_texts = element.xpath("//h1//text()|//h2//text()|//h3//text()")
        return heading_texts if heading_texts else []

    def extract_by_h(self, element: HtmlElement):
        """Return the first direct text of the first ``<h1>``, then ``<h2>``, then ``<h3>``.

        Returns ``None`` when no such heading carries direct text.
        """
        for heading_xpath in ("//h1", "//h2", "//h3"):
            headings = element.xpath(heading_xpath)
            if headings:
                direct_texts = headings[0].xpath("./text()")
                if direct_texts:
                    return direct_texts[0].strip()
        return None

    def process(self, element: HtmlElement):
        """Pick the best title for ``element``.

        Order of preference: meta title; the common subsequence between
        ``<title>`` and the heading text most similar to it; ``<title>``
        itself; the first heading's direct text.
        """
        meta_title = self.extract_by_meta(element)
        if meta_title:
            return meta_title

        first_heading = self.extract_by_h(element)
        heading_texts = self.extract_by_hs(element)
        page_title = self.extract_by_title(element)

        if heading_texts:
            # max() returns the first maximal item, matching a stable
            # descending sort followed by [0].
            closest = max(
                heading_texts, key=lambda text: similarity2(text, page_title)
            )
            return lcs_of_2(closest, page_title)

        if page_title:
            return page_title

        return first_heading
ultradata_math_parser/parsers/unified_parser.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+ import re
3
+ from copy import deepcopy
4
+
5
+ from lxml.html import Element, tostring, fromstring
6
+
7
+ from ultradata_math_parser.config import Forum_XPATH, Unique_ID
8
+ from ultradata_math_parser.utils import load_html, text_len
9
+ from ultradata_math_parser.parsers.base_parser import BaseParser
10
+ from ultradata_math_parser.parsers.title_parser import TitleParser
11
+
12
+
13
class UnifiedParser(BaseParser):
    """Single entry-point parser: main-body extraction, optional forum
    assembly, and length-based fallbacks."""

    def __init__(self):
        super().__init__()
        self.need_comment = True
        self.enable_forum_assembly = True
        # The assembled text must be at least this many times the original length.
        self.forum_assembly_min_gain = 1.1

    def extract(self, html="", **kwargs) -> dict:
        """Extract title and main content from ``html``.

        Args:
            html: Raw HTML string.
            **kwargs: Optional ``base_url`` plus overrides for
                ``process_math``, ``preserve_math_containers``,
                ``include_tables``, ``include_images``,
                ``enable_forum_assembly`` and ``fallback_min_length``.

        Returns:
            Dict with ``xp_num``, ``drop_list``, ``html``, ``title``,
            ``base_url``, ``fallback_strategy``, ``text_length`` and
            ``forum_assembled``.

        Raises:
            ValueError: if the HTML cannot be loaded.
        """
        base_url = kwargs.get("base_url", "")
        self.process_math = kwargs.get("process_math", self.process_math)
        self.preserve_math_containers = kwargs.get(
            "preserve_math_containers", self.preserve_math_containers
        )
        self.include_tables = kwargs.get("include_tables", self.include_tables)
        self.include_images = kwargs.get("include_images", self.include_images)
        self.enable_forum_assembly = kwargs.get(
            "enable_forum_assembly", self.enable_forum_assembly
        )
        self.fallback_min_length = kwargs.get(
            "fallback_min_length", self.fallback_min_length
        )

        html = html.replace("&nbsp;", " ").replace("&#160;", " ")
        tree = load_html(html)
        if tree is None:
            raise ValueError("failed to load HTML")

        title = TitleParser().process(tree)

        # Untouched copy, handed to the fallbacks below.
        raw_tree = deepcopy(tree)

        # An absolute <base href> in the document overrides the caller's base_url.
        base_href = tree.xpath("//base/@href")
        if base_href and "http" in base_href[0]:
            base_url = base_href[0]

        self.generate_unique_id(tree)

        # Tag conversion.
        format_tree = self.convert_tags(tree, base_url=base_url)
        format_tree = self._remove_tables_from_tree(format_tree)
        format_tree = self._remove_images_from_tree(format_tree)

        normal_tree = self.clean_tags(format_tree)
        normal_tree = self._remove_tables_from_tree(normal_tree)
        normal_tree = self._remove_images_from_tree(normal_tree)

        # Snapshot taken before the main extraction mutates normal_tree.
        fallback_tree = deepcopy(normal_tree)

        # Main body extraction.
        subtree, xp_num, drop_list = self.xp_1_5(normal_tree)
        if xp_num == "others":
            subtree, drop_list = self.prune_unwanted_sections(normal_tree)

        body_html = self.get_content_html(subtree, xp_num, base_url)

        # Forum post assembly.
        forum_assembled = False
        if self.enable_forum_assembly:
            if xp_num != "others":
                normal_tree, _ = self.prune_unwanted_sections(normal_tree)

            original_length = self._text_length_from_html(body_html)
            assembled_html = self._try_forum_assembly(normal_tree, body_html)
            assembled_length = self._text_length_from_html(assembled_html)

            # Require a real gain: without the strict comparison an empty
            # body (0 >= 0 * gain) would be reported as "assembled".
            if (
                assembled_length > original_length
                and assembled_length >= original_length * self.forum_assembly_min_gain
            ):
                body_html = assembled_html
                forum_assembled = True

        # Conditional fallback when the result is too short.
        current_length = self._text_length_from_html(body_html)
        fallback_strategy = "primary"

        if current_length < self.fallback_min_length:
            body_html, fallback_strategy = self.apply_fallbacks(
                primary_html=body_html,
                base_url=base_url,
                normal_tree=fallback_tree,
                raw_tree=raw_tree,
            )

        body_html = self._strip_tables_from_html(body_html)
        body_html = self._strip_images_from_html(body_html)

        text_length = self._text_length_from_html(body_html)

        return {
            "xp_num": xp_num,
            "drop_list": drop_list,
            "html": body_html,
            "title": title,
            "base_url": base_url,
            "fallback_strategy": fallback_strategy,
            "text_length": text_length,
            "forum_assembled": forum_assembled,
        }

    def _try_forum_assembly(self, normal_tree, body_html):
        """Attach forum posts found in ``normal_tree`` to ``body_html``.

        Nodes already present in the body are removed from ``normal_tree``
        (which is mutated); nodes matched by ``Forum_XPATH`` are then placed
        before or after the body according to their ``Unique_ID``.

        Returns:
            The assembled HTML with ``Unique_ID`` attributes stripped, or
            ``body_html`` unchanged when it is empty or cannot be parsed.
        """
        if not body_html:
            return body_html

        try:
            body_html_tree = fromstring(body_html)
        except Exception:
            return body_html

        try:
            body_tree = body_html_tree.body
        except Exception:
            # No <body> available: wrap the parsed content in one.
            body_tree = Element("body")
            body_tree.extend(body_html_tree)

        main_ids = body_tree.xpath(f".//@{Unique_ID}")

        for main_id in main_ids:
            main_tree = normal_tree.xpath(f".//*[@{Unique_ID}={main_id}]")
            if main_tree:
                try:
                    self.remove_node(main_tree[0])
                except Exception:
                    # Best effort: a node that cannot be detached is left in place.
                    pass

        if not main_ids:
            main_ids = [-1]

        post_id_re = re.compile(r"post[-_]\d+")

        for c_xpath in Forum_XPATH:
            while True:
                matches = normal_tree.xpath(c_xpath)
                if not matches:
                    break

                x = matches[0]
                # Detach first so the loop always makes progress.
                self.remove_node(x)

                if "'post-'" in c_xpath or "'post_'" in c_xpath:
                    elem_id = x.attrib.get("id", "").lower()
                    if not post_id_re.search(elem_id):
                        continue

                if (
                    "header" in x.attrib.get("class", "").lower()
                    or "header" in x.attrib.get("id", "").lower()
                ):
                    continue

                try:
                    node_id = int(x.attrib.get(Unique_ID, "0"))
                    last_main_id = int(main_ids[-1]) if main_ids else -1

                    if node_id > last_main_id:
                        body_tree.append(x)
                    else:
                        # The node straddles the main body: split its
                        # descendants into those after and those before it.
                        prefix_div = Element("div")
                        suffix_div = Element("div")
                        need_prefix = False
                        need_suffix = False

                        after_xpath = f".//*[number(@{Unique_ID}) > {last_main_id}]"
                        before_xpath = f".//*[number(@{Unique_ID}) < {last_main_id}]"

                        found = x.xpath(after_xpath)
                        while found:
                            self.remove_node(found[0])
                            suffix_div.append(found[0])
                            need_suffix = True
                            found = x.xpath(after_xpath)

                        found = x.xpath(before_xpath)
                        while found:
                            self.remove_node(found[0])
                            prefix_div.append(found[0])
                            need_prefix = True
                            found = x.xpath(before_xpath)

                        if need_prefix:
                            body_tree.insert(0, prefix_div)
                        if need_suffix:
                            body_tree.append(suffix_div)
                except Exception:
                    # Best effort: a post that cannot be placed is skipped.
                    pass

        # Strip the internal ordering attribute from the serialised output.
        result_html = re.sub(
            rf' {Unique_ID}="\d+"',
            "",
            tostring(body_tree, encoding=str),
        )

        return result_html
ultradata_math_parser/readability_plus.py ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+
3
+ from lxml.etree import tounicode
4
+ from lxml.html import document_fromstring, fragment_fromstring
5
+
6
+ from ultradata_math_parser.utils import *
7
+
8
+
9
def to_int(x):
    """Convert a CSS-like length string to an integer number of pixels.

    Accepts plain integers (``"7"``), ``px`` values (``"12px"``) and ``em``
    values (``"2em"``, counted as 12 px per em).  Surrounding whitespace is
    ignored.

    Returns:
        The integer value, or ``None`` for ``None``, an empty string or a
        whitespace-only string.

    Raises:
        ValueError: if the numeric part is not a valid integer.
    """
    if not x:
        return None
    x = x.strip()
    if not x:
        # Whitespace-only input carries no value; previously int("") raised.
        return None
    if x.endswith("px"):
        return int(x[:-2])
    if x.endswith("em"):
        return int(x[:-2]) * 12
    return int(x)
18
+
19
+
20
def clean(text):
    """Normalise whitespace in ``text`` and strip both ends.

    Applied in order: whitespace runs of 255+ characters are capped at 255
    spaces, whitespace around newlines collapses to a single newline, and
    each tab or run of two or more spaces/tabs becomes one space.
    """
    substitutions = (
        (r"\s{255,}", " " * 255),
        (r"\s*\n\s*", "\n"),
        (r"\t|[ \t]{2,}", " "),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
25
+
26
+
27
def text_length(i):
    """Return the length of element ``i``'s text content after ``clean``."""
    raw_text = i.text_content()
    if not raw_text:
        raw_text = ""
    return len(clean(raw_text))
29
+
30
+
31
# Attribute-name patterns stripped from serialised HTML.  The last entry
# targets inline event handlers (onclick, onload, ...); the earlier form
# "on*" meant "o followed by any number of n" and never matched them.
bad_attrs = ["width", "height", "style", "[-a-z]*color", "background[-a-z]*", "on[a-z]+"]
single_quoted = "'[^']+'"
double_quoted = '"[^"]+"'
non_space = "[^ \"'>]+"
htmlstrip = re.compile(
    "<"  # open
    "([^>]+) "  # prefix
    "(?:%s) *" % ("|".join(bad_attrs),)  # undesirable attributes
    + "= *(?:%s|%s|%s)" % (non_space, single_quoted, double_quoted)  # value
    + "([^>]*)"  # postfix
    ">",  # end
    re.I,
)


def clean_attributes(html):
    """Remove presentational and event-handler attributes from ``html``.

    Each pass of ``htmlstrip`` removes at most one attribute per tag, so the
    substitution is repeated until nothing matches.

    Args:
        html: Serialised HTML string.

    Returns:
        The HTML string with every attribute listed in ``bad_attrs`` removed.
    """
    while htmlstrip.search(html):
        html = htmlstrip.sub("<\\1\\2>", html)
    return html
51
+
52
+
53
class Document:
    """Readability-style main-content extractor working on an lxml HTML tree."""

    def __init__(
        self,
        input,
        url=None,
        min_text_length=25,
        retry_length=250,
        xpath=False,
        handle_failures="discard",
        xp_num="others",
        need_comment=False,
    ):
        """Store the configuration and build the class/id regex tables.

        Args:
            input: The HTML element tree to process (used in place).
            url: Base URL used to make links absolute, if given.
            min_text_length: Minimum text length for a paragraph to be
                scored and for a container to survive ``sanitize``.
            retry_length: Minimum serialised length for the first
                ("ruthless") pass to be accepted.
            xpath: When true, every element gets an ``x`` attribute holding
                its tree path.
            handle_failures: Passed through to lxml's link-resolution calls.
            xp_num: ``"others"`` enables candidate scoring; any other value
                skips scoring and uses the tree as given.
            need_comment: When true, "comment" is not treated as a negative
                or unlikely class/id marker.
        """
        self.input = input
        self.html = None
        self.encoding = None
        self.positive_keywords = None
        self.negative_keywords = None
        self.url = url
        self.min_text_length = min_text_length
        self.retry_length = retry_length
        self.xpath = xpath
        self.handle_failures = handle_failures
        self.xp_num = xp_num
        self.need_comment = need_comment

        # The two tables differ only in whether "comment" counts against a node.
        comment_term = "" if need_comment else "comment|"
        self.REGEXES = {
            "unlikelyCandidatesRe": re.compile(
                r"combx|" + comment_term + r"community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter",
                re.I,
            ),
            "okMaybeItsACandidateRe": re.compile(
                r"and|article|body|column|main|shadow", re.I
            ),
            "positiveRe": re.compile(
                r"article|body|content|entry|hentry|main|page|pagination|post|text|blog|story",
                re.I,
            ),
            "negativeRe": re.compile(
                r"combx|" + comment_term + r"com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget",
                re.I,
            ),
            "divToPElementsRe": re.compile(
                r"<(a|blockquote|dl|div|img|ol|p|pre|table|ul)", re.I
            ),
            "videoRe": re.compile(r"https?:\/\/(www\.)?(youtube|vimeo)\.com", re.I),
        }

    def _html(self, force=False):
        """Return the working tree, (re)initialising it when forced or unset."""
        if force or self.html is None:
            self.html = self._parse(self.input)
            if self.xpath:
                root = self.html.getroottree()
                for i in self.html.iter():
                    i.attrib["x"] = root.getpath(i)
        return self.html

    def _parse(self, input: HtmlElement):
        """Resolve links in ``input`` and return that same tree (no copy is made)."""
        doc = input
        base_href = self.url
        if base_href:
            doc.make_links_absolute(
                base_href,
                resolve_base_href=True,
                handle_failures=self.handle_failures,
            )
        else:
            doc.resolve_base_href(handle_failures=self.handle_failures)
        return doc

    def summary(self, html_partial=False):
        """Return the cleaned article HTML, or ``None`` if extraction fails.

        A first "ruthless" pass removes unlikely candidates; if no candidate
        is found or the result is shorter than ``retry_length``, a second,
        lenient pass is made.  Because ``_parse`` returns the input tree
        itself, the second pass operates on the tree as already modified.
        """
        try:
            ruthless = True
            while True:
                self._html(True)
                for i in self.tags(self.html, "body"):
                    i.set("id", "readabilityplusBody")
                if ruthless and self.xp_num == "others":
                    self.remove_unlikely_candidates()
                self.transform_misused_divs_into_paragraphs()
                if self.xp_num == "others":
                    candidates = self.score_paragraphs()
                    best_candidate = self.select_best_candidate(candidates)
                else:
                    best_candidate = None
                    ruthless = False
                    candidates = {}
                if best_candidate:
                    article = self.get_article(
                        candidates, best_candidate, html_partial=html_partial
                    )
                else:
                    if ruthless:
                        ruthless = False
                        continue
                    article = self.html.find("body")
                    if article is None:
                        article = self.html
                cleaned_article = self.sanitize(article, candidates)

                article_length = len(cleaned_article or "")
                of_acceptable_length = article_length >= self.retry_length
                if ruthless and not of_acceptable_length:
                    ruthless = False
                    continue
                return cleaned_article
        except Exception:
            # Deliberate best effort: any failure yields "no summary".
            return None

    def get_article(self, candidates, best_candidate, html_partial=False):
        """Build the output tree from the best candidate and qualifying siblings.

        A sibling is kept when it is the best element, when it is a scored
        candidate above the sibling threshold, or when it is a ``<p>`` that
        looks like body text (long with few links, or short, link-free and
        containing a sentence-ending period).
        """
        sibling_score_threshold = max([10, best_candidate["content_score"] * 0.2])
        if html_partial:
            output = fragment_fromstring("<div/>")
        else:
            output = document_fromstring("<div/>")
        best_elem = best_candidate["elem"]
        parent = best_elem.getparent()
        siblings = parent.getchildren() if parent is not None else [best_elem]
        for sibling in siblings:
            append = False
            if sibling is best_elem:
                append = True
            sibling_key = sibling
            if (
                sibling_key in candidates
                and candidates[sibling_key]["content_score"] >= sibling_score_threshold
            ):
                append = True

            if sibling.tag == "p":
                link_density = self.get_link_density(sibling)
                node_content = sibling.text or ""
                node_length = len(node_content)

                if node_length > 80 and link_density < 0.25:
                    append = True
                elif (
                    node_length <= 80
                    and link_density == 0
                    and re.search(r"\.( |$)", node_content)
                ):
                    append = True

            if append:
                if html_partial:
                    output.append(sibling)
                else:
                    # html > body > div: append into the generated <div>.
                    output.getchildren()[0].getchildren()[0].append(sibling)
        return output

    def select_best_candidate(self, candidates):
        """Return the candidate with the highest ``content_score``, or ``None``."""
        if not candidates:
            return None
        # max() returns the first maximal item, matching a stable
        # descending sort followed by [0].
        return max(candidates.values(), key=lambda x: x["content_score"])

    def get_link_density(self, elem):
        """Return the share of ``elem``'s text length that sits inside ``<a>`` tags."""
        link_length = 0
        for i in elem.findall(".//a"):
            link_length += text_length(i)
        total_length = text_length(elem)
        return float(link_length) / max(total_length, 1)

    def score_paragraphs(self):
        """Score parents and grandparents of sufficiently long ``p``/``pre``/``td`` nodes.

        Returns:
            Dict mapping element -> ``{"content_score": ..., "elem": ...}``,
            with each score scaled by ``1 - link_density``.
        """
        MIN_LEN = self.min_text_length
        candidates = {}
        ordered = []
        for elem in self.tags(self._html(), "p", "pre", "td"):
            parent_node = elem.getparent()
            if parent_node is None:
                continue
            grand_parent_node = parent_node.getparent()

            inner_text = clean(elem.text_content() or "")
            inner_text_len = len(inner_text)

            if inner_text_len < MIN_LEN:
                continue

            if parent_node not in candidates:
                candidates[parent_node] = self.score_node(parent_node)
                ordered.append(parent_node)

            if grand_parent_node is not None and grand_parent_node not in candidates:
                candidates[grand_parent_node] = self.score_node(grand_parent_node)
                ordered.append(grand_parent_node)

            content_score = 1
            content_score += len(inner_text.split(","))
            # NOTE(review): the second term previously repeated the ASCII
            # comma; the full-width comma (U+FF0C) is presumably what was
            # intended -- confirm against the upstream source.
            content_score += len(inner_text.split("\uff0c"))
            content_score += min((inner_text_len / 100), 3)

            candidates[parent_node]["content_score"] += content_score
            if grand_parent_node is not None:
                candidates[grand_parent_node]["content_score"] += content_score / 2.0

        for elem in ordered:
            candidate = candidates[elem]
            ld = self.get_link_density(elem)
            candidate["content_score"] *= 1 - ld

        return candidates

    def class_weight(self, e):
        """Return a +/-25-step weight derived from ``e``'s class and id.

        The negative regex is only applied when ``xp_num == "others"``.
        """
        weight = 0
        for feature in [e.get("class", None), e.get("id", None)]:
            if feature:
                if self.xp_num == "others":
                    if self.REGEXES["negativeRe"].search(feature):
                        weight -= 25

                    if self.REGEXES["positiveRe"].search(feature):
                        weight += 25
                else:
                    if self.REGEXES["positiveRe"].search(feature):
                        weight += 25

                if self.positive_keywords and self.positive_keywords.search(feature):
                    weight += 25

                if self.negative_keywords and self.negative_keywords.search(feature):
                    weight -= 25

        if self.positive_keywords and self.positive_keywords.match("tag-" + e.tag):
            weight += 25

        if self.negative_keywords and self.negative_keywords.match("tag-" + e.tag):
            weight -= 25

        return weight

    def score_node(self, elem):
        """Return the initial candidate record for ``elem`` (class weight plus tag bonus)."""
        content_score = self.class_weight(elem)
        name = elem.tag.lower()
        if name in ["div", "article"]:
            content_score += 5
        elif name in ["pre", "td", "blockquote"]:
            content_score += 3
        elif name in ["address", "ol", "ul", "dl", "dd", "dt", "li", "form", "aside"]:
            content_score -= 3
        elif name in [
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "th",
            "header",
            "footer",
            "nav",
        ]:
            content_score -= 5
        return {"content_score": content_score, "elem": elem}

    def remove_unlikely_candidates(self):
        """Drop elements whose class/id match the unlikely regex and not the rescue regex."""
        for elem in self.html.findall(".//*"):
            s = "%s %s" % (elem.get("class", ""), elem.get("id", ""))
            if len(s) < 2:
                # Neither class nor id is set.
                continue
            if (
                self.REGEXES["unlikelyCandidatesRe"].search(s)
                and (not self.REGEXES["okMaybeItsACandidateRe"].search(s))
                and elem.tag not in ["html", "body"]
            ):
                elem.drop_tree()

    def transform_misused_divs_into_paragraphs(self):
        """Turn block-free ``<div>``s into ``<p>`` and wrap loose div text in ``<p>``."""
        for elem in self.tags(self.html, "div"):
            # A div without block-level children is really a paragraph.
            if not self.REGEXES["divToPElementsRe"].search(
                str(b"".join(map(tostring, list(elem))))
            ):
                elem.tag = "p"

        for elem in self.tags(self.html, "div"):
            if elem.text and elem.text.strip():
                p = fragment_fromstring("<p/>")
                p.text = elem.text
                elem.text = None
                elem.insert(0, p)

            # Walk backwards so inserts do not shift positions still to visit.
            for pos, child in reversed(list(enumerate(elem))):
                if child.tail and child.tail.strip():
                    p = fragment_fromstring("<p/>")
                    p.text = child.tail
                    child.tail = None
                    elem.insert(pos + 1, p)
                if child.tag == "br":
                    child.drop_tree()

    def tags(self, node, *tag_names):
        """Yield descendants of ``node`` for each tag name, in document order."""
        for tag_name in tag_names:
            for e in node.findall(".//%s" % tag_name):
                yield e

    def reverse_tags(self, node, *tag_names):
        """Yield descendants of ``node`` for each tag name, in reverse document order."""
        for tag_name in tag_names:
            for e in reversed(node.findall(".//%s" % tag_name)):
                yield e

    def sanitize(self, node, candidates):
        """Remove low-value headers, iframes and containers from ``node``.

        Sets ``self.html`` to ``node`` and returns the cleaned, serialised HTML.
        """
        MIN_LEN = self.min_text_length
        for header in self.tags(node, "h1", "h2", "h3", "h4", "h5", "h6"):
            if self.class_weight(header) < 0 or self.get_link_density(header) > 0.33:
                header.drop_tree()

        for elem in self.tags(node, "iframe"):
            if "src" in elem.attrib and self.REGEXES["videoRe"].search(
                elem.attrib["src"]
            ):
                elem.text = "VIDEO"
            else:
                elem.drop_tree()

        # Descendants of a container kept for its large neighbours.
        allowed = set()
        # Conditionally clean <table>s, <ul>s, and <div>s
        for el in self.reverse_tags(
            node, "table", "ul", "div", "aside", "header", "footer", "section"
        ):
            if el in allowed:
                continue
            weight = self.class_weight(el)
            if el in candidates:
                content_score = candidates[el]["content_score"]
            else:
                content_score = 0
            tag = el.tag

            if weight + content_score < 0:
                el.drop_tree()
                continue

            el_text = el.text_content()
            # NOTE(review): the second count previously repeated the ASCII
            # comma; the full-width comma (U+FF0C) is presumably what was
            # intended -- confirm against the upstream source.
            if el_text.count(",") + el_text.count("\uff0c") >= 10:
                continue

            counts = {}
            for kind in ["p", "img", "li", "a", "embed", "input"]:
                counts[kind] = len(el.findall(".//%s" % kind))
            counts["li"] -= 100
            counts["input"] -= len(el.findall('.//input[@type="hidden"]'))

            content_length = text_length(el)
            link_density = self.get_link_density(el)

            to_remove = False

            # Local modification: a div containing any image is always kept.
            if tag == "div" and counts["img"] >= 1:
                continue
            if counts["p"] and counts["img"] > 1 + counts["p"] * 1.3:
                # Too many images: removal is disabled, but the branch
                # still short-circuits the checks below.
                pass
            elif counts["li"] > counts["p"] and tag not in ("ol", "ul"):
                # More <li>s than <p>s: removal is disabled, but the branch
                # still short-circuits the checks below.
                pass
            elif counts["input"] > (counts["p"] / 3):
                # Fewer than 3x <p>s than <input>s.
                to_remove = True
            elif content_length < MIN_LEN and counts["img"] == 0:
                # Short code blocks must not be deleted.
                if tag in ['code', 'pre']:
                    continue
                if ancestor_node_check(el, ['code', 'pre']):
                    continue
                # Keep links that live inside a table cell.
                if tag in ['ul', 'div'] and ancestor_node_check(el, ['td']):
                    continue
                # Too short and without a single image.
                to_remove = True
            elif content_length < MIN_LEN and counts["img"] > 2:
                # Too short and too many images.
                to_remove = True
            elif weight < 25 and link_density > 0.2:
                if tag in ["div", "ul", "table"]:
                    ptest = el.xpath(".//text()[not(ancestor::a)]")
                    ptest_len = text_len("".join(ptest))
                    if ptest_len >= MIN_LEN and link_density <= 0.3:
                        continue
                if tag == "table":
                    if len(el.xpath('.//tr[1]/td')) >= 2:
                        continue
                if tag == "div":
                    if el.xpath('.//table'):
                        continue
                # Too many links for its weight.
                to_remove = True
            elif weight >= 25 and link_density > 0.5:
                if tag == "table":
                    if len(el.xpath('.//tr[1]/td')) >= 2:
                        continue
                if tag == "div":
                    if el.xpath('.//table'):
                        continue
                # Too many links for its weight.
                to_remove = True
            elif (counts["embed"] == 1 and content_length < 75) or counts[
                "embed"
            ] > 1:
                # <embed> with too little content, or too many <embed>s.
                to_remove = True
            elif not content_length:
                # No content.
                to_remove = True

            # Length of the nearest non-empty sibling on each side.
            siblings = []
            for sibling_iter in (el.itersiblings(), el.itersiblings(preceding=True)):
                for sib in sibling_iter:
                    sib_content_length = text_length(sib)
                    if sib_content_length:
                        siblings.append(sib_content_length)
                        break
            if siblings and sum(siblings) > 1000:
                # Surrounded by substantial text: keep it and its containers.
                to_remove = False
                for desnode in self.tags(el, "table", "ul", "div", "section"):
                    allowed.add(desnode)

            if to_remove:
                el.drop_tree()

        self.html = node
        return self.get_clean_html()

    def get_clean_html(self):
        """Serialise ``self.html`` and strip unwanted attributes."""
        return clean_attributes(tounicode(self.html, method="html"))
ultradata_math_parser/utils.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding:utf-8 -*-
2
+
3
+ import os
4
+ import re
5
+ import logging
6
+ import subprocess
7
+ import tempfile
8
+ from gzip import decompress
9
+
10
+ import numpy as np
11
+ from lxml import etree
12
+ from lxml.html import Element, HtmlElement, HTMLParser, fromstring, tostring
13
+ from lxml.html.clean import Cleaner
14
+ from urllib3.response import HTTPResponse
15
+ from ultradata_math_parser.config import Unique_ID
16
+
17
+ try:
18
+ import brotli
19
+ except ImportError:
20
+ brotli = None
21
+
22
+ try:
23
+ from cchardet import detect as cchardet_detect
24
+ except ImportError:
25
+ cchardet_detect = None
26
+
27
+ from difflib import SequenceMatcher
28
+
29
+ from charset_normalizer import from_bytes
30
+
31
# Shared lxml parser: UTF-8 input, comments and processing instructions are
# dropped, ids are not collected and no default doctype is injected.
HTML_PARSER = HTMLParser(
    encoding="utf-8",
    collect_ids=False,
    default_doctype=False,
    remove_comments=True,
    remove_pis=True,
)

# Case-insensitive match for a doctype declaration at the very start of a
# string that is closed with "/>".
DOCTYPE_TAG = re.compile("^< ?! ?DOCTYPE.+?/ ?>", re.I)

# Spellings of UTF-8 that detect_encoding filters out of its guesses.
UNICODE_ALIASES = {"utf-8", "utf_8"}

# Cleaner configured to strip only comments and processing instructions;
# every other cleaning option is switched off.
HTML_CLEANER = Cleaner(
    comments=True,
    processing_instructions=True,
    annoying_tags=False,
    embedded=False,
    forms=False,
    frames=False,
    javascript=False,
    links=False,
    meta=False,
    page_structure=False,
    remove_unknown_tags=False,
    safe_attrs_only=False,
    scripts=False,
    style=False,
)

# Matches a LaTeX ``\textcolor[...]{...}`` prefix.
color_regex = re.compile(r"\\textcolor\[.*?\]\{.*?\}")

# Class names listed as markers of LaTeX images.
latex_image_class_names = [
    "latexcenter",
    "latex",
    "tex",
    "latexdisplay",
    "latexblock",
    "latexblockcenter",
]
68
+
69
+
70
def _translator():
    """Import and return the ``py_asciimath.translator.translator`` module.

    The root logger's level is recorded before the import and set back
    afterwards (also when the import fails), so the import cannot leave
    the root logger at a different level.
    """
    root_logger = logging.getLogger()
    saved_level = root_logger.level
    try:
        import py_asciimath.translator.translator as translator_module
    finally:
        root_logger.setLevel(saved_level)
    return translator_module
78
+
79
+
80
def ASCIIMath2Tex(*args, **kwargs):
    """Instantiate ``ASCIIMath2Tex`` from the lazily imported translator module.

    All positional and keyword arguments are forwarded unchanged.
    """
    translator_module = _translator()
    return translator_module.ASCIIMath2Tex(*args, **kwargs)
82
+
83
+
84
def MathML2Tex(*args, **kwargs):
    """Instantiate ``MathML2Tex`` from the lazily imported translator module.

    All positional and keyword arguments are forwarded unchanged.
    """
    translator_module = _translator()
    return translator_module.MathML2Tex(*args, **kwargs)
86
+
87
+
88
# Module-wide AsciiMath translator, built once at import time (log=False).
asciimath2tex = ASCIIMath2Tex(log=False)
89
+
90
+
91
def lcs_of_2(a, b):
    """Return the longest contiguous run shared by sequences *a* and *b*.

    The result is a slice of *a* (empty when nothing matches).
    """
    matcher = SequenceMatcher(None, a, b)
    match = matcher.find_longest_match(0, len(a), 0, len(b))
    return a[match.a: match.a + match.size]


def lcs_of_list(*args):
    """Return the longest contiguous run common to all given sequences.

    The sequences are combined pairwise from right to left, i.e.
    ``lcs_of_2(a0, lcs_of_2(a1, lcs_of_2(a2, ...)))``. A single sequence
    is returned as is.

    Raises:
        IndexError: if called without any sequence.
    """
    if not args:
        raise IndexError("lcs_of_list() needs at least one sequence")
    common = args[-1]
    # Fold right-to-left so results match the nested pairwise evaluation,
    # without recursing once per argument.
    for sequence in reversed(args[:-1]):
        common = lcs_of_2(sequence, common)
    return common
102
+
103
+
104
def isutf8(data):
    """Report whether *data* decodes cleanly as UTF-8."""
    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        is_valid = False
    else:
        is_valid = True
    return is_valid
110
+
111
+
112
def handle_compressed_file(filecontent):
    """Best-effort decompression of a downloaded payload.

    Bytes starting with the gzip magic number are gunzipped; any other
    bytes are tried with brotli when that package is installed. If
    decompression fails, or the input is not ``bytes``, the input is
    returned unchanged.
    """
    # Local import: a corrupt deflate stream surfaces from gzip as
    # zlib.error, which is neither EOFError nor OSError.
    import zlib

    if not isinstance(filecontent, bytes):
        return filecontent
    if filecontent[:2] == b"\x1f\x8b":
        try:
            return decompress(filecontent)
        except (EOFError, OSError, zlib.error):
            return filecontent
    if brotli is not None:
        try:
            return brotli.decompress(filecontent)
        except brotli.error:
            return filecontent
    return filecontent
125
+
126
+
127
def detect_encoding(bytesobject):
    """Return a list of candidate encodings for *bytesobject*.

    Valid UTF-8 short-circuits to ``["utf-8"]``. Otherwise guesses are
    collected from cchardet (when installed) and charset_normalizer
    (first on the leading 15000 bytes, then on the whole input if that
    yields nothing), with UTF-8 aliases filtered out.
    """
    if isutf8(bytesobject):
        return ["utf-8"]
    candidates = []
    if cchardet_detect is not None:
        fast_guess = cchardet_detect(bytesobject)["encoding"]
        if fast_guess is not None:
            candidates.append(fast_guess.lower())
    matches = from_bytes(bytesobject[:15000]) or from_bytes(bytesobject)
    if len(matches) > 0:
        candidates.extend(match.encoding for match in matches)
    return [name for name in candidates if name not in UNICODE_ALIASES]
139
+
140
+
141
def decode_file(filecontent):
    """Turn a downloaded payload into text.

    Strings are returned untouched. Bytes are decompressed if needed and
    decoded with the first guessed encoding that works; when none works
    (or the decoded text is empty) UTF-8 with replacement characters is
    used instead.
    """
    if isinstance(filecontent, str):
        return filecontent
    raw = handle_compressed_file(filecontent)
    for candidate in detect_encoding(raw):
        try:
            decoded = raw.decode(candidate)
        except (LookupError, UnicodeDecodeError):
            continue
        # The first successful decoding ends the search.
        return decoded or str(raw, encoding="utf-8", errors="replace")
    return str(raw, encoding="utf-8", errors="replace")
154
+
155
+
156
def strip_faulty_doctypes(htmlstring: str, beginning: str) -> str:
    """Remove a doctype matched by ``DOCTYPE_TAG`` from the first line.

    Nothing is done unless *beginning* (the lower-cased start of the
    document) contains ``"doctype"``. Otherwise only the first line is
    rewritten and rejoined with the rest by a newline.
    """
    if "doctype" not in beginning:
        return htmlstring
    head, _, tail = htmlstring.partition("\n")
    cleaned_head = DOCTYPE_TAG.sub("", head, count=1)
    return cleaned_head + "\n" + tail
161
+
162
+
163
def is_dubious_html(beginning: str) -> bool:
    """Return True when *beginning* does not contain the substring ``html``."""
    has_html_marker = "html" in beginning
    return not has_html_marker
165
+
166
+
167
def fromstring_bytes(htmlobject):
    """Parse *htmlobject* after encoding it to UTF-8 bytes.

    The string is encoded with ``surrogatepass`` and parsed with
    ``HTML_PARSER``. This is a best-effort fallback: any failure is
    logged at debug level and ``None`` is returned.
    """
    try:
        payload = htmlobject.encode("utf8", "surrogatepass")
        return fromstring(payload, parser=HTML_PARSER)
    except Exception as err:  # deliberate catch-all: callers expect None
        logging.getLogger(__name__).debug("lxml bytes parsing failed: %s", err)
        return None
176
+
177
+
178
def ancestor_node_check(node: HtmlElement, tags: list):
    """Return True if *node* has an ancestor whose tag is one of *tags*."""
    return any(node.xpath(f'ancestor::{tag}[1]') for tag in tags)
183
+
184
+
185
def load_html(htmlobject):
    """Load an HTML document into an lxml tree.

    Accepts an ``HtmlElement`` (returned as is), an object exposing a
    ``data`` attribute such as an ``HTTPResponse``, or ``bytes``/``str``.

    Returns:
        The parsed tree, or ``None`` when parsing fails or the result
        looks too small to be a real document.

    Raises:
        TypeError: if the input is none of the supported types.
    """
    if isinstance(htmlobject, HtmlElement):
        return htmlobject
    if isinstance(htmlobject, HTTPResponse) or hasattr(htmlobject, "data"):
        htmlobject = htmlobject.data
    if not isinstance(htmlobject, (bytes, str)):
        raise TypeError("incompatible input type", type(htmlobject))

    tree = None
    htmlobject = decode_file(htmlobject)
    # Only the first 50 characters are inspected for html/doctype markers.
    beginning = htmlobject[:50].lower()
    check_flag = is_dubious_html(beginning)
    htmlobject = strip_faulty_doctypes(htmlobject, beginning)

    fallback_parse = False
    try:
        tree = fromstring(htmlobject, parser=HTML_PARSER)
    except ValueError:
        # The str input was rejected: retry on the UTF-8 encoded bytes.
        tree = fromstring_bytes(htmlobject)
        fallback_parse = True
    except Exception as err:  # best-effort: fall through to the bytes retry
        logging.getLogger(__name__).debug("lxml parsing failed: %s", err)

    # Retry on bytes when the first attempt gave nothing usable.
    if (tree is None or len(tree) < 1) and not fallback_parse:
        tree = fromstring_bytes(htmlobject)
    # Input without an "html" marker must yield at least two top-level
    # children, otherwise it is rejected.
    if tree is not None and check_flag is True and len(tree) < 2:
        tree = None
    return tree
210
+
211
+
212
class W3MError(RuntimeError):
    """Signals that rendering HTML through the external w3m program failed."""
214
+
215
+
216
def run_w3m_dump(html_content: str, w3m_path: str, *, columns: int = 200) -> str:
    """
    Render HTML content into plain text using w3m.

    The HTML is written to a temporary ``.html`` file (UTF-8), which is
    handed to ``w3m -dump`` and removed afterwards.

    :param html_content: HTML snippet to render (``None``/empty renders "").
    :param w3m_path: Path to the w3m executable.
    :param columns: Column width passed to w3m (-cols).
    :return: Rendered plain text (w3m's standard output).
    :raises W3MError: (a ``RuntimeError``) if no path is given, the
        executable is not found, or w3m returns a non-zero exit code.
    """
    if not w3m_path:
        raise W3MError("w3m path must be provided")

    tmp_file = tempfile.NamedTemporaryFile(
        mode="w", suffix=".html", delete=False, encoding="utf-8"
    )
    try:
        # The context manager closes the handle even if write() raises;
        # delete=False keeps the file on disk for w3m to read.
        with tmp_file:
            tmp_file.write(html_content or "")

        command = [
            w3m_path,
            "-dump",
            "-T",
            "text/html",
            "-cols",
            str(columns),
            tmp_file.name,
        ]
        try:
            completed = subprocess.run(
                command,
                check=True,
                capture_output=True,
                text=True,
            )
        except FileNotFoundError as exc:
            raise W3MError(f"w3m executable not found at '{w3m_path}'") from exc
        except subprocess.CalledProcessError as exc:
            stderr = (exc.stderr or "").strip()
            message = f"w3m exited with status {exc.returncode}"
            if stderr:
                message = f"{message}: {stderr}"
            raise W3MError(message) from exc

        return completed.stdout
    finally:
        # Always remove the temporary file; a failed removal is ignored.
        try:
            os.unlink(tmp_file.name)
        except OSError:
            pass
267
+
268
+
269
def is_empty_element(node: HtmlElement):
    """Return True when *node* has neither child elements nor leading text."""
    return not (node.getchildren() or node.text)
271
+
272
+
273
def iter_node(element: HtmlElement):
    """Yield *element* and then, depth-first, every ``HtmlElement`` below it.

    Children that are not ``HtmlElement`` instances are skipped together
    with their subtrees.
    """
    yield element
    html_children = (child for child in element if isinstance(child, HtmlElement))
    for child in html_children:
        yield from iter_node(child)
278
+
279
+
280
def img_div_check(tree):
    """
    Return False (keep the node) when it holds exactly one image and has
    fewer than 4 descendant elements; return True otherwise.
    """
    has_single_image = len(tree.xpath(".//img")) == 1
    if has_single_image and len(tree.xpath(".//*")) < 4:
        return False
    return True
288
+
289
+
290
def text_len(s):
    """Return a script-aware length estimate for *s*.

    The result is the number of whitespace-separated tokens plus the
    number of individual CJK ideographs, Japanese kana and Arabic
    characters found in the text.
    """
    s = re.sub(" +", " ", s)  # collapse runs of spaces into one space
    s = re.sub("[\n\t\r]+", "\n", s)
    char_classes = (
        r"[\u4e00-\u9fff]",  # CJK unified ideographs
        r"[\u3040-\u309F\u30A0-\u30FF]",  # hiragana and katakana
        r"[\u0600-\u06FF]",  # Arabic
    )
    total = len(s.split())
    for char_class in char_classes:
        total += len(re.findall(char_class, s))
    return total
303
+
304
+
305
def alias(element):
    """Build a structural signature string for *element*.

    The signature is the tag followed by its attributes (whitespace and
    ``-<digits>`` suffixes removed from values, the ``Unique_ID``
    attribute skipped), then ``:`` and a summary of the direct children:
    for ``dt``/``dd``/``li`` children their own child count, for any
    other child its tag plus its attribute names.

    Returns ``""`` for ``None`` and the bare tag for ``html``/``body``.
    """
    if element is None:
        return ""
    tag = element.tag
    # html and body get no attribute/child details
    if tag in ["html", "body"]:
        return tag

    parts = [tag]
    for k, v in element.attrib.items():
        if k == Unique_ID:
            continue
        k, v = re.sub(r"\s*", "", k), re.sub(r"\s*", "", v)
        v = re.sub(r"-\d+", "", v)
        parts.append(f'[{k}="{v}"]' if v else f"[{k}]")
    result = "".join(parts)

    # Fold a summary of the direct children into the signature.
    nth = ""
    for child in element.getchildren():
        if child.tag in ["dt", "dd", "li"]:
            # list-like items contribute only their number of children
            nth += str(len(child.getchildren()))
            continue
        child_parts = [child.tag]
        for k in child.attrib.keys():
            if k == Unique_ID:
                continue
            # only the attribute name is recorded for children
            child_parts.append("[" + re.sub(r"\s*", "", k) + "]")
        nth += "".join(child_parts)

    return f"{result}:{nth}"
342
+
343
+
344
def similarity2(s1, s2):
    """Return the Jaccard similarity of the item sets of *s1* and *s2*.

    Returns 0 when either argument is empty.
    """
    if not (s1 and s2):
        return 0
    items1, items2 = set(s1), set(s2)
    return len(items1 & items2) / len(items1 | items2)
352
+
353
+
354
def similarity_with_element(element1, element2):
    """Compare two elements by the similarity of their ``alias`` signatures."""
    return similarity2(alias(element1), alias(element2))
358
+
359
+
360
def similarity_with_siblings(element, siblings):
    """Return the mean similarity between *element* and its *siblings*.

    When there are several siblings the single lowest score is dropped
    before averaging. A lone score is kept, since dropping it would
    leave nothing to average and produce NaN. Returns 0 when there are
    no siblings.
    """
    # TODO: maybe compare all children not only alias
    scores = [similarity_with_element(element, sibling) for sibling in siblings]
    if not scores:
        return 0
    if len(scores) > 1:
        # discard one lowest value
        scores.remove(min(scores))
    return np.mean(scores)
371
+
372
+
373
def number_of_a_char(ele, xpath=".//a//text()"):
    """Return ``text_len`` of the text selected by *xpath* (link text by default)."""
    joined = "".join(ele.xpath(xpath))
    return text_len(joined.strip())
376
+
377
+
378
def number_of_char(ele, xpath=".//text()"):
    """Return ``text_len`` of the text selected by *xpath*, plus one.

    The added one means the result is never zero.
    """
    joined = "".join(ele.xpath(xpath))
    return 1 + text_len(joined.strip())
381
+
382
+
383
def density_of_a_text(ele, pre=0.7):
    """Return True when link text makes up at least *pre* of the element's text."""
    link_chars = number_of_a_char(ele)
    total_chars = number_of_char(ele)
    return link_chars / total_chars >= pre
390
+
391
+
392
def uniquify_list(l):
    """Return the items of *l* without duplicates, keeping first-seen order."""
    ordered = {}
    for item in l:
        ordered.setdefault(item, None)
    return list(ordered)
394
+
395
+
396
def trim(string):
    """Collapse all whitespace runs in *string* to single spaces.

    Returns ``None`` when the input cannot be split and joined as text.
    """
    try:
        words = string.split()
        return " ".join(words).strip()
    except (AttributeError, TypeError):
        return None
402
+
403
+
404
def collect_link_info(links_xpath, favor_precision=False):
    """Summarise the text of a collection of link elements.

    Returns a tuple ``(total text length, number of non-empty links,
    number of short links, list of link texts)``. A link counts as short
    below 10 characters, or below 50 when *favor_precision* is set.
    """
    short_limit = 50 if favor_precision else 10
    texts = []
    short_count = 0
    for link in links_xpath:
        content = trim(link.text_content())
        if not content:
            continue
        texts.append(content)
        if len(content) < short_limit:
            short_count += 1
    total_length = sum(map(len, texts))
    return total_length, len(texts), short_count, texts
415
+
416
+
417
def link_density_test(element, text, favor_precision=False):
    """Decide whether *element* is dominated by links.

    Returns ``(flag, link_texts)``: *flag* is True when the element is
    short enough to be tested and its links either carry no text or
    make up most of its content; *link_texts* holds the collected link
    texts (empty when they were not collected).
    """
    links = element.findall(".//a")
    link_texts = []
    if not links:
        return False, link_texts

    # The length limit depends on the tag, the precision mode and whether
    # the element is the last among its siblings.
    threshold = 0.8
    if element.tag == "p":
        if favor_precision is not False:
            limitlen = 200
        elif element.getnext() is None:
            limitlen = 60
        else:
            limitlen = 30
    elif element.getnext() is None:
        limitlen = 300
    else:
        limitlen = 100

    elemlen = len(text)
    if elemlen >= limitlen:
        return False, link_texts

    linklen, elemnum, shortelems, link_texts = collect_link_info(
        links, favor_precision
    )
    if elemnum == 0:
        return True, link_texts
    if density_of_a_text(element, 0.5):
        mostly_short = elemnum > 1 and shortelems / elemnum > 0.8
        if linklen > threshold * elemlen or mostly_short:
            return True, link_texts
    return False, link_texts
446
+
447
+
448
def text_strip(text):
    """Strip surrounding whitespace; falsy values (``None``, ``""``) pass through."""
    if not text:
        return text
    return text.strip()
450
+
451
+
452
def wrap_math(s, display=False):
    """Normalise a math snippet and wrap it in ``$`` delimiters.

    Whitespace is collapsed, ``\\textcolor[..]{..}`` prefixes, dollar
    signs and literal ``\\n`` sequences are removed. Empty results and
    snippets containing ``align`` are returned without delimiters;
    otherwise ``$$...$$`` is used when *display* is true, ``$...$`` if not.
    """
    cleaned = color_regex.sub("", re.sub(r"\s+", " ", s))
    cleaned = cleaned.replace("$", "")
    cleaned = cleaned.replace("\n", " ").replace("\\n", "")
    cleaned = cleaned.strip()
    # Empty text, or anything mentioning "align", is left unwrapped.
    if not cleaned or "align" in cleaned:
        return cleaned
    delimiter = "$$" if display else "$"
    return delimiter + cleaned + delimiter
466
+
467
+
468
def extract_asciimath(s):
    """Translate *s* with the module-level ``asciimath2tex`` translator."""
    return asciimath2tex.translate(s)
471
+
472
+
473
# The MathML-to-LaTeX stylesheet is looked up relative to this module.
cur_file = os.path.abspath(__file__)
xsl_path = os.path.join(os.path.dirname(cur_file), "mmltex/mmltex.xsl")

# Parse the stylesheet once at import time and build the reusable transform.
xslt = etree.parse(xsl_path)
transform = etree.XSLT(xslt)
478
+
479
+
480
def mml_to_latex(mml_code):
    """Convert a MathML string to LaTeX with the module-level XSLT ``transform``.

    A bare ``<math>`` tag is given the MathML namespace and some broken
    quoting is repaired before the markup is parsed and transformed.
    """
    # NOTE(review): this substitution replaces every <math ...> tag with
    # itself, so it does not actually remove any attributes.
    mml_code = re.sub(r"(<math.*?>)", r"\1", mml_code)

    # The namespace is added only to a bare <math> tag.
    namespaced = mml_code.replace(
        "<math>", '<math xmlns="http://www.w3.org/1998/Math/MathML">'
    )

    # Turn &quot; entities and backslash-escaped quote pairs into plain
    # double quotes.
    namespaced = namespaced.replace("&quot;", '"')
    namespaced = namespaced.replace("'\\\"", '"').replace("\\\"'", '"')

    # Many pages ship malformed tags: a value opened with a double quote
    # but closed with a single quote gets a double quote at the end.
    namespaced = re.sub(r'"([^"]+?)\'', r'"\1"', namespaced)

    document = etree.fromstring(namespaced)
    return str(transform(document))