diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..246097c2a17a4e417fd084344aef4b87a4295ce0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# 虚拟环境 +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +.trae/ +*.swp +*.swo +*~ + +# 日志 +*.log + + +# Jupyter Notebook +.ipynb_checkpoints + +# macOS +.DS_Store + + +vis/ +driver/ +**/**/chromedriver diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..561ae9e947a4487b56347715a3293996eebbf9c6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,185 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. +"Legal Entity" shall mean the union of the acting entity and all other +entities that control, are controlled by, or are under common control +with that entity. For the purposes of this definition, "control" means +(i) the power, direct or indirect, to cause the direction or management +of such entity, whether by contract or otherwise, or (ii) ownership of +fifty percent (50%) or more of the outstanding shares, or (iii) beneficial +ownership of such entity. +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. 
+"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation source, +and configuration files. +"Object" form shall mean any form resulting from mechanical transformation +or translation of a Source form, including but not limited to compiled +object code, generated documentation, and conversions to other media types. +"Work" shall mean the work of authorship, whether in Source or Object +form, made available under the License, as indicated by a copyright +notice that is included in or attached to the work (an example is +provided in the Appendix below). +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally submitted +to Licensor for inclusion in the Work by the copyright owner or by an +individual or Legal Entity authorized to submit on behalf of the copyright +owner. For the purposes of this definition, "submitted" means any form of +electronic, verbal, or written communication sent to the Licensor or its +representatives, including but not limited to communication on electronic +mailing lists, source code control systems, and issue tracking systems +that are managed by, or on behalf of, the Licensor for the purpose of +discussing and improving the Work, but excluding communication that is +conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." 
+"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. +Subject to the terms and conditions of this License, each Contributor +hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, +royalty-free, irrevocable copyright license to reproduce, prepare +Derivative Works of, publicly display, publicly perform, sublicense, +and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. +Subject to the terms and conditions of this License, each Contributor +hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, +royalty-free, irrevocable (except as stated in this section) patent license +to make, have made, use, offer to sell, sell, import, and otherwise +transfer the Work, where such license applies only to those patent claims +licensable by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) with the Work +to which such Contribution(s) was submitted. If You institute patent litigation +against any entity (including a cross-claim or counterclaim in a lawsuit) +alleging that the Work or a Contribution incorporated within the Work +constitutes direct or contributory patent infringement, then any patent +licenses granted to You under this License for that Work shall terminate +as of the date such litigation is filed. + +4. Redistribution. 
+You may reproduce and distribute copies of the Work or Derivative Works +thereof in any medium, with or without modifications, and in Source or +Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works + a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating + that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that + You distribute, all copyright, patent, trademark, and attribution + notices from the Source form of the Work, excluding those notices that + do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, + then any Derivative Works that You distribute must include a readable + copy of the attribution notices contained within such NOTICE file, + excluding those notices that do not pertain to any part of the + Derivative Works, in at least one of the following places: within a + NOTICE text file distributed as part of the Derivative Works; within the + Source form or documentation, if provided along with the Derivative Works; + or, within a display generated by the Derivative Works, if and wherever + such third-party notices normally appear. The contents of the NOTICE file + are for informational purposes only and do not modify the License. + You may add Your own attribution notices within Derivative Works that + You distribute, alongside or as an addendum to the NOTICE text from the + Work, provided that such additional attribution notices cannot be construed + as modifying the License. 
+ +You may add Your own copyright statement to Your modifications and may +provide additional or different license terms and conditions for use, +reproduction, or distribution of Your modifications, or for any such +Derivative Works as a whole, provided Your use, reproduction, and +distribution of the Work otherwise complies with the conditions stated +in this License. + +5. Submission of Contributions. +Unless You explicitly state otherwise, any Contribution intentionally +submitted for inclusion in the Work by You to the Licensor shall be under +the terms and conditions of this License, without any additional +terms or conditions. Notwithstanding the above, nothing herein shall supersede +or modify the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. +Unless required by applicable law or agreed to in writing, Licensor provides the Work +(and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. +You are solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to damages +for loss of goodwill, work stoppage, computer failure or malfunction, or any and +all other commercial damages or losses), even if such Contributor has been +advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. +While redistributing the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, or other +liability obligations and/or rights consistent with this License. However, in +accepting such obligations, You may act only on Your own behalf and on Your sole +responsibility, not on behalf of any other Contributor, and only if You agree to +indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ +Copyright (c) 2025 KevinQiu + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + diff --git a/README-CN.md b/README-CN.md new file mode 100644 index 0000000000000000000000000000000000000000..93cc0fc31aa2f707d061d441315b36ce04caef54 --- /dev/null +++ b/README-CN.md @@ -0,0 +1,102 @@ +
+ +# ⚡️FastCDM + +[**[GitHub Repo]**](https://github.com/BinyangQiu/FastCDM) | [**[HuggingFace Spaces]**](https://huggingface.co/spaces) + +

+ + PyPI package version + + + Python versions + + + GitHub license + +

+ +
+ +## 🚀 简介 + +[CDM](https://github.com/opendatalab/UniMERNet/tree/main/cdm) 通过将预测和真实的LaTeX公式渲染为图像,然后使用视觉特征提取和定位技术进行精确的字符级匹配,结合空间位置信息,确保了评估的客观性和准确性。 + +**FastCDM** 旨在解决性能问题。作为原版 [CDM](https://github.com/opendatalab/UniMERNet/tree/main/cdm) 的高性能优化版本,FastCDM采用浏览器的KaTeX渲染引擎,而非传统的LaTeX编译,速度得到了极大的提升。 + +### 🎯 项目目标 + +FastCDM的核心目标是**在训练过程中提供便捷的使用体验**,帮助推动公式识别任务的进步。我们致力于: +- 提供简单易用的API接口,方便在训练循环中集成评估 +- 支持实时评估和批量评估两种模式 +- 提供训练过程中的评估指标可视化工具 + +### 为什么选择 FastCDM? + +1. **极速性能**:基于KaTeX的渲染引擎,相比传统LaTeX编译流程快数十倍 +2. **简化部署**:无需安装复杂的LaTeX环境(ImageMagick、texlive-full等) +3. **准确评估**:采用字符检测匹配方法,避免传统文本指标的不公平性问题 +4. **持续优化**:对CDM符号支持进行补充完善,并持续迭代改进 +5. **易于集成**:提供统一的API接口,方便集成到各种训练框架中,未来将集成PyTorch、Transformers等多个主流训练框架 + +### ⚠️ 注意 + +虽然 KaTeX 跑得比八卦记者还快,但它毕竟是为了 Web 优化的轻量级选手,无法做到对所有 LaTeX 诡异语法的 **100%** 支持。 + +对于绝大部分的常规公式,它完美胜任。这是一个合理且能走得长远的技术选型。 + +可以在这里查阅 KaTeX 的支持范围:🔗 [KaTeX Support Table](https://katex.org/docs/support_table) + +--- + +## 使用方法 + +### 安装 + +```bash +pip install fastcdm +``` + +### 快速开始 + +```python +from fastcdm import FastCDM + +chromedriver_path = "driver/chromedriver" + +# 初始化 FastCDM 评估器 +evaluator = FastCDM(chromedriver=chromedriver_path) + +# 评估 +cdm_score, recall, precision = evaluator.compute(gt="E = mc^2", pred="E + 1 = mc^2", visualize=False) + +# 评估,并可视化 +cdm_score, recall, precision, vis_img = evaluator.compute(gt="E = mc^2", pred="E + 1 = mc^2", visualize=True) +``` + +### 交互Demo + +我们提供了一个Gradio开发的可视化Demo,您可以在[HuggingFace Spaces](https://huggingface.co/spaces)中尝试使用。也可以本地启动: + +```bash +python3 scripts/app.py +``` + +## 贡献与反馈 + +我们欢迎所有形式的贡献,包括但不限于: +- 提交问题报告 +- 建议改进 +- 提交代码变更(请先开issue讨论) + +请通过项目的[issues](https://github.com/BinyangQiu/FastCDM/issues)与我们联系。 + +--- + +## 协议 + +本项目基于 Apache 2.0 协议开源。您可以在遵守协议条款的前提下自由使用、修改和分发本项目的代码。 + diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9fe0d0b6027363ed74408103a1bfffefd4945bb0 --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +
+ +# ⚡️FastCDM + +[**[GitHub Repo]**](https://github.com/BinyangQiu/FastCDM) | [**[HuggingFace Spaces]**](https://huggingface.co/spaces) + +

+ + PyPI package version + + + Python versions + + + GitHub license + +

+ +
+ +## 🚀 Introduction + +[CDM](https://github.com/opendatalab/UniMERNet/tree/main/cdm) ensures the objectivity and accuracy of evaluation by rendering predicted and ground-truth LaTeX formulas into images, and then using visual feature extraction and localization techniques to perform precise character-level matching, combined with spatial position information. + +**FastCDM** aims to address performance issues. As a high-performance optimized version of the original [CDM](https://github.com/opendatalab/UniMERNet/tree/main/cdm), FastCDM employs the browser-based KaTeX rendering engine instead of traditional LaTeX compilation, resulting in significantly improved speed. + +### 🎯 Project Goals + +The core objective of FastCDM is to **provide a convenient user experience during the training process**, helping to advance formula recognition tasks. We are committed to: +- Providing simple and easy-to-use API interfaces for convenient integration of evaluation within the training loop. +- Supporting both real-time evaluation and batch evaluation modes. +- Providing visualization tools for evaluation metrics during the training process. + +### Why Choose FastCDM? + +1. **Extreme Performance**: Based on the KaTeX rendering engine, it is tens of times faster than the traditional LaTeX compilation process. +2. **Simplified Deployment**: No need to install complex LaTeX environments (ImageMagick, texlive-full, etc.). +3. **Accurate Evaluation**: Adopts character detection matching methods to avoid the unfairness issues associated with traditional text metrics. +4. **Continuous Optimization**: Supplements and refines CDM symbol support, with continuous iterative improvements. +5. **Easy Integration**: Provides a unified API interface for easy integration into various training frameworks. Future integration with mainstream training frameworks such as PyTorch and Transformers is planned. 
+ +### ⚠️ Note + +Although KaTeX is extremely fast, it is a lightweight solution optimized for the Web and cannot support **100%** of all obscure LaTeX syntax. + +For the vast majority of standard formulas, it performs perfectly. This is a reasonable and sustainable technical choice. + +You can check KaTeX's support coverage here: 🔗 [KaTeX Support Table](https://katex.org/docs/support_table) + +--- + +## Usage + +### Installation + +```bash +pip install fastcdm +``` + +### Quick Start + +```python +from fastcdm import FastCDM + +chromedriver_path = "driver/chromedriver" + +# Initialize FastCDM evaluator +evaluator = FastCDM(chromedriver=chromedriver_path) + +# Evaluate +cdm_score, recall, precision = evaluator.compute(gt="E = mc^2", pred="E + 1 = mc^2", visualize=False) + +# Evaluate and visualize +cdm_score, recall, precision, vis_img = evaluator.compute(gt="E = mc^2", pred="E + 1 = mc^2", visualize=True) +``` + +### Interactive Demo + +We provide a visualization Demo developed with Gradio, which you can try on [HuggingFace Spaces](https://huggingface.co/spaces). You can also launch it locally: + +```bash +python3 scripts/app.py +``` + +## Contribution and Feedback + +We welcome all forms of contribution, including but not limited to: +- Submitting issue reports +- Suggesting improvements +- Submitting code changes (please open an issue for discussion first) + +Please contact us via the project's [issues](https://github.com/BinyangQiu/FastCDM/issues). + +--- + +## License + +This project is open-sourced under the Apache 2.0 license. You are free to use, modify, and distribute the code of this project under the terms of the license. 
diff --git a/docs/chromedriver_installation.md b/docs/chromedriver_installation.md new file mode 100644 index 0000000000000000000000000000000000000000..8bea967f60c9eaacc5c05fb51ecd8d415bec1ee1 --- /dev/null +++ b/docs/chromedriver_installation.md @@ -0,0 +1,53 @@ +# Chromedriver 安装 + +Chromedriver 是 Google 提供的用于控制 Chrome 浏览器的驱动程序。所以你需要先安装 Chrome 浏览器,然后下载对应的 Chromedriver 版本。 + +## 1. 安装 Chrome 浏览器 + +Win和Mac都非常容易安装。我们主要介绍Ubuntu上的安装方法: + +```bash +sudo apt update +sudo apt install libxss1 libappindicator1 libindicator7 +wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb +sudo dpkg -i google-chrome*.deb # Might show "errors", fixed by next line +sudo apt install -f +google-chrome --version # 查看版本 +``` + +## 2. 安装 Chromedriver + +### 自动安装 + +我们提供了一个自动安装脚本,你可以直接运行它来安装 Chromedriver。 + +> 注意:自动安装脚本需要配置代理才能正常工作。如果你的网络环境需要代理,请先配置好代理。否则采用后面手动安装的方法。 + +```bash +python3 scripts/auto_install_chromedriver.py --dest driver/ +``` + +### 手动安装 + +通过前面的`google-chrome --version`命令查看 Chrome 浏览器的版本号,例如`126.0.6478.126`。然后根据版本号执行以下命令: + +```bash +mkdir driver/ +cd driver/ + +# 下载对应版本的 Chromedriver,注意替换版本号 +wget https://storage.googleapis.com/chrome-for-testing-public/126.0.6478.126/linux64/chromedriver-linux64.zip + +unzip chromedriver-linux64.zip +mv chromedriver-linux64/chromedriver ./ +sudo chmod 777 chromedriver +rm -r chromedriver-linux64 chromedriver-linux64.zip +``` + +## 3. 
def get_bboxes_from_array(img_array, colors):
    """Find the bounding box of each exactly-matching colour in an image.

    Args:
        img_array: Image array handed to ``cv2.inRange``; its channels are
            compared against each colour in B, G, R order.
        colors: Iterable of ``(r, g, b)`` triples, one per token.

    Returns:
        A list aligned with ``colors``. Each entry is
        ``[xmin, ymin, xmax, ymax]`` (inclusive pixel indices) or ``[]`` when
        no pixel has exactly that colour.
    """

    def _locate(rgb):
        red, green, blue = rgb
        # Colours arrive as RGB; the image is compared in BGR channel order.
        bgr = np.array([blue, green, red], dtype=np.uint8)
        # Identical lower/upper bounds make inRange an exact-colour test.
        hits = np.argwhere(cv2.inRange(img_array, bgr, bgr))
        if hits.size == 0:
            return []  # token has no pixel of its colour in the image
        top, left = hits.min(axis=0)
        bottom, right = hits.max(axis=0)
        return [int(left), int(top), int(right), int(bottom)]

    return [_locate(rgb) for rgb in colors]
# Code-point table for str.translate: the ideographic space (U+3000) becomes an
# ASCII space, and the full-width forms 65281..65374 are shifted down by 65248
# onto their ASCII counterparts.
_FULL_TO_HALF_TABLE = {12288: 32}
_FULL_TO_HALF_TABLE.update({cp: cp - 65248 for cp in range(65281, 65375)})


def full_to_half_width(s: str) -> str:
    """Return *s* with full-width characters replaced by half-width ones.

    Characters outside the full-width space and the 65281..65374 range are
    left untouched. A single ``str.translate`` pass is used instead of
    growing the result one character at a time.
    """
    return s.translate(_FULL_TO_HALF_TABLE)


def clean_latex_delimiters(latex_string: str) -> str:
    """Strip one pair of surrounding ``$$`` or ``$`` math delimiters.

    The input is whitespace-stripped first. If it is wrapped in ``$$...$$``
    (checked first) or ``$...$``, the delimiters are removed and the inner
    text is stripped again; otherwise the stripped input is returned.
    """
    s = latex_string.strip()
    for delimiter in ("$$", "$"):
        if s.startswith(delimiter) and s.endswith(delimiter):
            width = len(delimiter)
            return s[width:-width].strip()
    return s


def clean(s: str) -> str:
    r"""Normalise a raw LaTeX string before tokenization.

    Steps, in order: convert full-width characters to half-width, rewrite the
    literal placeholders ``{/[}`` and ``{/]}`` to `` \[`` and `` \]``, then
    remove surrounding ``$`` / ``$$`` delimiters and outer whitespace.
    """
    processed_text = full_to_half_width(s)
    processed_text = processed_text.replace("{/[}", r" \[").replace("{/]}", r" \]")
    return clean_latex_delimiters(processed_text)


def generate_high_contrast_colors(num_colors: int) -> List[Tuple[int, int, int]]:
    """Generate *num_colors* visually distinct RGB colours.

    Hues are stepped by the golden-ratio conjugate (starting from 0.5) in HSV
    space with fixed saturation 0.9 and value 0.95, then converted to integer
    RGB triples by truncating ``channel * 255``.
    """
    golden_ratio_conjugate = 0.61803398875
    hue = 0.5
    colors_rgb = []
    for _ in range(num_colors):
        # Advance first, then wrap, so the first colour is not hue 0.5 itself.
        hue += golden_ratio_conjugate
        hue %= 1
        red, green, blue = colorsys.hsv_to_rgb(hue, 0.9, 0.95)
        colors_rgb.append((int(red * 255), int(green * 255), int(blue * 255)))
    return colors_rgb
def process_for_katex(pre_tokenized_latex: str) -> Tuple[str, List[str]]:
    """Build a colour-placeholder KaTeX template from tokenized LaTeX.

    Args:
        pre_tokenized_latex: Space-separated LaTeX token string.

    Returns:
        ``(katex_template, token_list)``. Empty or non-string input yields
        ``("", [])``. Any exception raised while processing is reported on
        stderr and yields ``("[PROCESSING FAILED]", [])``.
    """

    if not pre_tokenized_latex or not isinstance(pre_tokenized_latex, str):
        return "", []

    try:
        normalized_latex = normalize_latex(pre_tokenized_latex)
        # Split on single spaces and drop the empty strings left by repeated spaces.
        l_split = [token for token in normalized_latex.strip().split(" ") if token]

        idx = 0
        token_list = []
        temp_l_split = list(l_split)

        # token_add_color_RGB returns the updated split list, the next index to
        # visit and the accumulated token list, so the loop is driven by its
        # returned index rather than a fixed step.
        while idx < len(temp_l_split):
            temp_l_split, idx, token_list = token_add_color_RGB(
                temp_l_split, idx, token_list
            )

        colored_template_original = " ".join(temp_l_split)
        # NOTE(review): the replacement references group \1 but the pattern as
        # written defines no capture group, so re.sub would reject the template
        # and the handler below would return the failure sentinel. Presumably a
        # capturing placeholder inside the braces was lost - confirm against
        # the marker that token_add_color_RGB actually emits.
        katex_template = re.sub(
            r"\\mathcolor\[RGB\]{}",
            r"\\color{__COLOR__\1__}",
            colored_template_original,
        )
        return katex_template, token_list

    except Exception as e:
        # Best-effort: report the error and the offending input, then return a sentinel.
        print(f"\n在 KaTeX 处理过程中出错: {e}", file=sys.stderr)
        print(f"有问题的 LaTeX 字符串: {pre_tokenized_latex}", file=sys.stderr)
        return "[PROCESSING FAILED]", []
def preprocess(s: str):
    """Clean, tokenize and colourize one LaTeX string for rendering.

    Args:
        s: Raw LaTeX expression.

    Returns:
        ``(final_latex, color_map)`` where ``final_latex`` is the KaTeX source
        with every colour placeholder replaced by a ``#rrggbb`` value, and
        ``color_map`` is a list of ``(token, (r, g, b))`` pairs in token order.

    Raises:
        ValueError: If tokenization fails. Previously a bare ``0.0`` was
            returned, which made callers that unpack two values fail with an
            unrelated ``TypeError``.
    """
    # --- Step 1: Clean & Tokenization ---
    clean_s = clean(s)
    success_tokenization, tokenized_s = tokenize(clean_s)

    if not success_tokenization:
        raise ValueError(f"Tokenization failed for LaTeX input: {s!r}")

    # --- Step 2: Prepare Colorized Latex for KaTeX ---
    katex_template, token_list = process_for_katex(tokenized_s)

    # Generate a few more colours than tokens.
    num_colors = len(token_list) + 10
    colors_rgb = generate_high_contrast_colors(num_colors)

    final_latex = katex_template
    color_map = []  # List of (token, rgb_color)
    for c_idx, token in enumerate(token_list):
        r, g, b = colors_rgb[c_idx % len(colors_rgb)]
        final_latex = final_latex.replace(
            f"__COLOR__{c_idx}__", f"#{r:02x}{g:02x}{b:02x}"
        )
        color_map.append((token, (r, g, b)))

    # Drop a colour-wrapped leading \[ and trailing \] so only the formula remains.
    final_latex = PATTERN_STRIP_START_BRACKET.sub("", final_latex)
    final_latex = PATTERN_STRIP_END_BRACKET.sub("", final_latex)

    return final_latex, color_map


def render(latex_strings: List[str], chromedriver: str):
    """Render LaTeX strings to images through a ``RenderWorker``.

    Args:
        latex_strings: Formulas to render; any string not already starting
            with ``$$`` is wrapped in ``$$...$$``.
        chromedriver: Path passed to ``RenderWorker`` as ``driver_path``.

    Returns:
        The images returned by ``worker.render`` (one per input), or ``None``
        when the worker cannot be created or rendering raises. The failure is
        printed together with its traceback.
    """
    try:
        worker = RenderWorker(
            template_file="file://" + str(TEMPLATE_FILE.resolve()),
            timeout=30,
            driver_path=chromedriver,
        )
    except Exception:
        print("Failed to init RenderWorker:")
        print("=" * 30)
        # format_exc() reads the active exception itself; its first parameter
        # is a frame limit, so passing the exception object there fails.
        print(traceback.format_exc())
        return None

    try:
        latex_strings = [
            f"$${s}$$" if not s.startswith("$$") else s for s in latex_strings
        ]
        imgs = worker.render(latex_strings)
    except Exception:
        print("Rendering failed:")
        print("=" * 30)
        print(traceback.format_exc())
        return None
    finally:
        # Always release the worker, on success and on failure.
        worker.close()

    assert len(imgs) == len(
        latex_strings
    ), "Number of rendered images must match number of input strings"
    return imgs


def calculate_metrics(gt_len, pred_len, match_num):
    """Compute F1-score, recall and precision from match counts.

    Args:
        gt_len: Number of ground-truth tokens.
        pred_len: Number of predicted tokens.
        match_num: Number of matched tokens.

    Returns:
        ``(f1_score, recall, precision)``. A ratio whose denominator is zero
        is reported as ``0.0``.
    """
    recall = match_num / gt_len if gt_len > 0 else 0.0
    precision = match_num / pred_len if pred_len > 0 else 0.0
    if recall + precision > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0.0
    return f1_score, recall, precision
def postprocess(
    img_gt: np.ndarray,
    img_pred: np.ndarray,
    gt_color_map: List[Tuple[str, Tuple[int, int, int]]],
    pred_color_map: List[Tuple[str, Tuple[int, int, int]]],
    visualize: bool,
):
    """Match coloured tokens between two rendered formulas and score them.

    Both images are padded onto white canvases of a common size, one bounding
    box per token colour is extracted, boxes are matched with
    ``HungarianMatcher``, the matches are filtered with RANSAC and a token-cost
    check, and F1 / recall / precision are computed from the surviving matches.

    Args:
        img_gt: Rendered ground-truth image.
        img_pred: Rendered prediction image.
        gt_color_map: ``(token, (r, g, b))`` pairs for the ground truth.
        pred_color_map: ``(token, (r, g, b))`` pairs for the prediction.
        visualize: When true, also build an image showing the accepted matches.

    Returns:
        ``(f1, recall, precision)``, or ``(f1, recall, precision, vis_img)``
        when ``visualize`` is true.
    """
    # Normalize Image Sizes (Max Height/Width)
    h_gt, w_gt = img_gt.shape[:2]
    h_pred, w_pred = img_pred.shape[:2]
    max_h = max(h_gt, h_pred)
    max_w = max(w_gt, w_pred)

    # Create canvas: white background, image pasted at the top-left corner
    final_gt_img = np.full((max_h, max_w, 3), 255, dtype=np.uint8)
    final_gt_img[0:h_gt, 0:w_gt] = img_gt

    final_pred_img = np.full((max_h, max_w, 3), 255, dtype=np.uint8)
    final_pred_img[0:h_pred, 0:w_pred] = img_pred

    vis_img = None

    gt_colors = [c[1] for c in gt_color_map]
    pred_colors = [c[1] for c in pred_color_map]

    # bboxes format: [xmin, ymin, xmax, ymax]
    gt_bboxes_list = get_bboxes_from_array(final_gt_img, gt_colors)
    pred_bboxes_list = get_bboxes_from_array(final_pred_img, pred_colors)

    # --- Step 3: Match Tokens ---
    # Convert to list of dicts as expected by HungarianMatcher
    # item = {'bbox': [xmin, ymin, xmax, ymax], 'token': token_str}

    # Tokens whose colour was not found (empty bbox) are left out of matching.
    gt_data = []
    for i, bbox in enumerate(gt_bboxes_list):
        if bbox:
            gt_data.append({"bbox": bbox, "token": gt_color_map[i][0]})

    pred_data = []
    for i, bbox in enumerate(pred_bboxes_list):
        if bbox:
            pred_data.append({"bbox": bbox, "token": pred_color_map[i][0]})

    matcher = HungarianMatcher()
    size_tuple = (max_w, max_h)

    matched_idxes = matcher(gt_data, pred_data, size_tuple, size_tuple)

    # RANSAC Verification
    src, dst = [], []
    for idx1, idx2 in matched_idxes:
        # Center points
        x1_c = (gt_data[idx1]["bbox"][0] + gt_data[idx1]["bbox"][2]) / 2
        y1_c = (gt_data[idx1]["bbox"][1] + gt_data[idx1]["bbox"][3]) / 2
        x2_c = (pred_data[idx2]["bbox"][0] + pred_data[idx2]["bbox"][2]) / 2
        y2_c = (pred_data[idx2]["bbox"][1] + pred_data[idx2]["bbox"][3]) / 2
        # Points are stored as (y, x).
        src.append([y1_c, x1_c])
        dst.append([y2_c, x2_c])

    src, dst = np.array(src), np.array(dst)
    min_samples = 3

    if src.shape[0] <= min_samples:
        # Too few matches to fit a model: accept all of them.
        inliers = np.ones(len(matched_idxes), dtype=bool)
    else:
        inliers = np.zeros(len(matched_idxes), dtype=bool)
        # Up to five rounds; each round refits on the matches not yet accepted.
        for _ in range(5):
            if np.sum(~inliers) <= min_samples:
                break
            # SimpleAffineTransform expects (N, 2)
            # RANSAC fits model to data
            try:
                model, inliers_1 = ransac(
                    (src[~inliers], dst[~inliers]),
                    SimpleAffineTransform,
                    min_samples=min_samples,
                    residual_threshold=20,
                    max_trials=50,
                )
                if inliers_1 is not None and inliers_1.any():
                    inliers = update_inliers(inliers, inliers_1)
                else:
                    break
            except Exception:
                # Ransac might fail if data is degenerate
                break

    # Double check token cost for inliers
    for idx, (a, b) in enumerate(matched_idxes):
        # matcher.cost['token'] is (gt_len, pred_len)
        # If token cost is 1 (completely different), reject even if spatially aligned?
        # visual_matcher.py logic: if inliers[idx] and matcher.cost['token'][a, b] == 1: inliers[idx] = False
        if inliers[idx] and matcher.cost["token"][a, b] == 1:
            inliers[idx] = False

    match_num = np.sum(inliers)

    # Denominators count every token, including those with no detected bbox.
    num_gt = len(gt_bboxes_list)
    num_pred = len(pred_bboxes_list)

    f1, recall, precision = calculate_metrics(num_gt, num_pred, match_num)

    if visualize:
        # Ground truth on top, prediction below, separated by a 10-pixel gap.
        vis_img = np.full((max_h * 2 + 10, max_w, 3), 255, dtype=np.uint8)
        vis_img[0:max_h, 0:max_w] = final_gt_img
        vis_img[max_h + 10 : max_h + 10 + max_h, 0:max_w] = final_pred_img

        # Draw matches that are inliers
        for idx, (gt_idx, pred_idx) in enumerate(matched_idxes):
            if inliers[idx]:
                gt_box = gt_data[gt_idx]["bbox"]
                pred_box = pred_data[pred_idx]["bbox"]

                # Draw boxes
                cv2.rectangle(
                    vis_img,
                    (gt_box[0], gt_box[1]),
                    (gt_box[2], gt_box[3]),
                    (0, 255, 0),
                    1,
                )
                # Prediction boxes are shifted down into the lower panel.
                y_offset = max_h + 10
                cv2.rectangle(
                    vis_img,
                    (pred_box[0], pred_box[1] + y_offset),
                    (pred_box[2], pred_box[3] + y_offset),
                    (0, 0, 255),
                    1,
                )

                # Draw line
                pt1 = (
                    int((gt_box[0] + gt_box[2]) / 2),
                    int((gt_box[1] + gt_box[3]) / 2),
                )
                pt2 = (
                    int((pred_box[0] + pred_box[2]) / 2),
                    int((pred_box[1] + pred_box[3]) / 2) + y_offset,
                )
                cv2.line(vis_img, pt1, pt2, (255, 0, 0), 1)

    return (f1, recall, precision, vis_img) if visualize else (f1, recall, precision)
class FastCDM:
    """Evaluator that scores a predicted LaTeX formula against a ground truth.

    Args:
        chromedriver: Path to the chromedriver executable used for rendering.
        chromedriver_path: Keyword-only alias for ``chromedriver``, accepted so
            that ``FastCDM(chromedriver_path=...)`` works as well.

    Raises:
        ValueError: If both arguments are given with different values.
    """

    def __init__(
        self, chromedriver: str = None, *, chromedriver_path: str = None
    ) -> None:
        if (
            chromedriver is not None
            and chromedriver_path is not None
            and chromedriver != chromedriver_path
        ):
            raise ValueError(
                "Pass either 'chromedriver' or 'chromedriver_path', not two "
                "different values."
            )
        self.chromedriver = (
            chromedriver if chromedriver is not None else chromedriver_path
        )

    @staticmethod
    def _prepare(latex: str):
        """Preprocess one formula; return ``None`` when it cannot be prepared."""
        try:
            prepared = preprocess(latex)
        except ValueError as exc:
            import warnings

            warnings.warn(
                f"FastCDM preprocessing failed: {exc}", RuntimeWarning, stacklevel=3
            )
            return None
        # A non-tuple result (e.g. a bare 0.0 failure sentinel) cannot be
        # unpacked into (latex, color_map); treat it as a failure too.
        return prepared if isinstance(prepared, tuple) else None

    def compute(self, gt: str, pred: str, visualize: bool = False) -> tuple:
        """Compute the CDM score for a ground-truth / prediction pair.

        Args:
            gt: Ground-truth LaTeX expression.
            pred: Predicted LaTeX expression.
            visualize: When true, also return the match visualization image.

        Returns:
            ``(f1, recall, precision)``, or ``(f1, recall, precision, vis_img)``
            when ``visualize`` is true. If either formula cannot be
            preprocessed, all three scores are ``0.0`` and ``vis_img`` is
            ``None``.

        Raises:
            RuntimeError: If rendering fails and no images are produced.
        """
        gt_prepared = self._prepare(gt)
        pred_prepared = self._prepare(pred)
        if gt_prepared is None or pred_prepared is None:
            return (0.0, 0.0, 0.0, None) if visualize else (0.0, 0.0, 0.0)

        gt_latex, gt_color_map = gt_prepared
        pred_latex, pred_color_map = pred_prepared

        imgs = render([gt_latex, pred_latex], self.chromedriver)
        if imgs is None:
            # render() reports the underlying error itself and returns None.
            raise RuntimeError(
                "Rendering failed; check the chromedriver path and the "
                "traceback printed above."
            )
        gt_img, pred_img = imgs[0], imgs[1]

        return postprocess(gt_img, pred_img, gt_color_map, pred_color_map, visualize)

    def batch_compute(self, gt_list: list, pred_list: list) -> list:
        """Compute CDM scores for lists of formulas (not implemented yet).

        Args:
            gt_list: Ground-truth LaTeX expressions.
            pred_list: Predicted LaTeX expressions.

        Returns:
            Intended to be a list of ``(f1, recall, precision)`` tuples, one
            per pair.

        Raises:
            NotImplementedError: Always, until batch evaluation is implemented.
        """
        # TODO: implement batch evaluation.
        raise NotImplementedError("batch_compute is not implemented yet.")
# Regular-expression patterns that the token_add_color* functions "skip".
# A token matching any of these patterns is not coloured and is kept as is
# (usually rendered in black). Mainly structural elements: braces,
# environment boundaries, sub/superscript markers, rules and lines.
SKIP_PATTERNS = [
    r"\{",
    r"\}",
    r"[\[\]]",
    r"\\begin\{.*?\}",
    r"\\end\{.*?\}",
    r"\^",
    r"\_",
    r"\\.*rule.*",
    r"\\.*line.*",
    r"\[[\-.0-9]+[epm][xtm]\]",
]

# LaTeX commands treated as "transparent" / meaningless tokens in later
# processing: they never trigger the colouring logic and are skipped
# directly, so they do not interfere with the real math content.
SKIP_Tokens = [
    "\\",
    "\\\\",
    "\\index",
    "\\a",
    "&",
    "$",
    "\\multirow",
    "\\def",
    "\\edef",
    "\\raggedright",
    "\\url",
    "\\cr",
    "\\ensuremath",
    "\\left",
    "\\left[",
    "\\left(",
    "\\left{",
    "\\right",
    "\\right]",
    "\\right)",
    "\\right}",
    "\\mathchoice",
    "\\scriptstyle",
    "\\displaystyle",
    "\\qquad",
    "\\quad",
    "\\,",
    "\\!",
    "~",
    "\\boldmath",
    "\\gdef",
    "\\today",
    "\\the",
]

# "Phantom" commands: the command together with its argument (a braced group,
# or otherwise the single following token) is stepped over by the colouring
# functions without being colour-marked.
# Mainly fonts, references, colours and similar commands that do not affect
# the math structure.
PHANTOM_Tokens = [
    "\\fontfamily",
    "\\vphantom",
    "\\phantom",
    "\\rowcolor",
    "\\ref",
    "\\thesubequation",
    "\\global",
    "\\theboldgroup",
]
# "Two-tail" commands: they take two {} arguments, e.g.
# \frac{numerator}{denominator}. When colouring, each of the two arguments
# is greyed out separately.
TWO_Tail_Tokens = ["\\frac", "\\binom"]

# Commands with an "optional + mandatory" argument structure: the first
# argument may be [], the second must be {}. E.g. \xrightarrow[below]{above};
# both arguments are processed separately when colouring.
AB_Tail_Tokens = ["\\xrightarrow", "\\xleftarrow", "\\sqrt"]  # special token \xxx [] {}

# Also "two-tail", but treated as "invisible" structure: colouring logic is
# similar to TWO_Tail_Tokens, yet they are normally used for stacking-style
# layout and do not change the mathematical meaning.
TWO_Tail_Invisb_Tokens = ["\\overset", "\\underset", "\\stackrel"]

# Commands that take a single {} argument and visibly change the appearance
# of the symbol. When colouring, the command itself stays black and the
# argument content is greyed out.
ONE_Tail_Tokens = [
    "\\widetilde",
    "\\overline",
    "\\hat",
    "\\widehat",
    "\\tilde",
    "\\Tilde",
    "\\dot",
    "\\bar",
    "\\vec",
    "\\underline",
    "\\underbrace",
    "\\check",
    "\\breve",
    "\\Bar",
    "\\Vec",
    "\\mathring",
    "\\ddot",
    "\\Ddot",
    "\\dddot",
    "\\ddddot",
]

# Commands that also take a single {} argument but only switch font or
# style; they are treated as "invisible": the command itself is not marked,
# only its argument is processed recursively.
ONE_Tail_Invisb_Tokens = [
    "\\boldsymbol",
    "\\pmb",
    "\\textbf",
    "\\mathrm",
    "\\mathbf",
    "\\mathsf",
    "\\mathbb",
    "\\mathcal",
    "\\mathinner",
    "\\mathit",
    "\\mathnormal",
    "\\mathring",
    "\\mathscr",
    "\\mathtt",
    "\\textmd",
    "\\texttt",
    "\\textnormal",
    "\\text",
    "\\textit",
    "\\textup",
    "\\mathop",
    "\\mathbin",
    "\\smash",
    "\\operatorname",
    "\\textrm",
    "\\mathfrak",
    "\\emph",
    "\\textsf",
    "\\textsc",
]

# \left-delimiter tokens and the \right token that closes each of them.
_LEFT_RIGHT_PAIRS = {
    "\\left(": "\\right)",
    "\\left[": "\\right]",
    "\\left{": "\\right}",
}

# Tokens dropped by flatten_multiline when they occur outside \left...\right.
_ROW_BREAK_TOKENS = frozenset(["\\\\", "~", "\\qquad"])

# Whitespace that has a non-backslash character on both sides.
_INNER_SPACE_RE = re.compile(r"(?<=[^\\])\s+(?=[^\\])")

# A maximal run of trailing spacing commands, "~" and "." at end of string.
_TRAILING_RE = re.compile(
    r"(\\(hspace\*?\{[^{}]*?\}|vspace\*?\{[^{}]*?\}|smallskip|medskip|quad|qquad|bigskip|[;,])|\~|\.)*$"
)


def flatten_multiline(latex):
    """Flatten a space-tokenized multi-line formula into ``"$ ... $"``.

    If the first token is ``\\begin{array}``, that token and the one after it
    are removed, together with a final ``\\end{array}`` token when present.
    Row breaks (``\\\\``), ``~`` and ``\\qquad`` tokens are then dropped,
    except inside a ``\\left( ... \\right)`` style group (round, square or
    curly), which is copied through unchanged.

    Args:
        latex: Formula whose tokens are separated by single spaces.

    Returns:
        The remaining tokens joined by spaces and wrapped as ``"$ " + ... + " $"``.

    Raises:
        ValueError: Propagated from ``find_matching_brace`` when a ``\\left``
            delimiter has no matching ``\\right`` delimiter.
    """
    tokens = latex.split(" ")
    if tokens[0] == "\\begin{array}":
        tokens = tokens[2:-1] if tokens[-1] == "\\end{array}" else tokens[2:]

    # Single pass into a new list instead of re-slicing the list for every
    # removed token (which was quadratic in the number of tokens).
    kept = []
    idx = 0
    while idx < len(tokens):
        token = tokens[idx]
        closer = _LEFT_RIGHT_PAIRS.get(token)
        if closer is not None:
            end_idx = find_matching_brace(tokens, idx, brace=(token, closer))
            if end_idx != -1:
                # Copy the whole delimited group verbatim.
                kept.extend(tokens[idx : end_idx + 1])
                idx = end_idx + 1
                continue
            kept.append(token)
        elif token not in _ROW_BREAK_TOKENS:
            kept.append(token)
        idx += 1
    return "$ " + " ".join(kept) + " $"


def clean_latex(text):
    """Remove inner whitespace from *text*, keeping command boundaries intact.

    Whitespace runs are deleted unless they touch a backslash on either
    side. A space is then re-inserted after a fixed set of commands, and the
    space in front of ``\\mathcolor{black}`` is removed.

    Args:
        text: LaTeX source string.

    Returns:
        The cleaned string.
    """
    cleaned_text = _INNER_SPACE_RE.sub("", text)
    for item in (
        "\\hline",
        "\\midrule",
        "\\times",
        "\\bf",
        "\\footnotesize",
        "\\cr",
        "\\log",
    ):
        cleaned_text = cleaned_text.replace(item, item + " ")
    cleaned_text = cleaned_text.replace(" \\mathcolor{black}", "\\mathcolor{black}")
    return cleaned_text


def remove_trailing_latex(formula):
    """Strip trailing spacing commands, ``~`` and ``.`` from *formula*.

    Args:
        formula: LaTeX source string.

    Returns:
        *formula* without the trailing run of ``\\hspace{..}``, ``\\vspace{..}``,
        ``\\smallskip``, ``\\medskip``, ``\\bigskip``, ``\\quad``, ``\\qquad``,
        ``\\;``, ``\\,``, ``~`` and ``.`` items.
    """
    return _TRAILING_RE.sub("", formula, count=1)


def find_matching_brace(sequence, start_index, brace=("{", "}")):
    """Return the index of the element that closes the group at *start_index*.

    Scans ``sequence[start_index:]`` while tracking nesting depth and returns
    the index at which the depth returns to zero.

    Args:
        sequence: Indexable sequence of tokens (or a string of characters).
        start_index: Index at which scanning starts; normally an opener.
        brace: ``(opener, closer)`` pair. Any two-item sequence is accepted,
            so callers passing a list keep working.

    Returns:
        Index of the matching closer, or ``-1`` if the scan ends without ever
        closing a group while no group is left open.

    Raises:
        ValueError: If the scan ends while a group is still open.
    """
    left_brace, right_brace = brace
    depth = 0
    for i in range(start_index, len(sequence)):
        item = sequence[i]
        if item == left_brace:
            depth += 1
        elif item == right_brace:
            depth -= 1
            if depth == 0:
                return i
    if depth > 0:
        error_info = "Warning! found no matching brace in sequence !"
        raise ValueError(error_info)
    return -1
# Whitespace-delimited tokens that are rewritten verbatim (dash runs and
# every ellipsis-like command become spelled-out characters).
_TOKEN_REWRITES = (
    ("--", "- -"),
    ("---", "- - -"),
    ("…", ". . ."),
    ("\\ldots", ". . ."),
    ("\\hdots", ". . ."),
    ("\\cdots", ". . ."),
    ("\\dddot", ". . ."),
    ("\\dots", ". . ."),
    ("\\dotsc", ". . ."),
    ("\\dotsi", ". . ."),
    ("\\dotsm", ". . ."),
    ("\\dotso", ". . ."),
    ("\\dotsb", ". . ."),
    ("\\mathellipsis", ". . ."),
)

# (command name, letters to spell out) for operator names that are rewritten
# to ``\mathrm { <spaced letters> }``.
_OPERATOR_SPELLINGS = (
    ("ex", "ex"),
    ("ln", "ln"),
    ("lg", "lg"),
    ("cot", "cot"),
    ("mod", "mod"),
    ("bmod", "mod"),
    ("pmod", "mod"),
    ("min", "min"),
    ("max", "max"),
    ("ker", "ker"),
    ("hom", "hom"),
    ("sec", "sec"),
    ("scs", "scs"),
    ("csc", "csc"),
    ("deg", "deg"),
    ("arg", "arg"),
    ("log", "log"),
    ("dim", "dim"),
    ("exp", "exp"),
    ("sin", "sin"),
    ("cos", "cos"),
    ("tan", "tan"),
    ("tanh", "tanh"),
    ("cosh", "cosh"),
    ("sinh", "sinh"),
    ("coth", "coth"),
    ("arcsin", "arcsin"),
    ("arccos", "arccos"),
    ("arctan", "arctan"),
)


def _drop_spaces(fragment):
    """Return *fragment* with every space character removed."""
    return fragment.replace(" ", "")


def _replace_matches(text, pattern, transform):
    """Replace each regex match of *pattern* in *text* by ``transform(match)``.

    Matches are collected once up front; each one is then substituted with a
    literal ``str.replace`` on the evolving text.
    """
    for found in re.findall(pattern, text, re.DOTALL):
        text = text.replace(found, transform(found))
    return text


def _replace_whole_token(text, token, replacement):
    """Replace *token* where it has whitespace on both sides."""
    pattern = r"(?<=\s)" + re.escape(token) + r"(?=\s)"
    return re.sub(pattern, lambda _match: replacement, text)


def normalize_latex(l, rm_trail=False):
    """Normalize a space-tokenized LaTeX string before colouring.

    The string is treated as a table when it contains ``"tabular"`` and as a
    formula otherwise. Normalization merges multi-token constructs into one
    token (``\\hspace { 1 em }``, ``\\big (``, ``\\begin{array} { c c }`` ...),
    spells out ellipses and common operator names, removes a few layout
    commands, and finally makes sure every command that expects ``{}``
    arguments actually has them.

    Args:
        l: LaTeX source whose tokens are separated by single spaces.
        rm_trail: When true, trailing spacing commands and punctuation are
            removed first via ``remove_trailing_latex``.

    Returns:
        The normalized string (with one leading and trailing space added
        before the token rewrites).

    Raises:
        ValueError: Propagated from ``find_matching_brace`` on unbalanced
            braces.
    """
    text = l
    latex_type = "tabular" if "tabular" in text else "formula"

    if rm_trail:
        text = remove_trailing_latex(text)
    text = (
        text.strip()
        .replace(r"\pmatrix", r"\mypmatrix")
        .replace(r"\matrix", r"\mymatrix")
    )

    for item in ("\\raggedright", "\\arraybackslash", "\\lowercase", "\\uppercase"):
        text = text.replace(item, "")

    # \hspace { 1 em } -> removed in tables, merged into one token otherwise.
    if latex_type == "tabular":
        text = _replace_matches(text, r"\\[hv]space { [.0-9a-z ]+ }", lambda _f: "")
    else:
        text = _replace_matches(text, r"\\[hv]space { [.0-9a-z ]+ }", _drop_spaces)

    if latex_type == "tabular":
        text = text.replace("\\begin {tabular}", "\\begin{tabular}")
        text = text.replace("\\end {tabular}", "\\end{tabular}")
        text = text.replace("\\begin {array}", "\\begin{array}")
        text = text.replace("\\end {array}", "\\end{array}")

        # Merge the first \begin{tabular} with its column spec into one token.
        l_split = text.split(" ")
        idx = 0
        while idx < len(l_split):
            if l_split[idx] == "\\begin{tabular}":
                end_idx = find_matching_brace(l_split, idx + 1)
                merged = "".join(l_split[idx : end_idx + 1])
                l_split = l_split[0:idx] + [merged] + l_split[end_idx + 1 :]
                break
            idx += 1
        text = " ".join(l_split)

        # Merge \cmidrule / \cline with their (optional "( )") arguments.
        l_split = text.split(" ")
        idx = 0
        while idx < len(l_split):
            if l_split[idx] in ("\\cmidrule", "\\cline"):
                sub_idx = idx + 1
                if l_split[sub_idx] == "(":
                    mid_end = find_matching_brace(l_split, sub_idx, brace=["(", ")"])
                    end_idx = find_matching_brace(l_split, mid_end + 1)
                else:
                    end_idx = find_matching_brace(l_split, sub_idx)
                merged = "".join(l_split[idx : end_idx + 1])
                l_split = l_split[0:idx] + [merged] + l_split[end_idx + 1 :]
            idx += 1
        text = " ".join(l_split)

    # \begin{array} { l r c } -> \begin{array} {lrc}
    # The prefix (including its trailing space) is kept and only the spaces
    # inside the column spec are removed. The previous sentinel-based
    # implementation ended in str.replace("", ...), which inserts the
    # replacement between every character.
    array_prefix = "\\begin{array} "
    text = _replace_matches(
        text,
        r"\\begin{array} { [lrc ]+ }",
        lambda found: array_prefix + _drop_spaces(found[len(array_prefix) :]),
    )

    # The lookarounds below need whitespace on both sides of every token.
    text = " " + text + " "
    for source, target in _TOKEN_REWRITES:
        text = _replace_whole_token(text, source, target)
    for name, letters in _OPERATOR_SPELLINGS:
        text = _replace_whole_token(
            text, "\\" + name, "\\mathrm { " + " ".join(letters) + " }"
        )
    text = re.sub(r"(?<=\s)\\bf([a-zA-Z])", r"\\mathbf{\1}", text)

    # Merge "\string x", "\big (" and "\big \langle" style pairs into one
    # token each (a single space is re-appended after the merged token).
    for pattern in (
        r"\\string [^ ]+ ",
        r"\\[Bb]ig[g]?[glrm]? [(){}|\[\]] ",
        r"\\[Bb]ig[g]?[glrm]? \\.*? ",
    ):
        text = _replace_matches(text, pattern, lambda found: _drop_spaces(found) + " ")

    text = _replace_matches(text, r"\\operatorname \*", lambda _f: "\\operatorname")

    text = text.replace("\\lefteqn", "")

    text = text.replace("\\footnote ", "^ ")

    text = _replace_matches(
        text, r"\\\' [^{] ", lambda found: _drop_spaces(found) + " "
    )

    if latex_type == "tabular":
        text = _replace_matches(text, r"\[ [\-.0-9 ]+[exptcm ]+ \]", _drop_spaces)

        text = _replace_matches(text, r"\\parbox {[^{]+}", _drop_spaces)

        # Keep the opening "{" of the raised content as its own token.
        text = _replace_matches(
            text,
            r"\\raisebox {[^{]+} [\[\]0-9 exptcm]+{",
            lambda found: _drop_spaces(found)[0:-1] + " {",
        )

        text = _replace_matches(
            text,
            r"{ \\char[0-9\' ]+}",
            lambda found: "{ " + _drop_spaces(found)[1:-1] + " }",
        )

        text = _replace_matches(
            text, r"\\rule {[ .0-9a-z]+} {[ .0-9a-z]+}", _drop_spaces
        )

        text = _replace_matches(
            text,
            r"\\specialrule {[ .0-9a-z]+} {[ .0-9a-z]+} {[ .0-9a-z]+}",
            _drop_spaces,
        )

        # Colour commands are removed together with their colour argument.
        text = _replace_matches(
            text,
            r"\\colorbox[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } |\\color[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } |\\textcolor[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } |\\cellcolor[ \[\]RGBrgb]+{ [A-Za-z 0-9,!]+ } ",
            lambda _f: "",
        )

    # Fill in missing braces so every tail command has explicit {} arguments.
    one_tail = set(ONE_Tail_Tokens) | set(ONE_Tail_Invisb_Tokens)
    two_tail = set(TWO_Tail_Tokens) | set(TWO_Tail_Invisb_Tokens)
    l_split = text.split(" ")
    idx = 0
    while idx < len(l_split):
        token = l_split[idx]
        if token in one_tail:
            # A chain such as "\mathbf \hat x" becomes
            # "\mathbf { \hat { x } }".
            sub_idx = idx + 1
            while sub_idx < len(l_split) and l_split[sub_idx] in one_tail:
                sub_idx += 1
            opened = []
            for command in l_split[idx:sub_idx]:
                opened += [command, "{"]
            closers = ["}"] * (sub_idx - idx)
            if l_split[sub_idx] != "{":
                l_split = (
                    l_split[0:idx]
                    + opened
                    + [l_split[sub_idx]]
                    + closers
                    + l_split[sub_idx + 1 :]
                )
            else:
                end_idx = find_matching_brace(l_split, sub_idx)
                l_split = (
                    l_split[0:idx]
                    + opened
                    + l_split[sub_idx + 1 : end_idx]
                    + closers
                    + l_split[end_idx + 1 :]
                )
        elif token in AB_Tail_Tokens:
            if l_split[idx + 1] != "[" and l_split[idx + 1] != "{":
                l_split = (
                    l_split[0 : idx + 1]
                    + ["{"]
                    + [l_split[idx + 1]]
                    + ["}"]
                    + l_split[idx + 2 :]
                )
            else:
                if l_split[idx + 1] == "[":
                    end1 = find_matching_brace(l_split, idx + 1, brace=["[", "]"])
                else:
                    end1 = idx
                if l_split[end1 + 1] != "{":
                    l_split = (
                        l_split[0 : end1 + 1]
                        + ["{"]
                        + [l_split[end1 + 1]]
                        + ["}"]
                        + l_split[end1 + 2 :]
                    )
        elif token in two_tail:
            if l_split[idx + 1] != "{":
                l_split = (
                    l_split[0 : idx + 1]
                    + ["{"]
                    + [l_split[idx + 1]]
                    + ["}"]
                    + l_split[idx + 2 :]
                )
            end1 = find_matching_brace(l_split, idx + 1)
            if l_split[end1 + 1] != "{":
                l_split = (
                    l_split[0 : end1 + 1]
                    + ["{"]
                    + [l_split[end1 + 1]]
                    + ["}"]
                    + l_split[end1 + 2 :]
                )

        idx += 1
    return " ".join(l_split)
def _render_gray(tokens):
    """Join *tokens* with spaces and wrap the result in a gray ``\\mathcolor``."""
    return r"\mathcolor{gray}{ " + " ".join(tokens) + " }"


def token_add_color(l_split, idx, render_dict):
    """Build the "one token black, everything else gray" rendering for a token.

    For the token at ``l_split[idx]`` a full copy of the formula is produced
    in which that token is wrapped in ``\\mathcolor{black}`` and the whole
    formula in ``\\mathcolor{gray}``. The result is stored as
    ``render_dict[str(idx)] = (latex, token)``. Structural tokens are skipped;
    commands with arguments are handled according to the token category
    lists of this module.

    Args:
        l_split: List of tokens of the normalized formula (not modified).
        idx: Index of the token to process.
        render_dict: Dict that receives the renderings (mutated in place).

    Returns:
        Tuple ``(l_split, next_idx, render_dict)`` where ``next_idx`` is the
        index the caller should process next.

    Raises:
        ValueError: Propagated from ``find_matching_brace`` on unbalanced
            braces.
    """
    token = l_split[idx]
    if token in PHANTOM_Tokens:
        # Skip the command and its argument. The bounds check avoids an
        # IndexError when the command is the last token.
        if idx + 1 < len(l_split) and l_split[idx + 1] == "{":
            brace_end = find_matching_brace(l_split, idx + 1)
        else:
            brace_end = idx + 1
        next_idx = brace_end + 1
    elif token in TWO_Tail_Tokens:
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        den_start = num_end + 1
        den_end = find_matching_brace(l_split, den_start)
        # Command black, both arguments gray.
        l_split_copy = (
            l_split[:idx]
            + [r"\mathcolor{black}{" + token + "{"]
            + [r"\mathcolor{gray}{"]
            + l_split[num_start + 1 : num_end]
            + ["}"]
            + [r"}{"]
            + [r"\mathcolor{gray}{"]
            + l_split[den_start + 1 : den_end]
            + ["}"]
            + ["}"]
            + ["}"]
            + l_split[den_end + 1 :]
        )
        render_dict[str(idx)] = _render_gray(l_split_copy), token
        next_idx = idx + 1
    elif token in ONE_Tail_Tokens:
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        # Command black, its single argument gray.
        l_split_copy = (
            l_split[:idx]
            + [r"\mathcolor{black}{"]
            + l_split[idx : num_start + 1]
            + [r"\mathcolor{gray}{"]
            + l_split[num_start + 1 : num_end]
            + ["}"]
            + l_split[num_end : num_end + 1]
            + ["}"]
            + l_split[num_end + 1 :]
        )
        render_dict[str(idx)] = _render_gray(l_split_copy), token
        next_idx = idx + 1
    elif token in ONE_Tail_Invisb_Tokens:
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        sub_idx = num_start + 1
        if num_end - num_start == 2:
            # Exactly one token inside the braces: colour it directly and
            # record that inner token instead of the style command.
            l_split_copy = l_split.copy()
            l_split_copy[sub_idx] = (
                r"{\mathcolor{black}{" + l_split_copy[sub_idx] + "}}"
            )
            render_dict[str(idx)] = _render_gray(l_split_copy), l_split[sub_idx]
            next_idx = num_end
        else:
            while sub_idx < num_end:
                l_split, sub_idx, render_dict = token_add_color(
                    l_split, sub_idx, render_dict
                )
            next_idx = num_end + 1
    elif token in AB_Tail_Tokens:
        following = l_split[idx + 1] if idx + 1 < len(l_split) else None
        if following == "{":
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start)
            l_split_copy = (
                l_split[:idx]
                + [r"\mathcolor{black}{"]
                + l_split[idx : idx + 2]
                + [r"\mathcolor{gray}{"]
                + l_split[num_start + 1 : num_end]
                + ["}}"]
                + l_split[num_end:]
            )
            render_dict[str(idx)] = _render_gray(l_split_copy), token
            sub_idx = num_start + 1
            while sub_idx < num_end:
                l_split, sub_idx, render_dict = token_add_color(
                    l_split, sub_idx, render_dict
                )
            next_idx = num_end + 1
        elif following == "[":
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start, brace=["[", "]"])
            den_start = num_end + 1
            den_end = find_matching_brace(l_split, den_start)
            l_split_copy = (
                l_split[:idx]
                + [r"{\mathcolor{black}{"]
                + l_split[idx : idx + 2]
                + [r"\mathcolor{gray}{"]
                + l_split[idx + 2 : num_end]
                + ["}"]
                + l_split[num_end : den_start + 1]
                + [r"\mathcolor{gray}{"]
                + l_split[den_start + 1 : den_end]
                + ["}"]
                + l_split[den_end : den_end + 1]
                + ["}}"]
                + l_split[den_end + 1 :]
            )
            render_dict[str(idx)] = _render_gray(l_split_copy), token
            sub_idx = num_start + 1
            while sub_idx < num_end:
                l_split, sub_idx, render_dict = token_add_color(
                    l_split, sub_idx, render_dict
                )
            sub_idx = den_start + 1
            while sub_idx < den_end:
                l_split, sub_idx, render_dict = token_add_color(
                    l_split, sub_idx, render_dict
                )
            next_idx = den_end + 1
        else:
            # Command without a bracketed argument (input that was not
            # normalized): step over it instead of leaving next_idx unbound.
            next_idx = idx + 1
    elif token in ["\\multicolumn", "\\multirow"]:
        # Only the third argument (the cell content) is coloured.
        first_start = idx + 1
        first_end = find_matching_brace(l_split, first_start)
        second_start = first_end + 1
        second_end = find_matching_brace(l_split, second_start)
        third_start = second_end + 1
        third_end = find_matching_brace(l_split, third_start)

        sub_idx = third_start + 1
        while sub_idx < third_end:
            l_split, sub_idx, render_dict = token_add_color(
                l_split, sub_idx, render_dict
            )
        next_idx = third_end + 1
    elif token in SKIP_Tokens + TWO_Tail_Invisb_Tokens or any(
        re.match(pattern, token) for pattern in SKIP_PATTERNS
    ):
        # Square brackets are coloured unless they delimit the optional
        # argument of \sqrt. At idx == 0 there is no previous token (the old
        # l_split[idx - 1] lookup wrapped around to the last token).
        previous = l_split[idx - 1] if idx > 0 else None
        if (token == "[" and previous != "\\sqrt") or (
            token == "]" and idx >= 3 and l_split[idx - 3] != "\\sqrt"
        ):
            l_split_copy = l_split.copy()
            l_split_copy[idx] = r"\mathcolor{black}{ " + l_split_copy[idx] + " }"
            render_dict[str(idx)] = _render_gray(l_split_copy), token
        next_idx = idx + 1
    else:
        # Ordinary token: black on an otherwise gray formula.
        l_split_copy = l_split.copy()
        l_split_copy[idx] = r"\mathcolor{black}{ " + l_split_copy[idx] + " }"
        render_dict[str(idx)] = _render_gray(l_split_copy), token
        next_idx = idx + 1

    return l_split, next_idx, render_dict
def _rgb_color_prefix(token_list):
    """Return the ``\\mathcolor[RGB]{<color_N>}{`` prefix for the next token.

    ``N`` is the current length of *token_list*, i.e. the index the token is
    about to receive.
    NOTE(review): the placeholder text was damaged in the checked-in source
    (the code called ``str.replace`` with an empty search string, which
    inserts the index between every character). ``<color_N>`` is a
    reconstruction; confirm it matches what the renderer substitutes with
    the real RGB triple.
    """
    return "\\mathcolor[RGB]{<color_" + str(len(token_list)) + ">}{"


def _color_span_rgb(l_split, start, end, token_list, brace_color=False):
    """Colour the tokens in ``[start, end)`` and return ``(l_split, new_end)``.

    Colouring a nested command inserts closing-brace tokens into the list,
    which moves the end of the span; ``new_end`` accounts for that so no
    trailing token of the span is skipped.
    """
    sub_idx = start
    while sub_idx < end:
        size_before = len(l_split)
        l_split, sub_idx, token_list = token_add_color_RGB(
            l_split, sub_idx, token_list, brace_color=brace_color
        )
        end += len(l_split) - size_before
    return l_split, end


def token_add_color_RGB(l_split, idx, token_list, brace_color=False):
    r"""Wrap the token at *idx* in ``\mathcolor[RGB]{<color_N>}{...}``.

    Unlike ``token_add_color`` this colours tokens in place: the returned
    token list contains the colour wrappers, and every coloured token is
    appended to *token_list* so that its list index equals the ``N`` of its
    placeholder.

    Args:
        l_split: List of tokens of the normalized formula.
        idx: Index of the token to process.
        token_list: Tokens coloured so far (mutated in place).
        brace_color: When true, ordinary tokens are additionally enclosed in
            a brace group (used inside the optional ``[...]`` argument).

    Returns:
        Tuple ``(l_split, next_idx, token_list)``; ``l_split`` is a new list
        whenever a token was coloured.

    Raises:
        ValueError: Propagated from ``find_matching_brace`` on unbalanced
            braces.
    """
    token = l_split[idx]
    if not token:
        next_idx = idx + 1
    elif token in PHANTOM_Tokens:
        # Skip the command and its argument. The bounds check avoids an
        # IndexError when the command is the last token.
        if idx + 1 < len(l_split) and l_split[idx + 1] == "{":
            brace_end = find_matching_brace(l_split, idx + 1)
        else:
            brace_end = idx + 1
        next_idx = brace_end + 1
    elif token in TWO_Tail_Tokens:
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        den_start = num_end + 1
        den_end = find_matching_brace(l_split, den_start)
        color_token = _rgb_color_prefix(token_list)
        l_split = (
            l_split[:idx]
            + [color_token + token]
            + l_split[idx + 1 : den_end + 1]
            + ["}"]
            + l_split[den_end + 1 :]
        )
        token_list.append(token)
        next_idx = idx + 1
    elif token in ONE_Tail_Tokens:
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        color_token = _rgb_color_prefix(token_list)
        if (
            token != "\\underbrace"
            and num_end + 1 < len(l_split)
            and l_split[num_end + 1] == "_"
        ):
            # Followed by a subscript: add an extra brace group around the
            # coloured command.
            l_split = (
                l_split[:idx]
                + ["{" + color_token + token]
                + l_split[idx + 1 : num_end + 1]
                + ["}}"]
                + l_split[num_end + 1 :]
            )
        else:
            l_split = (
                l_split[:idx]
                + [color_token + token]
                + l_split[idx + 1 : num_end + 1]
                + ["}"]
                + l_split[num_end + 1 :]
            )
        token_list.append(token)
        next_idx = idx + 1
    elif token in ONE_Tail_Invisb_Tokens:
        num_start = idx + 1
        num_end = find_matching_brace(l_split, num_start)
        if num_end - num_start == 2:
            # Exactly one token inside the braces: colour it directly.
            color_token = _rgb_color_prefix(token_list)
            token_list.append(l_split[num_start + 1])
            l_split = (
                l_split[: num_start + 1]
                + [color_token + l_split[num_start + 1] + "}"]
                + l_split[num_end:]
            )
        else:
            l_split, num_end = _color_span_rgb(
                l_split, num_start + 1, num_end, token_list
            )
        next_idx = num_end + 1
    elif token in AB_Tail_Tokens:
        following = l_split[idx + 1] if idx + 1 < len(l_split) else None
        if following == "{":
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start)
            color_token = _rgb_color_prefix(token_list)
            l_split = (
                l_split[:idx]
                + [color_token + token]
                + l_split[idx + 1 : num_end + 1]
                + ["}"]
                + l_split[num_end + 1 :]
            )
            token_list.append(token)
            l_split, num_end = _color_span_rgb(
                l_split, num_start + 1, num_end, token_list
            )
            next_idx = num_end + 1
        elif following == "[":
            num_start = idx + 1
            num_end = find_matching_brace(l_split, num_start, brace=["[", "]"])
            den_start = num_end + 1
            den_end = find_matching_brace(l_split, den_start)
            color_token = _rgb_color_prefix(token_list)
            l_split = (
                l_split[:idx]
                + [color_token + token]
                + l_split[idx + 1 : den_end + 1]
                + ["}"]
                + l_split[den_end + 1 :]
            )
            token_list.append(token)
            l_split, new_num_end = _color_span_rgb(
                l_split, num_start + 1, num_end, token_list, brace_color=True
            )
            # Tokens inserted into the optional argument shift the mandatory
            # argument to the right.
            shift = new_num_end - num_end
            den_start += shift
            den_end += shift
            l_split, den_end = _color_span_rgb(
                l_split, den_start + 1, den_end, token_list
            )
            next_idx = den_end + 1
        else:
            # Command without a bracketed argument (input that was not
            # normalized): step over it instead of leaving next_idx unbound.
            next_idx = idx + 1
    elif token in ["\\multicolumn", "\\multirow"]:
        # Only the third argument (the cell content) is coloured.
        first_start = idx + 1
        first_end = find_matching_brace(l_split, first_start)
        second_start = first_end + 1
        second_end = find_matching_brace(l_split, second_start)
        third_start = second_end + 1
        third_end = find_matching_brace(l_split, third_start)

        l_split, third_end = _color_span_rgb(
            l_split, third_start + 1, third_end, token_list
        )
        next_idx = third_end + 1
    elif token in SKIP_Tokens + TWO_Tail_Invisb_Tokens or any(
        re.match(pattern, token) for pattern in SKIP_PATTERNS
    ):
        # Square brackets are coloured unless they delimit the optional
        # argument of \sqrt. At idx == 0 there is no previous token (the old
        # l_split[idx - 1] lookup wrapped around to the last token).
        previous = l_split[idx - 1] if idx > 0 else None
        if (token == "[" and previous != "\\sqrt") or (
            token == "]" and idx >= 3 and l_split[idx - 3] != "\\sqrt"
        ):
            color_token = _rgb_color_prefix(token_list)
            l_split = (
                l_split[:idx] + [color_token + l_split[idx] + "}"] + l_split[idx + 1 :]
            )
            token_list.append(token)
        next_idx = idx + 1
    else:
        color_token = _rgb_color_prefix(token_list)
        if brace_color or (idx > 1 and l_split[idx - 1] == "_"):
            # Inside [...] or directly after "_": keep the coloured token in
            # its own brace group.
            colored = "{" + color_token + l_split[idx] + "}}"
        else:
            colored = color_token + l_split[idx] + "}"
        l_split = l_split[:idx] + [colored] + l_split[idx + 1 :]
        token_list.append(token)
        next_idx = idx + 1
    return l_split, next_idx, token_list
class SimpleAffineTransform:
    """Translation plus uniform scaling about the centroid of the input points."""

    def __init__(self, translation=(0, 0), scale=1.0):
        """Store the translation vector (as an array) and the scale factor."""
        self.translation = np.array(translation)
        self.scale = scale

    def estimate(self, src, dst):
        """Fit translation and scale that map the point set *src* onto *dst*.

        Translation is the difference of the two centroids; scale is the ratio
        of the mean distances to the respective centroid.
        """
        src_center = np.mean(src, axis=0)
        dst_center = np.mean(dst, axis=0)
        self.translation = dst_center - src_center
        src_spread = np.mean(np.linalg.norm(src - src_center, axis=1))
        dst_spread = np.mean(np.linalg.norm(dst - dst_center, axis=1))
        # The epsilon guards against division by zero for coincident points.
        self.scale = dst_spread / (src_spread + 1e-10)

    def inverse(self):
        """Return a new transform with negated translation and reciprocal scale."""
        return SimpleAffineTransform(-self.translation, 1.0 / self.scale)

    def __call__(self, coords):
        """Scale *coords* about their own centroid, then translate them."""
        center = np.mean(coords, axis=0)
        return self.scale * (coords - center) + center + self.translation

    def residuals(self, src, dst):
        """Return the Euclidean distance of each transformed *src* point to *dst*."""
        return np.sqrt(np.sum((self(src) - dst) ** 2, axis=1))


def norm_coords(x, left, right):
    """Clamp *x* to the interval ``[left, right]``."""
    if x < left:
        return left
    return right if x > right else x


# Tokens that render (almost) identically, mapped to one canonical spelling.
# Built once at import time instead of on every call.
_SAME_TOKEN_MAP = {
    "\\dot": ".",
    "\\Dot": ".",
    "\\cdot": ".",
    "\\cdotp": ".",
    "\\ldotp": ".",
    "\\mid": "|",
    "\\rightarrow": "\\to",
    "\\top": "T",
    "\\Tilde": "\\tilde",
    "\\prime": "'",
    "\\ast": "*",
    "\\left<": "\\langle",
    "\\right>": "\\rangle",
    "\\lbrace": "\\{",
    "\\rbrace": "\\}",
    "\\lbrack": "[",
    "\\rbrack": "]",
    "\\blackslash": "/",
    "\\slash": "/",
    "\\leq": "\\le",
    "\\geq": "\\ge",
    # All three spellings of "not equal" share one canonical form. (The map
    # used to contain "\\neq" twice, so the later "\\not=" entry silently
    # replaced the "\\ne" one and "\\neq" never matched "\\ne".)
    "\\neq": "\\ne",
    "\\not=": "\\ne",
    "\\Vert": "\\|",
    "\\lVert": "\\|",
    "\\rVert": "\\|",
    "\\vert": "|",
    "\\lvert": "|",
    "\\rvert": "|",
    "\\colon": ":",
    "\\Ddot": "\\ddot",
    "\\Bar": "\\bar",
    "\\Vec": "\\vec",
    "\\parallel": "\\|",
    "\\dag": "\\dagger",
    "\\ddag": "\\ddagger",
    "\\textlangle": "<",
    "\\textrangle": ">",
    "\\textgreater": ">",
    "\\textless": "<",
    "\\textbackslash": "\\",
    "\\textunderscore": "_",
    "\\=": "=",
    "\\neg": "\\lnot",
}

# Longest prefixes first so "\\bigg" is not mistaken for "\\big" + "g".
_SIZE_PREFIXES = ("\\bigg", "\\Bigg", "\\big", "\\Big")


def _strip_size_prefix(token):
    """Remove a ``\\big``-family sizing prefix from a delimiter token.

    ``\\big(`` -> ``(``, ``\\Bigl\\{`` -> ``\\{``, ``\\bigg\\langle`` ->
    ``\\langle``. Tokens where the prefix is followed by letters
    (``\\bigcup``, ``\\bigoplus`` ...) are different commands and are
    returned unchanged.
    """
    for prefix in _SIZE_PREFIXES:
        if token.startswith(prefix):
            rest = token[len(prefix) :]
            break
    else:
        return token
    # Optional l/r/m side marker, e.g. "\\bigl(" or "\\bigm|".
    if rest[:1] in ("l", "r", "m") and not rest[1:2].isalpha():
        rest = rest[1:]
    if not rest or rest[0].isalpha():
        return token
    if "\\" in rest:
        return "\\" + rest.split("\\")[-1]
    return rest[-1]


def norm_same_token(token):
    """Map *token* to a canonical spelling so equivalent tokens compare equal.

    Steps, in order: strip ``\\left`` / ``\\right`` from delimiter tokens,
    strip ``\\big``-family sizing prefixes, apply the synonym map, then
    drop a ``wide`` / ``var`` infix or a ``\\string`` prefix.

    Args:
        token: A single LaTeX token.

    Returns:
        The canonical token.
    """
    if token.startswith("\\left") or token.startswith("\\right"):
        # Arrows, harpoons and angle brackets merely start with \left/\right.
        if not any(part in token for part in ("arrow", "<", ">", "harpoon")):
            token = token.replace("\\left", "").replace("\\right", "")
    token = _strip_size_prefix(token)
    token = _SAME_TOKEN_MAP.get(token, token)
    if token.startswith("\\wide"):
        return token.replace("wide", "")
    if token.startswith("\\var"):
        return token.replace("var", "")
    if token.startswith("\\string"):
        return token.replace("\\string", "")
    return token
class HungarianMatcher:
    """Match ground-truth and predicted token boxes by minimum total cost.

    The pairwise cost is a weighted sum of a token cost, a box-position cost
    and a reading-order cost; the assignment is solved with
    ``scipy.optimize.linear_sum_assignment``.
    """

    def __init__(
        self,
        cost_token: float = 1,
        cost_position: float = 0.05,
        cost_order: float = 0.15,
    ):
        """Store the weights of the three cost terms."""
        self.cost_token = cost_token
        self.cost_position = cost_position
        self.cost_order = cost_order
        # Cost matrices of the most recent call, keyed by term name.
        self.cost = {}

    def calculate_token_cost(self, box_gt, box_pred):
        """Return the ``(len(box_gt), len(box_pred))`` token cost matrix.

        An entry is ``0`` for identical tokens, ``0.005`` for tokens that
        differ only before ``norm_same_token`` normalization, and ``1``
        otherwise. When either list is empty an empty matrix is returned.
        """
        if not box_gt or not box_pred:
            return np.empty((len(box_gt), len(box_pred)))

        def encode(tokens, table):
            # Map each distinct token to a small integer id so the pairwise
            # comparison becomes one broadcasted integer equality.
            return np.array([table.setdefault(tok, len(table)) for tok in tokens])

        gt_tokens = [data["token"] for data in box_gt]
        pred_tokens = [data["token"] for data in box_pred]

        raw_ids = {}
        same_raw = (
            encode(gt_tokens, raw_ids)[:, None] == encode(pred_tokens, raw_ids)[None, :]
        )

        norm_ids = {}
        gt_norm = encode([norm_same_token(tok) for tok in gt_tokens], norm_ids)
        pred_norm = encode([norm_same_token(tok) for tok in pred_tokens], norm_ids)
        same_norm = gt_norm[:, None] == pred_norm[None, :]

        token_cost = np.ones((len(box_gt), len(box_pred)))
        token_cost[same_norm] = 0.005
        token_cost[same_raw] = 0.0
        return token_cost

    def box2array(self, box_list, size):
        """Convert boxes to normalized ``[cx, cy, w, h]`` rows.

        Args:
            box_list: Items with a ``"bbox"`` entry ``(x_min, y_min, x_max, y_max)``.
            size: ``(W, H)`` used to normalize x and y values respectively.
        """
        W, H = size
        rows = []
        for box in box_list:
            x_min, y_min, x_max, y_max = box["bbox"]
            rows.append(
                [
                    (x_min + x_max) / (2 * W),
                    (y_min + y_max) / (2 * H),
                    (x_max - x_min) / W,
                    (y_max - y_min) / H,
                ]
            )
        return np.array(rows)

    def order2array(self, box_list, max_token_lens=None):
        """Return each box's list position divided by *max_token_lens*.

        *max_token_lens* defaults to ``len(box_list)``; the result has one
        single-element row per box.
        """
        if not max_token_lens:
            max_token_lens = len(box_list)
        return np.array([[idx / max_token_lens] for idx in range(len(box_list))])

    def calculate_l1_cost(self, gt_array, pred_array):
        """Return the pairwise L1 distance divided by the number of columns."""
        if gt_array.shape[0] == 0 or pred_array.shape[0] == 0:
            return np.empty((gt_array.shape[0], pred_array.shape[0]))
        return cdist(gt_array, pred_array, "minkowski", p=1) / gt_array.shape[-1]

    def __call__(self, box_gt, box_pred, gt_size, pred_size):
        """Return the optimal matching as a list of ``(gt_index, pred_index)``.

        Returns an empty list when either side has no boxes. The individual
        cost matrices are kept in ``self.cost``.
        """
        if not box_gt or not box_pred:
            return []
        gt_box_array = self.box2array(box_gt, gt_size)
        pred_box_array = self.box2array(box_pred, pred_size)
        # Both orders are normalized by the longer sequence so positions are
        # comparable.
        max_token_lens = max(len(box_gt), len(box_pred))
        gt_order_array = self.order2array(box_gt, max_token_lens)
        pred_order_array = self.order2array(box_pred, max_token_lens)
        token_cost = self.calculate_token_cost(box_gt, box_pred)
        position_cost = self.calculate_l1_cost(gt_box_array, pred_box_array)
        order_cost = self.calculate_l1_cost(gt_order_array, pred_order_array)
        self.cost = {
            "token": token_cost,
            "position": position_cost,
            "order": order_cost,
        }
        cost = (
            self.cost_token * token_cost
            + self.cost_position * position_cost
            + self.cost_order * order_cost
        )
        # Non-finite entries would break the solver; make them very expensive.
        cost[np.isnan(cost) | np.isinf(cost)] = 100
        row_ind, col_ind = linear_sum_assignment(cost)
        return list(zip(row_ind, col_ind))
def update_inliers(ori_inliers, sub_inliers):
    """Merge a second-round inlier mask into the original mask.

    ``sub_inliers`` has one entry per ``False`` element of ``ori_inliers``,
    in order. Every such element whose ``sub_inliers`` entry is true becomes
    ``True`` in the result.

    Args:
        ori_inliers: Boolean mask over all items.
        sub_inliers: Boolean mask over the items that are ``False`` in
            ``ori_inliers``.

    Returns:
        A new array; ``ori_inliers`` is left unmodified.
    """
    inliers = np.copy(ori_inliers)
    outlier_positions = np.flatnonzero(~np.asarray(ori_inliers, dtype=bool))
    rescued = np.asarray(sub_inliers, dtype=bool)[: len(outlier_positions)]
    inliers[outlier_positions[rescued]] = True
    return inliers
未找到:{driver_path}") + + # 临时清除代理环境变量,避免 Selenium 连接到 ChromeDriver 时出现问题 + saved_http_proxy = os.environ.pop("http_proxy", None) + saved_https_proxy = os.environ.pop("https_proxy", None) + + try: + service = ChromeService(driver_path) + # --- 初始化 WebDriver --- + self.driver = webdriver.Chrome(service=service, options=opts) + finally: + # 恢复代理设置 + if saved_http_proxy: + os.environ["http_proxy"] = saved_http_proxy + if saved_https_proxy: + os.environ["https_proxy"] = saved_https_proxy + + self.driver.get(template_file) + + self.timeout = timeout + + # 定义一个固定的窗口宽度 + self.window_fix_width = 2000 + self.window_init_height = 300 + + self.driver.set_window_size(self.window_fix_width, self.window_init_height) + + # 动态计算浏览器边框和工具栏的高度 + self.outer_height = self.window_init_height - self.driver.execute_script( + "return window.innerHeight" + ) + + # 等待页面容器加载完成 + WebDriverWait(self.driver, self.timeout).until( + EC.presence_of_all_elements_located((By.ID, "container")) + ) + + def render(self, contents: List[str]) -> List[np.ndarray]: + """ + 渲染一组内容并返回每个元素的截图。 + """ + # 通过JS调用页面内的render函数 + self.driver.execute_script( + "document.body.classList.remove('rendering-complete');" + ) + self.driver.execute_script(f"render({contents}, false)") + + # 等待JS渲染完成的信号 + WebDriverWait(self.driver, self.timeout).until( + EC.presence_of_element_located((By.CLASS_NAME, "rendering-complete")) + ) + + # 根据内容的总高度调整窗口大小,以确保能截取完整图像 + scroll_height = self.driver.execute_script( + "return document.getElementById('container').scrollHeight" + ) + # Chrome 窗口高度限制(通常最大约为 2^31 像素,但实际会更小) + # 设置一个安全的最大高度值 + MAX_WINDOW_HEIGHT = 10000 + # 确保 outer_height 不会导致负值或过小值 + # 使用绝对值并添加额外的边距以确保内容完全可见 + safe_outer_height = max(abs(self.outer_height), 100) + target_height = min( + max(scroll_height + safe_outer_height, 100), MAX_WINDOW_HEIGHT + ) + self.driver.set_window_size(self.window_fix_width, target_height) + + # 获取整个页面的截图 + png = self.driver.get_screenshot_as_png() + nparr = np.frombuffer(png, np.uint8) + 
fullpage_img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + + # 获取每个渲染元素的边界框 + rects = self.get_rects() + cropped_imgs = [] + img_h, img_w = fullpage_img.shape[:2] + + # 根据边界框裁剪出每个元素的图像 + for rect in rects: + if rect is None: + cropped_imgs.append(None) + else: + x, y, w, h = rect + # 计算一个小的随机边距,让截图更自然 + max_side = max(w, h) + base_border = int(max_side * 0.03) + border_size = int(base_border * random.uniform(0.8, 1.2)) + x1 = max(0, x - border_size) + y1 = max(0, y - border_size) + x2 = min(img_w, x + w + border_size) + y2 = min(img_h, y + h + border_size) + + cropped = fullpage_img[y1:y2, x1:x2] + cropped_imgs.append(cropped) + + return cropped_imgs + + def get_rects(self) -> list: + """ + 获取页面上所有渲染元素的位置和大小信息。 + """ + elements = WebDriverWait(self.driver, self.timeout).until( + EC.presence_of_all_elements_located((By.CLASS_NAME, "screenshot")) + ) + + rects = [] + for element in elements: + location = element.location + size = element.size + x = int(location["x"]) + y = int(location["y"]) + w = int(size["width"]) + h = int(size["height"]) + + # 如果元素宽度超过窗口,可能是一个渲染错误,标记为None + if w > self.window_fix_width: + rects.append(None) + else: + rects.append((x, y, w, h)) + + return rects + + def close(self): + """ + 关闭浏览器驱动并释放资源。 + """ + if self.driver: + self.driver.quit() + self.driver = None + + def __del__(self): + """ + 对象销毁时确保浏览器被关闭。 + """ + try: + self.close() + except: + pass diff --git a/fastcdm/render/templates/formula.html b/fastcdm/render/templates/formula.html new file mode 100644 index 0000000000000000000000000000000000000000..5fa85fc5a3c60bf93f1b6df19758a0a4a39dc28f --- /dev/null +++ b/fastcdm/render/templates/formula.html @@ -0,0 +1,67 @@ + + + + + + Document + + + + + + +
+ + + + \ No newline at end of file diff --git a/fastcdm/tokenize.py b/fastcdm/tokenize.py new file mode 100644 index 0000000000000000000000000000000000000000..71f5e27f07fc855e1f59065163d48c5be54f57ae --- /dev/null +++ b/fastcdm/tokenize.py @@ -0,0 +1,277 @@ +import re +import subprocess +from pathlib import Path +from typing import Tuple +import sys + + +IMPLICIT_MULTIPLICATION_TARGETS = [ + "arccos", + "arcsin", + "arctan", + "sinh", + "cosh", + "tanh", + "coth", + "sech", + "csch", + "lim", + "log", + "ln", + "exp", + "sin", + "cos", + "tan", + "cot", + "sec", + "csc", + "ArcCos", + "ArcSin", + "ArcTan", + "Sinh", + "Cosh", + "Tanh", + "Coth", + "Sech", + "Csch", + "Lim", + "Log", + "Ln", + "Exp", + "Sin", + "Cos", + "Tan", + "Cot", + "Sec", + "Csc", + "det", + "dim", + "min", + "max", + "sup", + "inf", + "deg", + "gcd", + "lcm", + "ker", + "im", + "Pr", + "E", + "Var", + "Cov", + "rank", + "Tr", + "span", + "proj", + "grad", + "div", + "curl", + "Res", + "pi", + "alpha", + "beta", + "gamma", + "delta", + "epsilon", + "zeta", + "eta", + "theta", + "iota", + "kappa", + "lambda", + "mu", + "nu", + "xi", + "omicron", + "rho", + "sigma", + "tau", + "upsilon", + "phi", + "chi", + "psi", + "omega", + "Pi", + "Alpha", + "Beta", + "Gamma", + "Delta", + "Epsilon", + "Zeta", + "Eta", + "Theta", + "Iota", + "Kappa", + "Lambda", + "Mu", + "Nu", + "Xi", + "Omicron", + "Rho", + "Sigma", + "Tau", + "Upsilon", + "Phi", + "Chi", + "Psi", + "Omega", + "varepsilon", + "vartheta", + "varpi", + "varrho", + "varsigma", + "varphi", + "partial", + "nabla", + "int", + "oint", + "sum", + "prod", + "wedge", + "vee", + "cap", + "cup", + "therefore", + "because", + "Rightarrow", + "rightarrow", + "Leftarrow", + "leftarrow", + "Leftrightarrow", + "leftrightarrow", + "in", + "ni", + "subset", + "supset", + "subseteq", + "supseteq", + "equiv", + "sim", + "simeq", + "approx", + "propto", + "cdot", + "times", + "otimes", + "oplus", + "quad", + "qquad", + "ldots", + "cdots", + "ddots", + 
# Operator names whose normalised form ``\operatorname { s i n }`` is folded
# back into the plain command (``\sin``) by ``tokenize``.
_OPERATOR_NAMES = [
    "arccos",
    "arcsin",
    "arctan",
    "arg",
    "cos",
    "cosh",
    "cot",
    "coth",
    "csc",
    "deg",
    "det",
    "dim",
    "exp",
    "gcd",
    "hom",
    "inf",
    "injlim",
    "ker",
    "lg",
    "lim",
    "liminf",
    "limsup",
    "ln",
    "log",
    "max",
    "min",
    "Pr",
    "projlim",
    "sec",
    "sin",
    "sinh",
    "sup",
    "tan",
    "tanh",
]
# Each alternative tolerates an optional whitespace character before, between
# and after its letters. Building it per name (with raw strings) gives every
# alternative the same shape; joining ``\s?`` over the characters of the
# already "|"-joined string left the first name without a leading ``\s?`` and
# the last name without a trailing one, so those two never matched the spaced
# output.
OPERATORS = "|".join(
    r"\s?" + r"\s?".join(name) + r"\s?" for name in _OPERATOR_NAMES
)
PATTERN_OPERATOR_NAME = re.compile(r"\\operatorname {(%s)}" % OPERATORS)


def tokenize(latex_code: str) -> Tuple[bool, str]:
    """Normalise a LaTeX formula through the bundled Node.js script.

    The input is pre-processed with the module-level regular expressions
    (alignment environments become ``aligned``, ``smallmatrix`` becomes
    ``matrix``, the backslash of single-character commands is dropped, and a
    space is inserted between a known command name and text glued to it),
    piped to ``tokenize_latex/preprocess_formula.js normalize``, and
    post-processed so that ``\\operatorname { s i n }`` becomes ``\\sin``.

    Args:
        latex_code: LaTeX source of one formula.

    Returns:
        ``(True, normalised)`` on success (``(True, "")`` for empty input),
        or ``(False, latex_code)`` when Node.js is missing or the script
        exits with a non-zero status; details are printed to stderr.
    """
    if not latex_code:
        return True, ""

    formula_script = (
        Path(__file__).parent / "tokenize_latex" / "preprocess_formula.js"
    )

    # The Node.js script handles one formula per input line, so flatten first.
    prepared = latex_code.replace("\n", " ")
    for pattern, replacement in (
        (PATTERN_ALIGN_ENV, r"\\begin{aligned}\2\\end{aligned}"),
        (PATTERN_SMALLMATRIX, r"\\begin{matrix}\2\\end{matrix}"),
        (PATTERN_INVALID_SINGLE_CHAR_CMD, r"\1"),
        (PATTERN_LATEX_CMD_CONCAT_CMD, r"\\\1 \2"),
        (PATTERN_LATEX_CMD_CONCAT_TEXT, r"\\\1 \2"),
        (PATTERN_NON_CMD_IMPLICIT_MULT, r"\1 \2"),
    ):
        prepared = pattern.sub(replacement, prepared)

    try:
        proc = subprocess.run(
            ["node", str(formula_script), "normalize"],
            input=prepared,
            capture_output=True,
            text=True,
            check=True,
            encoding="utf-8",
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        print(f"Error executing Node.js script (formula): {e}", file=sys.stderr)
        # Only CalledProcessError carries the child's stderr.
        if hasattr(e, "stderr"):
            print(f"Node.js stderr: {e.stderr}", file=sys.stderr)
        return False, latex_code

    normalized_latex = proc.stdout

    # Collapse "\operatorname { s i n }" into "\sin" using the captured name.
    post = PATTERN_OPERATOR_NAME.sub(
        lambda match: "\\" + match.group(1).replace(" ", ""), normalized_latex
    ).replace(r"\\ \end{array}", r"\end{array}")
    return True, post.strip()
build by renderer below. + try { + + + if (process.argv[2] == "tokenize") { + var tree = katex.__parse(line, {}); + console.log(global_str.replace(/\\label { .*? }/, "")); + } else { + for (var i = 0; i < 300; ++i) { + line = line.replace(/{\\rm/, "\\mathrm{"); + line = line.replace(/{ \\rm/, "\\mathrm{"); + line = line.replace(/\\rm{/, "\\mathrm{"); + } + + var tree = katex.__parse(line, {}); + buildExpression(tree, new options({})); + for (var i = 0; i < 300; ++i) { + norm_str = norm_str.replace('SSSSSS', '$'); + norm_str = norm_str.replace(' S S S S S S', '$'); + } + console.log(norm_str.replace(/\\label { .*? }/, "")); + } + } catch (e) { + console.error(line); + console.error(norm_str); + console.error(e); + console.log(); + } + global_str = "" + norm_str = "" +}) + + + +// This is a LaTeX AST to LaTeX Renderer (modified version of KaTeX AST-> MathML). +norm_str = "" + +var groupTypes = {}; + +groupTypes.mathord = function(group, options) { + if (options.font == "mathrm"){ + for (i = 0; i < group.value.length; ++i ) { + if (group.value[i] == " ") { + norm_str = norm_str + group.value[i] + "\; "; + } else { + norm_str = norm_str + group.value[i] + " "; + } + } + } else { + norm_str = norm_str + group.value + " "; + } +}; + +groupTypes.textord = function(group, options) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.bin = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.rel = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.open = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.close = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.inner = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.punct = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.ordgroup = function(group, options) { + norm_str = norm_str + "{ "; + + buildExpression(group.value, options); + + norm_str = 
norm_str + "} "; +}; + +groupTypes.text = function(group, options) { + + norm_str = norm_str + "\\mathrm { "; + + buildExpression(group.value.body, options); + norm_str = norm_str + "} "; +}; + +groupTypes.color = function(group, options) { + var inner = buildExpression(group.value.value, options); + + var node = new mathMLTree.MathNode("mstyle", inner); + + node.setAttribute("mathcolor", group.value.color); + + return node; +}; + +groupTypes.supsub = function(group, options) { + buildGroup(group.value.base, options); + + if (group.value.sub) { + norm_str = norm_str + "_ "; + if (group.value.sub.type != 'ordgroup') { + norm_str = norm_str + " { "; + buildGroup(group.value.sub, options); + norm_str = norm_str + "} "; + } else { + buildGroup(group.value.sub, options); + } + + } + + if (group.value.sup) { + norm_str = norm_str + "^ "; + if (group.value.sup.type != 'ordgroup') { + norm_str = norm_str + " { "; + buildGroup(group.value.sup, options); + norm_str = norm_str + "} "; + } else { + buildGroup(group.value.sup, options); + } + } + +}; + +groupTypes.genfrac = function(group, options) { + if (!group.value.hasBarLine) { + norm_str = norm_str + "\\binom "; + } else { + norm_str = norm_str + "\\frac "; + } + buildGroup(group.value.numer, options); + buildGroup(group.value.denom, options); + +}; + +groupTypes.array = function(group, options) { + norm_str = norm_str + "\\begin{array} { "; + if (group.value.cols) { + group.value.cols.map(function(start) { + if (start && start.align) { + norm_str = norm_str + start.align + " ";}}); + } else { + group.value.body[0].map(function(start) { + norm_str = norm_str + "l "; + } ); + } + norm_str = norm_str + "} "; + group.value.body.map(function(row) { + if (row.some(cell => cell.value.length > 0)) { // orginal code: if (row[0].value.length > 0) + out = row.map(function(cell) { + buildGroup(cell, options); + if (norm_str.length > 4 + && norm_str.substring(norm_str.length-4, norm_str.length) == "{ } ") { + norm_str = 
/**
 * Appends an operator node to norm_str.
 *
 * Symbol operators are emitted as-is. Named operators are emitted as
 * "\operatorname { s i n } " (or "\operatorname* { ... } " unless
 * group.value.limits is false), one character per token.
 */
groupTypes.op = function(group) {
    // TODO(emily): handle big operators using the `largeop` attribute

    if (group.value.symbol) {
        // This is a symbol. Just add the symbol.
        norm_str = norm_str + group.value.body + " ";
    } else {
        if (group.value.limits == false) {
            norm_str = norm_str + "\\operatorname { ";
        } else {
            norm_str = norm_str + "\\operatorname* { ";
        }
        // Start at index 1 to skip the first character of the body
        // (presumably the leading backslash of the command name).
        // `var` keeps the loop index from leaking into the global scope.
        for (var i = 1; i < group.value.body.length; ++i) {
            norm_str = norm_str + group.value.body[i] + " ";
        }
        norm_str = norm_str + "} ";
    }
};
/**
 * Dispatches a parse-tree group to the matching groupTypes handler, which
 * appends the group's normalized LaTeX to norm_str.
 *
 * Throws an Error for a group type that has no handler. (ParseError is not
 * required or defined in this script, so referencing it would raise a
 * ReferenceError and hide the message below.)
 */
var buildGroup = function(group, options) {
    if (groupTypes[group.type]) {
        groupTypes[group.type](group, options);
    } else {
        throw new Error(
            "Got group of unknown type: '" + group.type + "'");
    }
};
+ line = line.split('\\~').join(' '); + + for (var i = 0; i < 300; i++) { + line = line.replace(/\\>/, " "); + // line = line.replace('$', ' '); + line = line.replace(/\\label{.*?}/, ""); + } + + if (line.indexOf("matrix") == -1 && line.indexOf("cases")==-1 && + line.indexOf("array")==-1 && line.indexOf("begin")==-1) { + for (var i = 0; i < 300; i++) { + line = line.replace(/\\\\/, "\\,"); + } + } + + + line = line + " " + // global_str is tokenized version (build in parser.js) + // norm_str is normalized version build by renderer below. + try { + + + if (process.argv[2] == "tokenize") { + var tree = katex.__parse(line, {}); + console.log(global_str.replace(/\\label { .*? }/, "")); + } else { + for (var i = 0; i < 300; ++i) { + line = line.replace(/{\\rm/, "\\mathrm{"); + line = line.replace(/{ \\rm/, "\\mathrm{"); + line = line.replace(/\\rm{/, "\\mathrm{"); + } + + var tree = katex.__parse(line, {}); + buildExpression(tree, new options({})); + for (var i = 0; i < 300; ++i) { + norm_str = norm_str.replace('SSSSSS', '$'); + norm_str = norm_str.replace(' S S S S S S', '$'); + } + console.log(norm_str.replace(/\\label { .*? }/, "")); + } + } catch (e) { + console.error(line); + console.error(norm_str); + console.error(e); + console.log(""); + } + global_str = "" + norm_str = "" +}) + + + +// This is a LaTeX AST to LaTeX Renderer (modified version of KaTeX AST-> MathML). 
+norm_str = "" + +var groupTypes = {}; + +groupTypes.mathord = function(group, options) { + if (options.font == "mathrm"){ + for (i = 0; i < group.value.length; ++i ) { + if (group.value[i] == " ") { + norm_str = norm_str + group.value[i] + "\; "; + } else { + norm_str = norm_str + group.value[i] + " "; + } + } + } else { + norm_str = norm_str + group.value + " "; + } +}; + +groupTypes.textord = function(group, options) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.bin = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.rel = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.open = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.close = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.inner = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.punct = function(group) { + norm_str = norm_str + group.value + " "; +}; + +groupTypes.ordgroup = function(group, options) { + norm_str = norm_str + "{ "; + + buildExpression(group.value, options); + + norm_str = norm_str + "} "; +}; + +groupTypes.text = function(group, options) { + + norm_str = norm_str + "\\mathrm { "; + + buildExpression(group.value.body, options); + norm_str = norm_str + "} "; +}; + +groupTypes.color = function(group, options) { + var inner = buildExpression(group.value.value, options); + + var node = new mathMLTree.MathNode("mstyle", inner); + + node.setAttribute("mathcolor", group.value.color); + + return node; +}; + +groupTypes.supsub = function(group, options) { + buildGroup(group.value.base, options); + + if (group.value.sub) { + norm_str = norm_str + "_ "; + if (group.value.sub.type != 'ordgroup') { + norm_str = norm_str + " { "; + buildGroup(group.value.sub, options); + norm_str = norm_str + "} "; + } else { + buildGroup(group.value.sub, options); + } + + } + + if (group.value.sup) { + norm_str = norm_str + "^ "; + if 
/**
 * Appends a square root (optionally with an index, "\sqrt [ n ] ...") to
 * norm_str.
 */
groupTypes.sqrt = function(group, options) {
    if (group.value.index) {
        // The index is a parse node, not a string: render its children the
        // same way preprocess_formula.js does instead of concatenating the
        // object (which would print "[object Object]").
        norm_str = norm_str + "\\sqrt [ ";
        buildExpression(group.value.index.value, options);
        norm_str = norm_str + "] ";
        buildGroup(group.value.body, options);
    } else {
        norm_str = norm_str + "\\sqrt ";
        buildGroup(group.value.body, options);
    }
};
/**
 * Appends an operator node to norm_str.
 *
 * Symbol operators are emitted as-is. Named operators are emitted as
 * "\operatorname { s i n } " (or "\operatorname* { ... } " unless
 * group.value.limits is false), one character per token.
 */
groupTypes.op = function(group) {
    // TODO(emily): handle big operators using the `largeop` attribute

    if (group.value.symbol) {
        // This is a symbol. Just add the symbol.
        norm_str = norm_str + group.value.body + " ";
    } else {
        if (group.value.limits == false) {
            norm_str = norm_str + "\\operatorname { ";
        } else {
            norm_str = norm_str + "\\operatorname* { ";
        }
        // Start at index 1 to skip the first character of the body
        // (presumably the leading backslash of the command name).
        // `var` keeps the loop index from leaking into the global scope.
        for (var i = 1; i < group.value.body.length; ++i) {
            norm_str = norm_str + group.value.body[i] + " ";
        }
        norm_str = norm_str + "} ";
    }
};
"\\mathrm { "; + buildExpression(group.value.value, options.withFont("mathrm")); + norm_str = norm_str + "} "; + } else { + norm_str = norm_str + " " + group.value.original + " "; + buildExpression(group.value.value, options); + } +}; + +groupTypes.overline = function(group, options) { + norm_str = norm_str + "\\overline { "; + + buildGroup(group.value.body, options); + norm_str = norm_str + "} "; + norm_str = norm_str; + +}; + +groupTypes.underline = function(group, options) { + norm_str = norm_str + "\\underline { "; + buildGroup(group.value.body, options); + norm_str = norm_str + "} "; + + norm_str = norm_str; + +}; + +groupTypes.rule = function(group) { + norm_str = norm_str + "\\rule { "+group.value.width.number+" "+group.value.width.unit+" } { "+group.value.height.number+" "+group.value.height.unit+ " } "; + +}; + +groupTypes.llap = function(group, options) { + norm_str = norm_str + "\\llap "; + buildGroup(group.value.body, options); +}; + +groupTypes.rlap = function(group, options) { + norm_str = norm_str + "\\rlap "; + buildGroup(group.value.body, options); + +}; + +groupTypes.phantom = function(group, options, prev) { + norm_str = norm_str + "\\phantom { "; + buildExpression(group.value.value, options); + norm_str = norm_str + "} "; + +}; + +/** + * Takes a list of nodes, builds them, and returns a list of the generated + * MathML nodes. A little simpler than the HTML version because we don't do any + * previous-node handling. + */ +var buildExpression = function(expression, options) { + var groups = []; + for (var i = 0; i < expression.length; i++) { + var group = expression[i]; + buildGroup(group, options); + } + // console.log(norm_str); + // return groups; +}; + +/** + * Takes a group from the parser and calls the appropriate groupTypes function + * on it to produce a MathML node. 
/**
 * Dispatches a parse-tree group to the matching groupTypes handler, which
 * appends the group's normalized LaTeX to norm_str.
 *
 * Throws an Error for a group type that has no handler. (ParseError is not
 * required or defined in this script, so referencing it would raise a
 * ReferenceError and hide the message below.)
 */
var buildGroup = function(group, options) {
    if (groupTypes[group.type]) {
        groupTypes[group.type](group, options);
    } else {
        throw new Error(
            "Got group of unknown type: '" + group.type + "'");
    }
};
KaTeX supports all major browsers, including Chrome, Safari, Firefox, Opera, and IE 8 - IE 11. A list of supported commands can be found on the [wiki](https://github.com/Khan/KaTeX/wiki/Function-Support-in-KaTeX).
(default: `#cc0000`) + +For example: + +```js +katex.render("c = \\pm\\sqrt{a^2 + b^2}", element, { displayMode: true }); +``` + +#### Automatic rendering of math on a page + +Math on the page can be automatically rendered using the auto-render extension. See [the Auto-render README](contrib/auto-render/README.md) for more information. + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) + +## License + +KaTeX is licensed under the [MIT License](http://opensource.org/licenses/MIT). diff --git a/fastcdm/tokenize_latex/third_party/katex/cli.js b/fastcdm/tokenize_latex/third_party/katex/cli.js new file mode 100644 index 0000000000000000000000000000000000000000..b64de377ca50e8a48b679c6c9fde830ccbca3a18 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/cli.js @@ -0,0 +1,32 @@ +#!/usr/bin/env node +// Simple CLI for KaTeX. +// Reads TeX from stdin, outputs HTML to stdout. +/* eslint no-console:0 */ + +var katex = require("./"); +var input = ""; + +// Skip the first two args, which are just "node" and "cli.js" +var args = process.argv.slice(2); + +if (args.indexOf("--help") !== -1) { + console.log(process.argv[0] + " " + process.argv[1] + + " [ --help ]" + + " [ --display-mode ]"); + + console.log("\n" + + "Options:"); + console.log(" --help Display this help message"); + console.log(" --display-mode Render in display mode (not inline mode)"); + process.exit(); +} + +process.stdin.on("data", function(chunk) { + input += chunk.toString(); +}); + +process.stdin.on("end", function() { + var options = { displayMode: args.indexOf("--display-mode") !== -1 }; + var output = katex.renderToString(input, options); + console.log(output); +}); diff --git a/fastcdm/tokenize_latex/third_party/katex/katex.js b/fastcdm/tokenize_latex/third_party/katex/katex.js new file mode 100644 index 0000000000000000000000000000000000000000..4d64606bf521fabb63cce5e203274aec1356cf31 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/katex.js @@ -0,0 +1,74 @@ +/* eslint 
no-console:0 */ +/** + * This is the main entry point for KaTeX. Here, we expose functions for + * rendering expressions either to DOM nodes or to markup strings. + * + * We also expose the ParseError class to check if errors thrown from KaTeX are + * errors in the expression, or errors in javascript handling. + */ + +var ParseError = require("./src/ParseError"); +var Settings = require("./src/Settings"); + +var buildTree = require("./src/buildTree"); +var parseTree = require("./src/parseTree"); +var utils = require("./src/utils"); + +/** + * Parse and build an expression, and place that expression in the DOM node + * given. + */ +var render = function(expression, baseNode, options) { + utils.clearNode(baseNode); + + var settings = new Settings(options); + + var tree = parseTree(expression, settings); + var node = buildTree(tree, expression, settings).toNode(); + + baseNode.appendChild(node); +}; + +// KaTeX's styles don't work properly in quirks mode. Print out an error, and +// disable rendering. +if (typeof document !== "undefined") { + if (document.compatMode !== "CSS1Compat") { + typeof console !== "undefined" && console.warn( + "Warning: KaTeX doesn't work in quirks mode. Make sure your " + + "website has a suitable doctype."); + + render = function() { + throw new ParseError("KaTeX doesn't work in quirks mode."); + }; + } +} + +/** + * Parse and build an expression, and return the markup for that. + */ +var renderToString = function(expression, options) { + var settings = new Settings(options); + + var tree = parseTree(expression, settings); + return buildTree(tree, expression, settings).toMarkup(); +}; + +/** + * Parse an expression and return the parse tree. + */ +var generateParseTree = function(expression, options) { + var settings = new Settings(options); + return parseTree(expression, settings); +}; + +module.exports = { + render: render, + renderToString: renderToString, + /** + * NOTE: This method is not currently recommended for public use. 
+ * The internal tree representation is unstable and is very likely + * to change. Use at your own risk. + */ + __parse: generateParseTree, + ParseError: ParseError, +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/package.json b/fastcdm/tokenize_latex/third_party/katex/package.json new file mode 100644 index 0000000000000000000000000000000000000000..ba3c940588191dd92962301e9c325addab6a266c --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/package.json @@ -0,0 +1,108 @@ +{ + "_args": [ + [ + "katex", + "/home/srush/Projects/im2latex" + ] + ], + "_from": "katex@latest", + "_id": "katex@0.6.0", + "_inCache": true, + "_installable": true, + "_location": "/katex", + "_nodeVersion": "4.2.1", + "_npmOperationalInternal": { + "host": "packages-12-west.internal.npmjs.com", + "tmp": "tmp/katex-0.6.0.tgz_1460769444991_0.38667152682319283" + }, + "_npmUser": { + "email": "kevinb7@gmail.com", + "name": "kevinbarabash" + }, + "_npmVersion": "2.15.2", + "_phantomChildren": {}, + "_requested": { + "name": "katex", + "raw": "katex", + "rawSpec": "", + "scope": null, + "spec": "latest", + "type": "tag" + }, + "_requiredBy": [ + "#USER" + ], + "_resolved": "https://registry.npmjs.org/katex/-/katex-0.6.0.tgz", + "_shasum": "12418e09121c05c92041b6b3b9fb6bab213cb6f3", + "_shrinkwrap": null, + "_spec": "katex", + "_where": "/home/srush/Projects/im2latex", + "bin": { + "katex": "cli.js" + }, + "bugs": { + "url": "https://github.com/Khan/KaTeX/issues" + }, + "dependencies": { + "match-at": "^0.1.0" + }, + "description": "Fast math typesetting for the web.", + "devDependencies": { + "browserify": "^10.2.4", + "clean-css": "~2.2.15", + "eslint": "^1.10.2", + "express": "~3.3.3", + "glob": "^5.0.15", + "jasmine": "^2.3.2", + "jasmine-core": "^2.3.4", + "js-yaml": "^3.3.1", + "jspngopt": "^0.1.0", + "less": "~1.7.5", + "nomnom": "^1.8.1", + "pako": "0.2.7", + "selenium-webdriver": "^2.46.1", + "uglify-js": "~2.4.15" + }, + "directories": {}, + "dist": { + "shasum": 
"12418e09121c05c92041b6b3b9fb6bab213cb6f3", + "tarball": "https://registry.npmjs.org/katex/-/katex-0.6.0.tgz" + }, + "files": [ + "cli.js", + "dist/", + "katex.js", + "src/" + ], + "gitHead": "b94fc6534d5c23f944906a52a592bee4e0090665", + "homepage": "https://github.com/Khan/KaTeX#readme", + "license": "MIT", + "main": "katex.js", + "maintainers": [ + { + "name": "kevinbarabash", + "email": "kevinb7@gmail.com" + }, + { + "name": "spicyj", + "email": "ben@benalpert.com" + }, + { + "name": "xymostech", + "email": "xymostech@gmail.com" + } + ], + "name": "katex", + "optionalDependencies": {}, + "readme": "ERROR: No README data found!", + "repository": { + "type": "git", + "url": "git://github.com/Khan/KaTeX.git" + }, + "scripts": { + "prepublish": "make dist", + "start": "node server.js", + "test": "make lint test" + }, + "version": "0.6.0" +} diff --git a/fastcdm/tokenize_latex/third_party/katex/src/Lexer.js b/fastcdm/tokenize_latex/third_party/katex/src/Lexer.js new file mode 100644 index 0000000000000000000000000000000000000000..c00ff093116d0b58a71eefdd6ee60d2bb853b4db --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/Lexer.js @@ -0,0 +1,162 @@ +/** + * The Lexer class handles tokenizing the input in various ways. Since our + * parser expects us to be able to backtrack, the lexer allows lexing from any + * given starting point. + * + * Its main exposed function is the `lex` function, which takes a position to + * lex from and a type of token to lex. It defers to the appropriate `_innerLex` + * function. + * + * The various `_innerLex` functions perform the actual lexing of different + * kinds. + */ + +var matchAt = require("../../match-at/lib/matchAt"); + +var ParseError = require("./ParseError"); + +// The main lexer class +function Lexer(input) { + this._input = input; +} + +// The resulting token returned from `lex`. 
+function Token(text, data, position) { + this.text = text; + this.data = data; + this.position = position; +} + +/* The following tokenRegex + * - matches typical whitespace (but not NBSP etc.) using its first group + * - matches symbol combinations which result in a single output character + * - does not match any control character \x00-\x1f except whitespace + * - does not match a bare backslash + * - matches any ASCII character except those just mentioned + * - does not match the BMP private use area \uE000-\uF8FF + * - does not match bare surrogate code units + * - matches any BMP character except for those just described + * - matches any valid Unicode surrogate pair + * - matches a backslash followed by one or more letters + * - matches a backslash followed by any BMP character, including newline + * Just because the Lexer matches something doesn't mean it's valid input: + * If there is no matching function or symbol definition, the Parser will + * still reject the input. + */ +var tokenRegex = new RegExp( + "([ \r\n\t]+)|(" + // whitespace + "---?" + // special combinations + "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint + "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair + "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" + // function name + ")" +); + +var whitespaceRegex = /\s*/; + +/** + * This function lexes a single normal token. It takes a position and + * whether it should completely ignore whitespace or not. 
+ */ +Lexer.prototype._innerLex = function(pos, ignoreWhitespace) { + var input = this._input; + if (pos === input.length) { + return new Token("EOF", null, pos); + } + var match = matchAt(tokenRegex, input, pos); + if (match === null) { + throw new ParseError( + "Unexpected character: '" + input[pos] + "'", + this, pos); + } else if (match[2]) { // matched non-whitespace + return new Token(match[2], null, pos + match[2].length); + } else if (ignoreWhitespace) { + return this._innerLex(pos + match[1].length, true); + } else { // concatenate whitespace to a single space + return new Token(" ", null, pos + match[1].length); + } +}; + +// A regex to match a CSS color (like #ffffff or BlueViolet) +var cssColor = /#[a-z0-9]+|[a-z]+/i; + +/** + * This function lexes a CSS color. + */ +Lexer.prototype._innerLexColor = function(pos) { + var input = this._input; + + // Ignore whitespace + var whitespace = matchAt(whitespaceRegex, input, pos)[0]; + pos += whitespace.length; + + var match; + if ((match = matchAt(cssColor, input, pos))) { + // If we look like a color, return a color + return new Token(match[0], null, pos + match[0].length); + } else { + throw new ParseError("Invalid color", this, pos); + } +}; + +// A regex to match a dimension. Dimensions look like +// "1.2em" or ".4pt" or "1 ex" +var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/; + +/** + * This function lexes a dimension. 
+ */ +Lexer.prototype._innerLexSize = function(pos) { + var input = this._input; + + // Ignore whitespace + var whitespace = matchAt(whitespaceRegex, input, pos)[0]; + pos += whitespace.length; + + var match; + if ((match = matchAt(sizeRegex, input, pos))) { + var unit = match[3]; + // We only currently handle "em" and "ex" units + // if (unit !== "em" && unit !== "ex") { + // throw new ParseError("Invalid unit: '" + unit + "'", this, pos); + // } + return new Token(match[0], { + number: +(match[1] + match[2]), + unit: unit, + }, pos + match[0].length); + } + + throw new ParseError("Invalid size", this, pos); +}; + +/** + * This function lexes a string of whitespace. + */ +Lexer.prototype._innerLexWhitespace = function(pos) { + var input = this._input; + + var whitespace = matchAt(whitespaceRegex, input, pos)[0]; + pos += whitespace.length; + + return new Token(whitespace[0], null, pos); +}; + +/** + * This function lexes a single token starting at `pos` and of the given mode. + * Based on the mode, we defer to one of the `_innerLex` functions. + */ +Lexer.prototype.lex = function(pos, mode) { + if (mode === "math") { + return this._innerLex(pos, true); + } else if (mode === "text") { + return this._innerLex(pos, false); + } else if (mode === "color") { + return this._innerLexColor(pos); + } else if (mode === "size") { + return this._innerLexSize(pos); + } else if (mode === "whitespace") { + return this._innerLexWhitespace(pos); + } +}; + +module.exports = Lexer; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/Options.js b/fastcdm/tokenize_latex/third_party/katex/src/Options.js new file mode 100644 index 0000000000000000000000000000000000000000..39ff37bfc17cf508db6e01f9a5eca3ba9d29ed50 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/Options.js @@ -0,0 +1,189 @@ +/** + * This file contains information about the options that the Parser carries + * around with it while parsing. 
Data is held in an `Options` object, and when + * recursing, a new `Options` object can be created with the `.with*` and + * `.reset` functions. + */ + +/** + * This is the main options class. It contains the style, size, color, and font + * of the current parse level. It also contains the style and size of the parent + * parse level, so size changes can be handled efficiently. + * + * Each of the `.with*` and `.reset` functions passes its current style and size + * as the parentStyle and parentSize of the new options class, so parent + * handling is taken care of automatically. + */ +function Options(data) { + this.style = data.style; + this.color = data.color; + this.size = data.size; + this.phantom = data.phantom; + this.font = data.font; + + if (data.parentStyle === undefined) { + this.parentStyle = data.style; + } else { + this.parentStyle = data.parentStyle; + } + + if (data.parentSize === undefined) { + this.parentSize = data.size; + } else { + this.parentSize = data.parentSize; + } +} + +/** + * Returns a new options object with the same properties as "this". Properties + * from "extension" will be copied to the new options object. + */ +Options.prototype.extend = function(extension) { + var data = { + style: this.style, + size: this.size, + color: this.color, + parentStyle: this.style, + parentSize: this.size, + phantom: this.phantom, + font: this.font, + }; + + for (var key in extension) { + if (extension.hasOwnProperty(key)) { + data[key] = extension[key]; + } + } + + return new Options(data); +}; + +/** + * Create a new options object with the given style. + */ +Options.prototype.withStyle = function(style) { + return this.extend({ + style: style, + }); +}; + +/** + * Create a new options object with the given size. + */ +Options.prototype.withSize = function(size) { + return this.extend({ + size: size, + }); +}; + +/** + * Create a new options object with the given color. 
+ */ +Options.prototype.withColor = function(color) { + return this.extend({ + color: color, + }); +}; + +/** + * Create a new options object with "phantom" set to true. + */ +Options.prototype.withPhantom = function() { + return this.extend({ + phantom: true, + }); +}; + +/** + * Create a new options object with the given font. + */ +Options.prototype.withFont = function(font) { + return this.extend({ + font: font, + }); +}; + +/** + * Create a new options object with the same style, size, and color. This is + * used so that parent style and size changes are handled correctly. + */ +Options.prototype.reset = function() { + return this.extend({}); +}; + +/** + * A map of color names to CSS colors. + * TODO(emily): Remove this when we have real macros + */ +var colorMap = { + "katex-blue": "#6495ed", + "katex-orange": "#ffa500", + "katex-pink": "#ff00af", + "katex-red": "#df0030", + "katex-green": "#28ae7b", + "katex-gray": "gray", + "katex-purple": "#9d38bd", + "katex-blueA": "#c7e9f1", + "katex-blueB": "#9cdceb", + "katex-blueC": "#58c4dd", + "katex-blueD": "#29abca", + "katex-blueE": "#1c758a", + "katex-tealA": "#acead7", + "katex-tealB": "#76ddc0", + "katex-tealC": "#5cd0b3", + "katex-tealD": "#55c1a7", + "katex-tealE": "#49a88f", + "katex-greenA": "#c9e2ae", + "katex-greenB": "#a6cf8c", + "katex-greenC": "#83c167", + "katex-greenD": "#77b05d", + "katex-greenE": "#699c52", + "katex-goldA": "#f7c797", + "katex-goldB": "#f9b775", + "katex-goldC": "#f0ac5f", + "katex-goldD": "#e1a158", + "katex-goldE": "#c78d46", + "katex-redA": "#f7a1a3", + "katex-redB": "#ff8080", + "katex-redC": "#fc6255", + "katex-redD": "#e65a4c", + "katex-redE": "#cf5044", + "katex-maroonA": "#ecabc1", + "katex-maroonB": "#ec92ab", + "katex-maroonC": "#c55f73", + "katex-maroonD": "#a24d61", + "katex-maroonE": "#94424f", + "katex-purpleA": "#caa3e8", + "katex-purpleB": "#b189c6", + "katex-purpleC": "#9a72ac", + "katex-purpleD": "#715582", + "katex-purpleE": "#644172", + "katex-mintA":
"#f5f9e8", + "katex-mintB": "#edf2df", + "katex-mintC": "#e0e5cc", + "katex-grayA": "#fdfdfd", + "katex-grayB": "#f7f7f7", + "katex-grayC": "#eeeeee", + "katex-grayD": "#dddddd", + "katex-grayE": "#cccccc", + "katex-grayF": "#aaaaaa", + "katex-grayG": "#999999", + "katex-grayH": "#555555", + "katex-grayI": "#333333", + "katex-kaBlue": "#314453", + "katex-kaGreen": "#639b24", +}; + +/** + * Gets the CSS color of the current options object, accounting for the + * `colorMap`. + */ +Options.prototype.getColor = function() { + if (this.phantom) { + return "transparent"; + } else { + return colorMap[this.color] || this.color; + } +}; + +module.exports = Options; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/ParseError.js b/fastcdm/tokenize_latex/third_party/katex/src/ParseError.js new file mode 100644 index 0000000000000000000000000000000000000000..320f0bd6974d9aaeba3947e9a84c15611ec779d9 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/ParseError.js @@ -0,0 +1,40 @@ +/** + * This is the ParseError class, which is the main error thrown by KaTeX + * functions when something has gone wrong. This is used to distinguish internal + * errors from errors in the expression that the user provided. 
+ */ +function ParseError(message, lexer, position) { + var error = "KaTeX parse error: " + message; + + if (lexer !== undefined && position !== undefined) { + // If we have the input and a position, make the error a bit fancier + + // Prepend some information + error += " at position " + position + ": "; + + // Get the input + var input = lexer._input; + // Insert a combining underscore at the correct position + input = input.slice(0, position) + "\u0332" + + input.slice(position); + + // Extract some context from the input and add it to the error + var begin = Math.max(0, position - 15); + var end = position + 15; + error += input.slice(begin, end); + } + + // Some hackery to make ParseError a prototype of Error + // See http://stackoverflow.com/a/8460753 + var self = new Error(error); + self.name = "ParseError"; + self.__proto__ = ParseError.prototype; + + self.position = position; + return self; +} + +// More hackery +ParseError.prototype.__proto__ = Error.prototype; + +module.exports = ParseError; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/Parser.js b/fastcdm/tokenize_latex/third_party/katex/src/Parser.js new file mode 100644 index 0000000000000000000000000000000000000000..efd45f03f26c20053fea8a879d60d14b9db3c5ab --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/Parser.js @@ -0,0 +1,798 @@ +/* eslint no-constant-condition:0 */ +var functions = require("./functions"); +var environments = require("./environments"); +var Lexer = require("./Lexer"); +var symbols = require("./symbols"); +var utils = require("./utils"); + +var parseData = require("./parseData"); +var ParseError = require("./ParseError"); + +global_str = "" + +/** + * This file contains the parser used to parse out a TeX expression from the + * input. Since TeX isn't context-free, standard parsers don't work particularly + * well. 
+ * + * The strategy of this parser is as such: + * + * The main functions (the `.parse...` ones) take a position in the current + * parse string to parse tokens from. The lexer (found in Lexer.js, stored at + * this.lexer) also supports pulling out tokens at arbitrary places. When + * individual tokens are needed at a position, the lexer is called to pull out a + * token, which is then used. + * + * The parser has a property called "mode" indicating the mode that + * the parser is currently in. Currently it has to be one of "math" or + * "text", which denotes whether the current environment is a math-y + * one or a text-y one (e.g. inside \text). Currently, this serves to + * limit the functions which can be used in text mode. + * + * The main functions then return an object which contains the useful data that + * was parsed at its given point, and a new position at the end of the parsed + * data. The main functions can call each other and continue the parsing by + * using the returned position as a new starting point. + * + * There are also extra `.handle...` functions, which pull out some reused + * functionality into self-contained functions. + * + * The earlier functions return ParseNodes. + * The later functions (which are called deeper in the parse) sometimes return + * ParseFuncOrArgument, which contain a ParseNode as well as some data about + * whether the parsed object is a function which is missing some arguments, or a + * standalone object which can be used as an argument to another function. + */ + +/** + * Main Parser class + */ +function Parser(input, settings) { + // Make a new lexer + this.lexer = new Lexer(input); + // Store the settings for use in parsing + this.settings = settings; +} + +var ParseNode = parseData.ParseNode; + +/** + * An initial function (without its arguments), or an argument to a function. + * The `result` argument should be a ParseNode. 
+ */ +function ParseFuncOrArgument(result, isFunction) { + this.result = result; + // Is this a function (i.e. is it something defined in functions.js)? + this.isFunction = isFunction; +} + +/** + * Checks a result to make sure it has the right type, and throws an + * appropriate error otherwise. + * + * @param {boolean=} consume whether to consume the expected token, + * defaults to true + */ +Parser.prototype.expect = function(text, consume) { + if (this.nextToken.text !== text) { + throw new ParseError( + "Expected '" + text + "', got '" + this.nextToken.text + "'", + this.lexer, this.nextToken.position + ); + } + if (consume !== false) { + this.consume(); + } +}; + +/** + * Considers the current look ahead token as consumed, + * and fetches the one after that as the new look ahead. + */ +Parser.prototype.consume = function() { + this.pos = this.nextToken.position; + + global_str = global_str + " " + this.nextToken.text + this.nextToken = this.lexer.lex(this.pos, this.mode); +}; + +/** + * Main parsing function, which parses an entire input. + * + * @return {?Array.<ParseNode>} + */ +Parser.prototype.parse = function() { + // Try to parse the input + this.mode = "math"; + this.pos = 0; + this.nextToken = this.lexer.lex(this.pos, this.mode); + var parse = this.parseInput(); + return parse; +}; + +/** + * Parses an entire input tree. + */ +Parser.prototype.parseInput = function() { + // Parse an expression + var expression = this.parseExpression(false); + // If we succeeded, make sure there's an EOF at the end + this.expect("EOF", false); + return expression; +}; + +var endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"]; + +/** + * Parses an "expression", which is a list of atoms. + * + * @param {boolean} breakOnInfix Should the parsing stop when we hit infix + * nodes? This happens when functions have higher precedence + * than infix nodes in implicit parses.
+ * + * @param {?string} breakOnToken The token that the expression should end with, + * or `null` if something else should end the expression. + * + * @return {ParseNode} + */ +Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) { + var body = []; + // Keep adding atoms to the body until we can't parse any more atoms (either + // we reached the end, a }, or a \right) + while (true) { + var lex = this.nextToken; + var pos = this.pos; + if (endOfExpression.indexOf(lex.text) !== -1) { + break; + } + if (breakOnToken && lex.text === breakOnToken) { + break; + } + var atom = this.parseAtom(); + if (!atom) { + if (!this.settings.throwOnError && lex.text[0] === "\\") { + var errorNode = this.handleUnsupportedCmd(); + body.push(errorNode); + + pos = lex.position; + continue; + } + + break; + } + if (breakOnInfix && atom.type === "infix") { + // rewind so we can parse the infix atom again + this.pos = pos; + this.nextToken = lex; + break; + } + body.push(atom); + } + return this.handleInfixNodes(body); +}; + +/** + * Rewrites infix operators such as \over with corresponding commands such + * as \frac. + * + * There can only be one infix operator per group. If there's more than one + * then the expression is ambiguous. This can be resolved by adding {}. 
+ * + * @returns {Array} + */ +Parser.prototype.handleInfixNodes = function(body) { + var overIndex = -1; + var funcName; + + for (var i = 0; i < body.length; i++) { + var node = body[i]; + if (node.type === "infix") { + if (overIndex !== -1) { + throw new ParseError("only one infix operator per group", + this.lexer, -1); + } + overIndex = i; + funcName = node.value.replaceWith; + } + } + + if (overIndex !== -1) { + var numerNode; + var denomNode; + + var numerBody = body.slice(0, overIndex); + var denomBody = body.slice(overIndex + 1); + + if (numerBody.length === 1 && numerBody[0].type === "ordgroup") { + numerNode = numerBody[0]; + } else { + numerNode = new ParseNode("ordgroup", numerBody, this.mode); + } + + if (denomBody.length === 1 && denomBody[0].type === "ordgroup") { + denomNode = denomBody[0]; + } else { + denomNode = new ParseNode("ordgroup", denomBody, this.mode); + } + + var value = this.callFunction( + funcName, [numerNode, denomNode], null); + return [new ParseNode(value.type, value, this.mode)]; + } else { + return body; + } +}; + +// The greediness of a superscript or subscript +var SUPSUB_GREEDINESS = 1; + +/** + * Handle a subscript or superscript with nice errors. 
+ */ +Parser.prototype.handleSupSubscript = function(name) { + var symbol = this.nextToken.text; + var symPos = this.pos; + this.consume(); + var group = this.parseGroup(); + + if (!group) { + if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") { + return this.handleUnsupportedCmd(); + } else { + // throw new ParseError( + // "Expected group after '" + symbol + "'", + // this.lexer, + // symPos + 1 + // ); + } + } else if (group.isFunction) { + // ^ and _ have a greediness, so handle interactions with functions' + // greediness + var funcGreediness = functions[group.result].greediness; + if (funcGreediness > SUPSUB_GREEDINESS) { + return this.parseFunction(group); + } else { + throw new ParseError( + "Got function '" + group.result + "' with no arguments " + + "as " + name, + this.lexer, symPos + 1); + } + } else { + return group.result; + } +}; + +/** + * Converts the textual input of an unsupported command into a text node + * contained within a color node whose color is determined by errorColor + */ +Parser.prototype.handleUnsupportedCmd = function() { + var text = this.nextToken.text; + var textordArray = []; + + for (var i = 0; i < text.length; i++) { + textordArray.push(new ParseNode("textord", text[i], "text")); + } + + var textNode = new ParseNode( + "text", + { + body: textordArray, + type: "text", + }, + this.mode); + + var colorNode = new ParseNode( + "color", + { + color: this.settings.errorColor, + value: [textNode], + type: "color", + }, + this.mode); + + this.consume(); + return colorNode; +}; + +/** + * Parses a group with optional super/subscripts. + * + * @return {?ParseNode} + */ +Parser.prototype.parseAtom = function() { + // The body of an atom is an implicit group, so that things like + // \left(x\right)^2 work correctly. + var base = this.parseImplicitGroup(); + + // In text mode, we don't have superscripts or subscripts + if (this.mode === "text") { + return base; + } + + // Note that base may be empty (i.e. 
null) at this point. + + var superscript; + var subscript; + while (true) { + // Lex the first token + var lex = this.nextToken; + + if (lex.text === "\\limits" || lex.text === "\\nolimits") { + // We got a limit control + if (!base || base.type !== "op") { + throw new ParseError( + "Limit controls must follow a math operator", + this.lexer, this.pos); + } else { + var limits = lex.text === "\\limits"; + base.value.limits = limits; + base.value.alwaysHandleSupSub = true; + } + this.consume(); + } else if (lex.text === "^") { + // We got a superscript start + // if (superscript) { + // throw new ParseError( + // "Double superscript", this.lexer, this.pos); + // } + superscript = this.handleSupSubscript("superscript"); + } else if (lex.text === "_") { + // We got a subscript start + // if (subscript) { + // throw new ParseError( + // "Double subscript", this.lexer, this.pos); + // } + subscript = this.handleSupSubscript("subscript"); + } else if (lex.text === "'") { + // We got a prime + var prime = new ParseNode("textord", "\\prime", this.mode); + + // Many primes can be grouped together, so we handle this here + var primes = [prime]; + this.consume(); + // Keep lexing tokens until we get something that's not a prime + while (this.nextToken.text === "'") { + // For each one, add another prime to the list + primes.push(prime); + this.consume(); + } + // Put them into an ordgroup as the superscript + superscript = new ParseNode("ordgroup", primes, this.mode); + } else { + // If it wasn't ^, _, or ', stop parsing super/subscripts + break; + } + } + + if (superscript || subscript) { + // If we got either a superscript or subscript, create a supsub + return new ParseNode("supsub", { + base: base, + sup: superscript, + sub: subscript, + }, this.mode); + } else { + // Otherwise return the original body + return base; + } +}; + +// A list of the size-changing functions, for use in parseImplicitGroup +var sizeFuncs = [ + "\\tiny", "\\scriptsize", "\\footnotesize", "\\small", 
/**
 * Parses an implicit group: a construct introduced by a leading token whose
 * body runs to the end of the enclosing group instead of being wrapped in its
 * own braces. E.g.
 *   small text {\Large large text} small text again
 *
 * Depending on the leading token this produces:
 *  - \left ... \right            a "leftright" node
 *  - \begin{env} ... \end{env}   whatever the environment handler returns
 *  - \matrix{...}, \pmatrix{...}, \cases{...}
 *                                whatever the handler of the environment
 *                                named after the command returns
 *  - a sizing function           a "sizing" node (see sizeFuncs)
 *  - a style function            a "styling" node (see styleFuncs)
 * Any other token is handed to parseFunction.
 *
 * @return {?ParseNode}
 * @throws {ParseError} if the environment is unknown or \begin and \end name
 *     different environments
 */
Parser.prototype.parseImplicitGroup = function() {
    var start = this.parseSymbol();

    if (start == null) {
        // If we didn't get anything we handle, fall back to parseFunction
        return this.parseFunction();
    }

    var func = start.result;
    var body;
    var envName;
    var env;
    var context;
    var result;

    if (func === "\\left") {
        // Parse the entire left function (including the delimiter)
        var left = this.parseFunction(start);
        // Parse out the implicit body
        body = this.parseExpression(false);
        // The body must be followed by the matching \right
        this.expect("\\right", false);
        var right = this.parseFunction();
        return new ParseNode("leftright", {
            body: body,
            left: left.value.value,
            right: right.value.value,
        }, this.mode);
    } else if (func === "\\begin") {
        // begin...end is similar to left...right
        var begin = this.parseFunction(start);
        envName = begin.value.name;
        var name = envName + "";
        // Number of trailing characters of global_str replaced below.
        // NOTE(review): global_str is declared outside this function; the
        // rewrite presumably normalises how the environment name was
        // recorded in that buffer -- confirm against the code that fills it.
        var trimLength = name.length * 2 + 2;

        global_str = global_str.substring(
            0, global_str.length - trimLength) + name + "}";

        if (!environments.hasOwnProperty(envName)) {
            throw new ParseError(
                "No such environment: " + envName,
                this.lexer, begin.value.namepos);
        }
        // Build the environment object. Arguments and other information will
        // be made available to the begin and end methods using properties.
        env = environments[envName];
        var args = this.parseArguments("\\begin{" + envName + "}", env);
        context = {
            mode: this.mode,
            envName: envName,
            parser: this,
            lexer: this.lexer,
            // parseArguments appends the position list as the last element
            positions: args.pop(),
        };
        result = env.handler(context, args);
        this.expect("\\end", false);
        var end = this.parseFunction();

        // Same rewrite again, now for the text recorded while parsing \end
        global_str = global_str.substring(
            0, global_str.length - trimLength) + name + "}";

        if (end.value.name !== envName) {
            throw new ParseError(
                "Mismatch: \\begin{" + envName + "} matched " +
                "by \\end{" + end.value.name + "}",
                this.lexer /* , end.value.namepos */);
            // TODO: Add position to the above line and adjust test case,
            // requires #385 to get merged first
        }
        result.position = end.position;

        return result;
    } else if (func.value === "\\matrix" || func.value === "\\pmatrix" ||
               func.value === "\\cases") {
        // Brace form of an environment: \matrix{...}. Here `func` is a parse
        // node (not a function name), so the command text is in func.value.
        envName = func.value.slice(1);
        if (!environments.hasOwnProperty(envName)) {
            // Report a parse error instead of failing on `env.handler` below
            throw new ParseError(
                "No such environment: " + envName,
                this.lexer, this.pos);
        }
        env = environments[envName];
        this.expect("{", true);
        context = {
            mode: this.mode,
            envName: envName,
            parser: this,
            lexer: this.lexer,
        };

        result = env.handler(context, {});
        this.expect("}", true);

        return result;
    } else if (utils.contains(sizeFuncs, func)) {
        // If we see a sizing function, parse out the implicit body
        body = this.parseExpression(false);

        return new ParseNode("sizing", {
            // Figure out what size to use based on the list of functions above
            original: func,
            size: "size" + (utils.indexOf(sizeFuncs, func) + 1),
            value: body,
        }, this.mode);
    } else if (utils.contains(styleFuncs, func)) {
        // If we see a styling function, parse out the implicit body
        body = this.parseExpression(true);
        return new ParseNode("styling", {
            // Figure out what style to use by pulling out the style from
            // the function name (strip the leading "\" and trailing "style")
            original: func,
            style: func.slice(1, func.length - 5),
            value: body,
        }, this.mode);
    } else {
        // Defer to parseFunction if it's not a function we handle
        return this.parseFunction(start);
    }
};
/**
 * Parses the arguments of a function or environment.
 *
 * Optional arguments come first (indices below numOptionalArgs) and yield
 * `null` when absent. A required argument that is itself a function is only
 * parsed when its greediness is strictly higher than that of `func`; otherwise
 * its slot is left `undefined` (the corresponding error is disabled in this
 * copy of the parser).
 *
 * @param {string} func  "\name" or "\begin{name}"
 * @param {{numArgs:number,numOptionalArgs:number|undefined}} funcData
 * @return the array of arguments, with the list of positions as last element
 * @throws {ParseError} if a required argument is missing and cannot be
 *     replaced by an unsupported-command node
 */
Parser.prototype.parseArguments = function(func, funcData) {
    // numOptionalArgs may be undefined; without the default the sum is NaN
    // and no argument would ever be parsed.
    var numOptionalArgs = funcData.numOptionalArgs || 0;
    var totalArgs = funcData.numArgs + numOptionalArgs;
    if (totalArgs === 0) {
        return [[this.pos]];
    }

    var baseGreediness = funcData.greediness;
    var positions = [this.pos];
    var args = [];
    var argNode;

    for (var i = 0; i < totalArgs; i++) {
        var argType = funcData.argTypes && funcData.argTypes[i];
        var arg;
        if (i < numOptionalArgs) {
            if (argType) {
                arg = this.parseSpecialGroup(argType, true);
            } else {
                arg = this.parseOptionalGroup();
            }
            if (!arg) {
                // Missing optional argument: keep the slot, record position
                args.push(null);
                positions.push(this.pos);
                continue;
            }
        } else {
            if (argType) {
                arg = this.parseSpecialGroup(argType);
            } else {
                arg = this.parseGroup();
            }
            if (!arg) {
                if (!this.settings.throwOnError &&
                    this.nextToken.text[0] === "\\") {
                    arg = new ParseFuncOrArgument(
                        this.handleUnsupportedCmd(this.nextToken.text),
                        false);
                } else {
                    throw new ParseError(
                        "Expected group after '" + func + "'",
                        this.lexer, this.pos);
                }
            }
        }

        // `var` is function-scoped, so reset explicitly: otherwise a rejected
        // function argument would silently reuse the previous argument's node.
        argNode = undefined;
        if (arg.isFunction) {
            var argGreediness =
                functions[arg.result].greediness;
            if (argGreediness > baseGreediness) {
                argNode = this.parseFunction(arg);
            }
            // else: the function is not greedy enough to be used as an
            // argument; the slot stays undefined.
        } else {
            argNode = arg.result;
        }
        args.push(argNode);
        positions.push(this.pos);
    }

    args.push(positions);

    return args;
};
/**
 * Parses an optional group, i.e. an expression enclosed in square brackets
 * (like "[x+y]").
 *
 * @return {?ParseFuncOrArgument} the bracketed expression wrapped in an
 *     "ordgroup" node, or null when the next token is not "["
 */
Parser.prototype.parseOptionalGroup = function() {
    // No opening bracket means the optional group is simply absent
    if (this.nextToken.text !== "[") {
        return null;
    }

    this.consume();
    // Parse up to (not including) the closing bracket...
    var inner = this.parseExpression(false, "]");
    // ...and require that the bracket is actually there
    this.expect("]");

    var groupNode = new ParseNode("ordgroup", inner, this.mode);
    return new ParseFuncOrArgument(groupNode, false);
};
/**
 * Parses a single symbol from the input. Handles both the functions we have
 * defined and single-token symbols.
 *
 *  - A token naming an entry of `functions` is consumed and returned as a
 *    function (its name, with isFunction = true).
 *  - A token found in the symbol table of the current mode is consumed and
 *    returned as a node of that symbol's group.
 *  - "EOF" and "{" are left unconsumed and yield null.
 *  - Any other token is consumed and returned as a node whose type is the
 *    group of the math-mode "\sigma" entry, so unknown tokens are kept
 *    rather than rejected.
 *
 * @return {?ParseFuncOrArgument}
 */
Parser.prototype.parseSymbol = function() {
    var text = this.nextToken.text;

    if (functions[text]) {
        this.consume();
        return new ParseFuncOrArgument(text, true);
    }

    var entry = symbols[this.mode][text];
    if (!entry) {
        if (text == "EOF" || text == "{") {
            return null;
        }
        // Unknown token: borrow the symbol entry of \sigma for its group
        entry = symbols["math"]["\\sigma"];
    }

    this.consume();
    var node = new ParseNode(entry.group, text, this.mode);
    return new ParseFuncOrArgument(node, false);
};
/**
 * The main Settings object.
 *
 * Each option falls back to its default when it is undefined:
 *  - displayMode: Whether the expression should be typeset by default in
 *                 textstyle or displaystyle (default false)
 *  - throwOnError: default true
 *  - errorColor: default "#cc0000"
 *
 * @param {?Object} options  user-supplied settings; may be null or omitted
 */
function Settings(options) {
    // A missing options object behaves like an empty one
    var given = options || {};

    this.displayMode = get(given.displayMode, false);
    this.throwOnError = get(given.throwOnError, true);
    this.errorColor = get(given.errorColor, "#cc0000");
}
+ */ +Style.prototype.fracNum = function() { + return styles[fracNum[this.id]]; +}; + +/** + * Get the style of a fraction denominator given the fraction in the current + * style. + */ +Style.prototype.fracDen = function() { + return styles[fracDen[this.id]]; +}; + +/** + * Get the cramped version of a style (in particular, cramping a cramped style + * doesn't change the style). + */ +Style.prototype.cramp = function() { + return styles[cramp[this.id]]; +}; + +/** + * HTML class name, like "displaystyle cramped" + */ +Style.prototype.cls = function() { + return sizeNames[this.size] + (this.cramped ? " cramped" : " uncramped"); +}; + +/** + * HTML Reset class name, like "reset-textstyle" + */ +Style.prototype.reset = function() { + return resetNames[this.size]; +}; + +// IDs of the different styles +var D = 0; +var Dc = 1; +var T = 2; +var Tc = 3; +var S = 4; +var Sc = 5; +var SS = 6; +var SSc = 7; + +// String names for the different sizes +var sizeNames = [ + "displaystyle textstyle", + "textstyle", + "scriptstyle", + "scriptscriptstyle", +]; + +// Reset names for the different sizes +var resetNames = [ + "reset-textstyle", + "reset-textstyle", + "reset-scriptstyle", + "reset-scriptscriptstyle", +]; + +// Instances of the different styles +var styles = [ + new Style(D, 0, 1.0, false), + new Style(Dc, 0, 1.0, true), + new Style(T, 1, 1.0, false), + new Style(Tc, 1, 1.0, true), + new Style(S, 2, 0.7, false), + new Style(Sc, 2, 0.7, true), + new Style(SS, 3, 0.5, false), + new Style(SSc, 3, 0.5, true), +]; + +// Lookup tables for switching from one style to another +var sup = [S, Sc, S, Sc, SS, SSc, SS, SSc]; +var sub = [Sc, Sc, Sc, Sc, SSc, SSc, SSc, SSc]; +var fracNum = [T, Tc, S, Sc, SS, SSc, SS, SSc]; +var fracDen = [Tc, Tc, Sc, Sc, SSc, SSc, SSc, SSc]; +var cramp = [Dc, Dc, Tc, Tc, Sc, Sc, SSc, SSc]; + +// We only export some of the styles. Also, we don't export the `Style` class so +// no more styles can be generated. 
+module.exports = { + DISPLAY: styles[D], + TEXT: styles[T], + SCRIPT: styles[S], + SCRIPTSCRIPT: styles[SS], +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/buildCommon.js b/fastcdm/tokenize_latex/third_party/katex/src/buildCommon.js new file mode 100644 index 0000000000000000000000000000000000000000..b60e1860a227ae840d1e591235e4673dc45d5c63 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/buildCommon.js @@ -0,0 +1,450 @@ +/* eslint no-console:0 */ +/** + * This module contains general functions that can be used for building + * different kinds of domTree nodes in a consistent manner. + */ + +var domTree = require("./domTree"); +var fontMetrics = require("./fontMetrics"); +var symbols = require("./symbols"); +var utils = require("./utils"); + +var greekCapitals = [ + "\\Gamma", + "\\Delta", + "\\Theta", + "\\Lambda", + "\\Xi", + "\\Pi", + "\\Sigma", + "\\Upsilon", + "\\Phi", + "\\Psi", + "\\Omega", +]; + +var dotlessLetters = [ + "\u0131", // dotless i, \imath + "\u0237", // dotless j, \jmath +]; + +/** + * Makes a symbolNode after translation via the list of symbols in symbols.js. + * Correctly pulls out metrics for the character, and optionally takes a list of + * classes to be attached to the node. 
/**
 * Makes a symbol in Main-Regular or AMS-Regular.
 * Used for rel, bin, open, close, inner, and punct.
 *
 * The font is taken from the symbol's entry in the symbols table. A lone
 * backslash is special-cased: it is used as a textord in unsupported command
 * errors but has no entry in the text symbols table, so it is always rendered
 * in Main-Regular.
 */
var mathsym = function(value, mode, color, classes) {
    var usesMainFont =
        value === "\\" || symbols[mode][value].font === "main";

    if (usesMainFont) {
        return makeSymbol(value, "Main-Regular", mode, color, classes);
    }

    // Everything else comes from the AMS font and gets the "amsrm" class
    var amsClasses = classes.concat(["amsrm"]);
    return makeSymbol(value, "AMS-Regular", mode, color, amsClasses);
};
/**
 * Makes either a mathord or textord in the correct font and color.
 *
 * Without an explicit font in `options` the default font for `type` is used.
 * With "mathit" (or for dotless letters) the italic path is taken. For any
 * other font the symbol is rendered in that font if it has metrics for the
 * character, and in the default font otherwise.
 */
var makeOrd = function(group, options, type) {
    var mode = group.mode;
    var value = group.value;

    // Apply the replacement from the symbols table, if there is one
    var entry = symbols[mode][value];
    if (entry && entry.replace) {
        value = entry.replace;
    }

    var classes = ["mord"];
    var color = options.getColor();
    var font = options.font;

    if (!font) {
        return mathDefault(value, mode, color, classes, type);
    }

    if (font === "mathit" || utils.contains(dotlessLetters, value)) {
        return mathit(value, mode, color, classes);
    }

    var fontName = fontMap[font].fontName;
    if (!fontMetrics.getCharacterMetrics(value, fontName)) {
        // The requested font lacks this character
        return mathDefault(value, mode, color, classes, type);
    }

    return makeSymbol(value, fontName, mode, color, classes.concat([font]));
};
/**
 * Sets the height, depth and maxFontSize of an element to the largest
 * corresponding value found among its children. Each value is 0 when the
 * element has no children or no child exceeds 0.
 */
var sizeElementFromChildren = function(elem) {
    var kids = elem.children || [];
    var tallest = 0;
    var deepest = 0;
    var largestFont = 0;

    for (var k = 0; k < kids.length; k++) {
        var kid = kids[k];
        if (kid.height > tallest) {
            tallest = kid.height;
        }
        if (kid.depth > deepest) {
            deepest = kid.depth;
        }
        if (kid.maxFontSize > largestFont) {
            largestFont = kid.maxFontSize;
        }
    }

    elem.height = tallest;
    elem.depth = deepest;
    elem.maxFontSize = largestFont;
};
/**
 * Makes a vertical list by stacking elements and kerns on top of each other.
 * Allows for many different ways of specifying the positioning method.
 *
 * Arguments:
 *  - children: A list of child or kern nodes to be stacked on top of each other
 *              (i.e. the first element will be at the bottom, and the last at
 *              the top). Element nodes are specified as
 *                {type: "elem", elem: node}
 *              while kern nodes are specified as
 *                {type: "kern", size: size}
 *  - positionType: The method by which the vlist should be positioned. Valid
 *                  values are:
 *                   - "individualShift": The children list only contains elem
 *                                        nodes, and each node contains an extra
 *                                        "shift" value of how much it should be
 *                                        shifted (note that shifting is always
 *                                        moving downwards). positionData is
 *                                        ignored.
 *                   - "top": The positionData specifies the topmost point of
 *                            the vlist (note this is expected to be a height,
 *                            so positive values move up)
 *                   - "bottom": The positionData specifies the bottommost point
 *                               of the vlist (note this is expected to be a
 *                               depth, so positive values move down)
 *                   - "shift": The vlist will be positioned such that its
 *                              baseline is positionData away from the baseline
 *                              of the first child. Positive values move
 *                              downwards.
 *                   - "firstBaseline": The vlist will be positioned such that
 *                                      its baseline is aligned with the
 *                                      baseline of the first child.
 *                                      positionData is ignored. (this is
 *                                      equivalent to "shift" with
 *                                      positionData=0)
 *                  Any other value places the bottom of the list at depth 0.
 *  - positionData: Data used in different ways depending on positionType
 *  - options: An Options object
 *
 * @return a span with class "vlist" whose children are the wrapped, shifted
 *     elements followed by a "baseline-fix" span
 */
var makeVList = function(children, positionType, positionData, options) {
    // `depth` is the position of the bottom of the list; `currPos` walks
    // upwards from it while the children are laid out.
    var depth;
    var currPos;
    var i;
    if (positionType === "individualShift") {
        var oldChildren = children;
        children = [oldChildren[0]];

        // Add in kerns to the list of children to get each element to be
        // shifted to the correct specified shift
        depth = -oldChildren[0].shift - oldChildren[0].elem.depth;
        currPos = depth;
        for (i = 1; i < oldChildren.length; i++) {
            var diff = -oldChildren[i].shift - currPos -
                oldChildren[i].elem.depth;
            // Gap left between the previous element and this one
            var size = diff -
                (oldChildren[i - 1].elem.height +
                 oldChildren[i - 1].elem.depth);

            currPos = currPos + diff;

            children.push({type: "kern", size: size});
            children.push(oldChildren[i]);
        }
    } else if (positionType === "top") {
        // We always start at the bottom, so calculate the bottom by
        // subtracting all the sizes from the given top position
        var bottom = positionData;
        for (i = 0; i < children.length; i++) {
            if (children[i].type === "kern") {
                bottom -= children[i].size;
            } else {
                bottom -= children[i].elem.height + children[i].elem.depth;
            }
        }
        depth = bottom;
    } else if (positionType === "bottom") {
        depth = -positionData;
    } else if (positionType === "shift") {
        depth = -children[0].elem.depth - positionData;
    } else if (positionType === "firstBaseline") {
        depth = -children[0].elem.depth;
    } else {
        depth = 0;
    }

    // Make the fontSizer, sized after the largest font among the elements
    var maxFontSize = 0;
    for (i = 0; i < children.length; i++) {
        if (children[i].type === "elem") {
            maxFontSize = Math.max(maxFontSize, children[i].elem.maxFontSize);
        }
    }
    var fontSizer = makeFontSizer(options, maxFontSize);

    // Create a new list of actual children at the correct offsets
    var realChildren = [];
    currPos = depth;
    for (i = 0; i < children.length; i++) {
        if (children[i].type === "kern") {
            currPos += children[i].size;
        } else {
            var child = children[i].elem;

            // Offset that puts the bottom of this child at currPos
            var shift = -child.depth - currPos;
            currPos += child.height + child.depth;

            var childWrap = makeSpan([], [fontSizer, child]);
            childWrap.height -= shift;
            childWrap.depth += shift;
            childWrap.style.top = shift + "em";

            realChildren.push(childWrap);
        }
    }

    // Add in an element at the end with no offset to fix the calculation of
    // baselines in some browsers (namely IE, sometimes safari)
    var baselineFix = makeSpan(
        ["baseline-fix"], [fontSizer, new domTree.symbolNode("\u200b")]);
    realChildren.push(baselineFix);

    var vlist = makeSpan(["vlist"], realChildren);
    // Fix the final height and depth, in case there were kerns at the ends
    // since the makeSpan calculation won't take that in to account.
    vlist.height = Math.max(currPos, vlist.height);
    vlist.depth = Math.max(-depth, vlist.depth);
    return vlist;
};
mathvariant attribute values +var fontMap = { + // styles + "mathbf": { + variant: "bold", + fontName: "Main-Bold", + }, + "mathrm": { + variant: "normal", + fontName: "Main-Regular", + }, + + // "mathit" is missing because it requires the use of two fonts: Main-Italic + // and Math-Italic. This is handled by a special case in makeOrd which ends + // up calling mathit. + + // families + "mathbb": { + variant: "double-struck", + fontName: "AMS-Regular", + }, + "mathcal": { + variant: "script", + fontName: "Caligraphic-Regular", + }, + "mathfrak": { + variant: "fraktur", + fontName: "Fraktur-Regular", + }, + "mathscr": { + variant: "script", + fontName: "Script-Regular", + }, + "mathsf": { + variant: "sans-serif", + fontName: "SansSerif-Regular", + }, + "mathtt": { + variant: "monospace", + fontName: "Typewriter-Regular", + }, +}; + +module.exports = { + fontMap: fontMap, + makeSymbol: makeSymbol, + mathsym: mathsym, + makeSpan: makeSpan, + makeFragment: makeFragment, + makeVList: makeVList, + makeOrd: makeOrd, + sizingMultiplier: sizingMultiplier, + spacingFunctions: spacingFunctions, +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/buildHTML.js b/fastcdm/tokenize_latex/third_party/katex/src/buildHTML.js new file mode 100644 index 0000000000000000000000000000000000000000..42c33a6c4c7026a5970163923594412a36d7148b --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/buildHTML.js @@ -0,0 +1,1402 @@ +/* eslint no-console:0 */ +/** + * This file does the main work of building a domTree structure from a parse + * tree. The entry point is the `buildHTML` function, which takes a parse tree. + * Then, the buildExpression, buildGroup, and various groupTypes functions are + * called, to produce a final HTML tree. 
/**
 * Builds every node of a parse-tree list, in order, and returns the list of
 * built nodes. Each node is built with the preceding parse node as its `prev`;
 * the first one receives the `prev` passed in by the caller.
 */
var buildExpression = function(expression, options, prev) {
    var built = [];
    var previous = prev;

    for (var idx = 0; idx < expression.length; idx++) {
        var current = expression[idx];
        built.push(buildGroup(current, options, previous));
        previous = current;
    }

    return built;
};
/**
 * Gets the final math type (e.g. "mord", "mbin") of a group. The type decides
 * the spacing between neighbouring elements.
 *
 * Group types listed in `groupToType` map directly. Wrapping groups take the
 * type of what they wrap: supsub uses its base; llap/rlap use their value;
 * color, sizing and styling use their inner value; delimsizing uses its
 * delimiter type. A missing group (as in $^3$) counts as a mathord.
 */
var getTypeOfGroup = function(group) {
    if (group == null) {
        // Like when typesetting $^3$
        return groupToType.mathord;
    }

    switch (group.type) {
        case "supsub":
            return getTypeOfGroup(group.value.base);
        case "llap":
        case "rlap":
            return getTypeOfGroup(group.value);
        case "color":
        case "sizing":
        case "styling":
            return getTypeOfGroup(group.value.value);
        case "delimsizing":
            return groupToType[group.value.delimType];
        default:
            return groupToType[group.type];
    }
};
/**
 * Pulls out the innermost element of a group. Ordgroups and color groups
 * that contain exactly one element are unwrapped repeatedly; any other group
 * is returned as is. Returns false when the group (or the single element
 * reached by unwrapping) is missing.
 */
var getBaseElem = function(group) {
    var current = group;

    while (current) {
        var contents;
        if (current.type === "ordgroup") {
            contents = current.value;
        } else if (current.type === "color") {
            contents = current.value.value;
        } else {
            // Not a wrapper: this is the base element
            return current;
        }

        if (contents.length !== 1) {
            // Wrappers with zero or several elements are kept whole
            return current;
        }
        current = contents[0];
    }

    return false;
};
+ var prevAtom = prev; + while (prevAtom && prevAtom.type === "color") { + var atoms = prevAtom.value.value; + prevAtom = atoms[atoms.length - 1]; + } + // See TeXbook pg. 442-446, Rules 5 and 6, and the text before Rule 19. + // Here, we determine whether the bin should turn into an ord. We + // currently only apply Rule 5. + if (!prev || utils.contains(["mbin", "mopen", "mrel", "mop", "mpunct"], + getTypeOfGroup(prevAtom))) { + group.type = "textord"; + className = "mord"; + } + + return buildCommon.mathsym( + group.value, group.mode, options.getColor(), [className]); +}; + +groupTypes.rel = function(group, options, prev) { + return buildCommon.mathsym( + group.value, group.mode, options.getColor(), ["mrel"]); +}; + +groupTypes.open = function(group, options, prev) { + return buildCommon.mathsym( + group.value, group.mode, options.getColor(), ["mopen"]); +}; + +groupTypes.close = function(group, options, prev) { + return buildCommon.mathsym( + group.value, group.mode, options.getColor(), ["mclose"]); +}; + +groupTypes.inner = function(group, options, prev) { + return buildCommon.mathsym( + group.value, group.mode, options.getColor(), ["minner"]); +}; + +groupTypes.punct = function(group, options, prev) { + return buildCommon.mathsym( + group.value, group.mode, options.getColor(), ["mpunct"]); +}; + +groupTypes.ordgroup = function(group, options, prev) { + return makeSpan( + ["mord", options.style.cls()], + buildExpression(group.value, options.reset()) + ); +}; + +groupTypes.text = function(group, options, prev) { + return makeSpan(["text", "mord", options.style.cls()], + buildExpression(group.value.body, options.reset())); +}; + +groupTypes.color = function(group, options, prev) { + var elements = buildExpression( + group.value.value, + options.withColor(group.value.color), + prev + ); + + // \color isn't supposed to affect the type of the elements it contains. 
+ // To accomplish this, we wrap the results in a fragment, so the inner + // elements will be able to directly interact with their neighbors. For + // example, `\color{red}{2 +} 3` has the same spacing as `2 + 3` + return new buildCommon.makeFragment(elements); +}; + +groupTypes.supsub = function(group, options, prev) { + // Superscript and subscripts are handled in the TeXbook on page + // 445-446, rules 18(a-f). + + // Here is where we defer to the inner group if it should handle + // superscripts and subscripts itself. + if (shouldHandleSupSub(group.value.base, options)) { + return groupTypes[group.value.base.type](group, options, prev); + } + + var base = buildGroup(group.value.base, options.reset()); + var supmid; + var submid; + var sup; + var sub; + + if (group.value.sup) { + sup = buildGroup(group.value.sup, + options.withStyle(options.style.sup())); + supmid = makeSpan( + [options.style.reset(), options.style.sup().cls()], [sup]); + } + + if (group.value.sub) { + sub = buildGroup(group.value.sub, + options.withStyle(options.style.sub())); + submid = makeSpan( + [options.style.reset(), options.style.sub().cls()], [sub]); + } + + // Rule 18a + var supShift; + var subShift; + if (isCharacterBox(group.value.base)) { + supShift = 0; + subShift = 0; + } else { + supShift = base.height - fontMetrics.metrics.supDrop; + subShift = base.depth + fontMetrics.metrics.subDrop; + } + + // Rule 18c + var minSupShift; + if (options.style === Style.DISPLAY) { + minSupShift = fontMetrics.metrics.sup1; + } else if (options.style.cramped) { + minSupShift = fontMetrics.metrics.sup3; + } else { + minSupShift = fontMetrics.metrics.sup2; + } + + // scriptspace is a font-size-independent size, so scale it + // appropriately + var multiplier = Style.TEXT.sizeMultiplier * + options.style.sizeMultiplier; + var scriptspace = + (0.5 / fontMetrics.metrics.ptPerEm) / multiplier + "em"; + + var supsub; + if (!group.value.sup) { + // Rule 18b + subShift = Math.max( + subShift, 
fontMetrics.metrics.sub1, + sub.height - 0.8 * fontMetrics.metrics.xHeight); + + supsub = buildCommon.makeVList([ + {type: "elem", elem: submid}, + ], "shift", subShift, options); + + supsub.children[0].style.marginRight = scriptspace; + + // Subscripts shouldn't be shifted by the base's italic correction. + // Account for that by shifting the subscript back the appropriate + // amount. Note we only do this when the base is a single symbol. + if (base instanceof domTree.symbolNode) { + supsub.children[0].style.marginLeft = -base.italic + "em"; + } + } else if (!group.value.sub) { + // Rule 18c, d + supShift = Math.max(supShift, minSupShift, + sup.depth + 0.25 * fontMetrics.metrics.xHeight); + + supsub = buildCommon.makeVList([ + {type: "elem", elem: supmid}, + ], "shift", -supShift, options); + + supsub.children[0].style.marginRight = scriptspace; + } else { + supShift = Math.max( + supShift, minSupShift, + sup.depth + 0.25 * fontMetrics.metrics.xHeight); + subShift = Math.max(subShift, fontMetrics.metrics.sub2); + + var ruleWidth = fontMetrics.metrics.defaultRuleThickness; + + // Rule 18e + if ((supShift - sup.depth) - (sub.height - subShift) < + 4 * ruleWidth) { + subShift = 4 * ruleWidth - (supShift - sup.depth) + sub.height; + var psi = 0.8 * fontMetrics.metrics.xHeight - + (supShift - sup.depth); + if (psi > 0) { + supShift += psi; + subShift -= psi; + } + } + + supsub = buildCommon.makeVList([ + {type: "elem", elem: submid, shift: subShift}, + {type: "elem", elem: supmid, shift: -supShift}, + ], "individualShift", null, options); + + // See comment above about subscripts not being shifted + if (base instanceof domTree.symbolNode) { + supsub.children[0].style.marginLeft = -base.italic + "em"; + } + + supsub.children[0].style.marginRight = scriptspace; + supsub.children[1].style.marginRight = scriptspace; + } + + return makeSpan([getTypeOfGroup(group.value.base)], + [base, supsub]); +}; + +groupTypes.genfrac = function(group, options, prev) { + // Fractions are 
handled in the TeXbook on pages 444-445, rules 15(a-e). + // Figure out what style this fraction should be in based on the + // function used + var fstyle = options.style; + if (group.value.size === "display") { + fstyle = Style.DISPLAY; + } else if (group.value.size === "text") { + fstyle = Style.TEXT; + } + + var nstyle = fstyle.fracNum(); + var dstyle = fstyle.fracDen(); + + var numer = buildGroup(group.value.numer, options.withStyle(nstyle)); + var numerreset = makeSpan([fstyle.reset(), nstyle.cls()], [numer]); + + var denom = buildGroup(group.value.denom, options.withStyle(dstyle)); + var denomreset = makeSpan([fstyle.reset(), dstyle.cls()], [denom]); + + var ruleWidth; + if (group.value.hasBarLine) { + ruleWidth = fontMetrics.metrics.defaultRuleThickness / + options.style.sizeMultiplier; + } else { + ruleWidth = 0; + } + + // Rule 15b + var numShift; + var clearance; + var denomShift; + if (fstyle.size === Style.DISPLAY.size) { + numShift = fontMetrics.metrics.num1; + if (ruleWidth > 0) { + clearance = 3 * ruleWidth; + } else { + clearance = 7 * fontMetrics.metrics.defaultRuleThickness; + } + denomShift = fontMetrics.metrics.denom1; + } else { + if (ruleWidth > 0) { + numShift = fontMetrics.metrics.num2; + clearance = ruleWidth; + } else { + numShift = fontMetrics.metrics.num3; + clearance = 3 * fontMetrics.metrics.defaultRuleThickness; + } + denomShift = fontMetrics.metrics.denom2; + } + + var frac; + if (ruleWidth === 0) { + // Rule 15c + var candiateClearance = + (numShift - numer.depth) - (denom.height - denomShift); + if (candiateClearance < clearance) { + numShift += 0.5 * (clearance - candiateClearance); + denomShift += 0.5 * (clearance - candiateClearance); + } + + frac = buildCommon.makeVList([ + {type: "elem", elem: denomreset, shift: denomShift}, + {type: "elem", elem: numerreset, shift: -numShift}, + ], "individualShift", null, options); + } else { + // Rule 15d + var axisHeight = fontMetrics.metrics.axisHeight; + + if ((numShift - numer.depth) - 
(axisHeight + 0.5 * ruleWidth) < + clearance) { + numShift += + clearance - ((numShift - numer.depth) - + (axisHeight + 0.5 * ruleWidth)); + } + + if ((axisHeight - 0.5 * ruleWidth) - (denom.height - denomShift) < + clearance) { + denomShift += + clearance - ((axisHeight - 0.5 * ruleWidth) - + (denom.height - denomShift)); + } + + var mid = makeSpan( + [options.style.reset(), Style.TEXT.cls(), "frac-line"]); + // Manually set the height of the line because its height is + // created in CSS + mid.height = ruleWidth; + + var midShift = -(axisHeight - 0.5 * ruleWidth); + + frac = buildCommon.makeVList([ + {type: "elem", elem: denomreset, shift: denomShift}, + {type: "elem", elem: mid, shift: midShift}, + {type: "elem", elem: numerreset, shift: -numShift}, + ], "individualShift", null, options); + } + + // Since we manually change the style sometimes (with \dfrac or \tfrac), + // account for the possible size change here. + frac.height *= fstyle.sizeMultiplier / options.style.sizeMultiplier; + frac.depth *= fstyle.sizeMultiplier / options.style.sizeMultiplier; + + // Rule 15e + var delimSize; + if (fstyle.size === Style.DISPLAY.size) { + delimSize = fontMetrics.metrics.delim1; + } else { + delimSize = fontMetrics.metrics.getDelim2(fstyle); + } + + var leftDelim; + var rightDelim; + if (group.value.leftDelim == null) { + leftDelim = makeNullDelimiter(options); + } else { + leftDelim = delimiter.customSizedDelim( + group.value.leftDelim, delimSize, true, + options.withStyle(fstyle), group.mode); + } + if (group.value.rightDelim == null) { + rightDelim = makeNullDelimiter(options); + } else { + rightDelim = delimiter.customSizedDelim( + group.value.rightDelim, delimSize, true, + options.withStyle(fstyle), group.mode); + } + + return makeSpan( + ["mord", options.style.reset(), fstyle.cls()], + [leftDelim, makeSpan(["mfrac"], [frac]), rightDelim], + options.getColor()); +}; + +groupTypes.array = function(group, options, prev) { + var r; + var c; + var nr = 
group.value.body.length; + var nc = 0; + var body = new Array(nr); + + // Horizontal spacing + var pt = 1 / fontMetrics.metrics.ptPerEm; + var arraycolsep = 5 * pt; // \arraycolsep in article.cls + + // Vertical spacing + var baselineskip = 12 * pt; // see size10.clo + // Default \arraystretch from lttab.dtx + // TODO(gagern): may get redefined once we have user-defined macros + var arraystretch = utils.deflt(group.value.arraystretch, 1); + var arrayskip = arraystretch * baselineskip; + var arstrutHeight = 0.7 * arrayskip; // \strutbox in ltfsstrc.dtx and + var arstrutDepth = 0.3 * arrayskip; // \@arstrutbox in lttab.dtx + + var totalHeight = 0; + for (r = 0; r < group.value.body.length; ++r) { + var inrow = group.value.body[r]; + var height = arstrutHeight; // \@array adds an \@arstrut + var depth = arstrutDepth; // to each tow (via the template) + + if (nc < inrow.length) { + nc = inrow.length; + } + + var outrow = new Array(inrow.length); + for (c = 0; c < inrow.length; ++c) { + var elt = buildGroup(inrow[c], options); + if (depth < elt.depth) { + depth = elt.depth; + } + if (height < elt.height) { + height = elt.height; + } + outrow[c] = elt; + } + + var gap = 0; + if (group.value.rowGaps[r]) { + gap = group.value.rowGaps[r].value; + switch (gap.unit) { + case "em": + gap = gap.number; + break; + case "ex": + gap = gap.number * fontMetrics.metrics.emPerEx; + break; + default: + console.error("Can't handle unit " + gap.unit); + gap = 0; + } + if (gap > 0) { // \@argarraycr + gap += arstrutDepth; + if (depth < gap) { + depth = gap; // \@xargarraycr + } + gap = 0; + } + } + + outrow.height = height; + outrow.depth = depth; + totalHeight += height; + outrow.pos = totalHeight; + totalHeight += depth + gap; // \@yargarraycr + body[r] = outrow; + } + + var offset = totalHeight / 2 + fontMetrics.metrics.axisHeight; + var colDescriptions = group.value.cols || []; + var cols = []; + var colSep; + var colDescrNum; + for (c = 0, colDescrNum = 0; + // Continue while either 
there are more columns or more column + // descriptions, so trailing separators don't get lost. + c < nc || colDescrNum < colDescriptions.length; + ++c, ++colDescrNum) { + + var colDescr = colDescriptions[colDescrNum] || {}; + + var firstSeparator = true; + while (colDescr.type === "separator") { + // If there is more than one separator in a row, add a space + // between them. + if (!firstSeparator) { + colSep = makeSpan(["arraycolsep"], []); + colSep.style.width = + fontMetrics.metrics.doubleRuleSep + "em"; + cols.push(colSep); + } + + if (colDescr.separator === "|") { + var separator = makeSpan( + ["vertical-separator"], + []); + separator.style.height = totalHeight + "em"; + separator.style.verticalAlign = + -(totalHeight - offset) + "em"; + + cols.push(separator); + } else { + throw new ParseError( + "Invalid separator type: " + colDescr.separator); + } + + colDescrNum++; + colDescr = colDescriptions[colDescrNum] || {}; + firstSeparator = false; + } + + if (c >= nc) { + continue; + } + + var sepwidth; + if (c > 0 || group.value.hskipBeforeAndAfter) { + sepwidth = utils.deflt(colDescr.pregap, arraycolsep); + if (sepwidth !== 0) { + colSep = makeSpan(["arraycolsep"], []); + colSep.style.width = sepwidth + "em"; + cols.push(colSep); + } + } + + var col = []; + for (r = 0; r < nr; ++r) { + var row = body[r]; + var elem = row[c]; + if (!elem) { + continue; + } + var shift = row.pos - offset; + elem.depth = row.depth; + elem.height = row.height; + col.push({type: "elem", elem: elem, shift: shift}); + } + + col = buildCommon.makeVList(col, "individualShift", null, options); + col = makeSpan( + ["col-align-" + (colDescr.align || "c")], + [col]); + cols.push(col); + + if (c < nc - 1 || group.value.hskipBeforeAndAfter) { + sepwidth = utils.deflt(colDescr.postgap, arraycolsep); + if (sepwidth !== 0) { + colSep = makeSpan(["arraycolsep"], []); + colSep.style.width = sepwidth + "em"; + cols.push(colSep); + } + } + } + body = makeSpan(["mtable"], cols); + return 
makeSpan(["mord"], [body], options.getColor()); +}; + +groupTypes.spacing = function(group, options, prev) { + if (group.value === "\\ " || group.value === "\\space" || + group.value === " " || group.value === "~") { + // Spaces are generated by adding an actual space. Each of these + // things has an entry in the symbols table, so these will be turned + // into appropriate outputs. + return makeSpan( + ["mord", "mspace"], + [buildCommon.mathsym(group.value, group.mode)] + ); + } else { + // Other kinds of spaces are of arbitrary width. We use CSS to + // generate these. + return makeSpan( + ["mord", "mspace", + buildCommon.spacingFunctions[group.value].className]); + } +}; + +groupTypes.llap = function(group, options, prev) { + var inner = makeSpan( + ["inner"], [buildGroup(group.value.body, options.reset())]); + var fix = makeSpan(["fix"], []); + return makeSpan( + ["llap", options.style.cls()], [inner, fix]); +}; + +groupTypes.rlap = function(group, options, prev) { + var inner = makeSpan( + ["inner"], [buildGroup(group.value.body, options.reset())]); + var fix = makeSpan(["fix"], []); + return makeSpan( + ["rlap", options.style.cls()], [inner, fix]); +}; + +groupTypes.op = function(group, options, prev) { + // Operators are handled in the TeXbook pg. 443-444, rule 13(a). + var supGroup; + var subGroup; + var hasLimits = false; + if (group.type === "supsub" ) { + // If we have limits, supsub will pass us its group to handle. Pull + // out the superscript and subscript and set the group to the op in + // its base. + supGroup = group.value.sup; + subGroup = group.value.sub; + group = group.value.base; + hasLimits = true; + } + + // Most operators have a large successor symbol, but these don't. 
+ var noSuccessor = [ + "\\smallint", + ]; + + var large = false; + if (options.style.size === Style.DISPLAY.size && + group.value.symbol && + !utils.contains(noSuccessor, group.value.body)) { + + // Most symbol operators get larger in displaystyle (rule 13) + large = true; + } + + var base; + var baseShift = 0; + var slant = 0; + if (group.value.symbol) { + // If this is a symbol, create the symbol. + var style = large ? "Size2-Regular" : "Size1-Regular"; + base = buildCommon.makeSymbol( + group.value.body, style, "math", options.getColor(), + ["op-symbol", large ? "large-op" : "small-op", "mop"]); + + // Shift the symbol so its center lies on the axis (rule 13). It + // appears that our fonts have the centers of the symbols already + // almost on the axis, so these numbers are very small. Note we + // don't actually apply this here, but instead it is used either in + // the vlist creation or separately when there are no limits. + baseShift = (base.height - base.depth) / 2 - + fontMetrics.metrics.axisHeight * + options.style.sizeMultiplier; + + // The slant of the symbol is just its italic correction. + slant = base.italic; + } else { + // Otherwise, this is a text operator. Build the text from the + // operator's name. + // TODO(emily): Add a space in the middle of some of these + // operators, like \limsup + var output = []; + for (var i = 1; i < group.value.body.length; i++) { + output.push(buildCommon.mathsym(group.value.body[i], group.mode)); + } + base = makeSpan(["mop"], output, options.getColor()); + } + + if (hasLimits) { + // IE 8 clips \int if it is in a display: inline-block. We wrap it + // in a new span so it is an inline, and works. + base = makeSpan([], [base]); + + var supmid; + var supKern; + var submid; + var subKern; + // We manually have to handle the superscripts and subscripts. This, + // aside from the kern calculations, is copied from supsub. 
+ if (supGroup) { + var sup = buildGroup( + supGroup, options.withStyle(options.style.sup())); + supmid = makeSpan( + [options.style.reset(), options.style.sup().cls()], [sup]); + + supKern = Math.max( + fontMetrics.metrics.bigOpSpacing1, + fontMetrics.metrics.bigOpSpacing3 - sup.depth); + } + + if (subGroup) { + var sub = buildGroup( + subGroup, options.withStyle(options.style.sub())); + submid = makeSpan( + [options.style.reset(), options.style.sub().cls()], + [sub]); + + subKern = Math.max( + fontMetrics.metrics.bigOpSpacing2, + fontMetrics.metrics.bigOpSpacing4 - sub.height); + } + + // Build the final group as a vlist of the possible subscript, base, + // and possible superscript. + var finalGroup; + var top; + var bottom; + if (!supGroup) { + top = base.height - baseShift; + + finalGroup = buildCommon.makeVList([ + {type: "kern", size: fontMetrics.metrics.bigOpSpacing5}, + {type: "elem", elem: submid}, + {type: "kern", size: subKern}, + {type: "elem", elem: base}, + ], "top", top, options); + + // Here, we shift the limits by the slant of the symbol. Note + // that we are supposed to shift the limits by 1/2 of the slant, + // but since we are centering the limits adding a full slant of + // margin will shift by 1/2 that. + finalGroup.children[0].style.marginLeft = -slant + "em"; + } else if (!subGroup) { + bottom = base.depth + baseShift; + + finalGroup = buildCommon.makeVList([ + {type: "elem", elem: base}, + {type: "kern", size: supKern}, + {type: "elem", elem: supmid}, + {type: "kern", size: fontMetrics.metrics.bigOpSpacing5}, + ], "bottom", bottom, options); + + // See comment above about slants + finalGroup.children[1].style.marginLeft = slant + "em"; + } else if (!supGroup && !subGroup) { + // This case probably shouldn't occur (this would mean the + // supsub was sending us a group with no superscript or + // subscript) but be safe. 
+ return base; + } else { + bottom = fontMetrics.metrics.bigOpSpacing5 + + submid.height + submid.depth + + subKern + + base.depth + baseShift; + + finalGroup = buildCommon.makeVList([ + {type: "kern", size: fontMetrics.metrics.bigOpSpacing5}, + {type: "elem", elem: submid}, + {type: "kern", size: subKern}, + {type: "elem", elem: base}, + {type: "kern", size: supKern}, + {type: "elem", elem: supmid}, + {type: "kern", size: fontMetrics.metrics.bigOpSpacing5}, + ], "bottom", bottom, options); + + // See comment above about slants + finalGroup.children[0].style.marginLeft = -slant + "em"; + finalGroup.children[2].style.marginLeft = slant + "em"; + } + + return makeSpan(["mop", "op-limits"], [finalGroup]); + } else { + if (group.value.symbol) { + base.style.top = baseShift + "em"; + } + + return base; + } +}; + +groupTypes.katex = function(group, options, prev) { + // The KaTeX logo. The offsets for the K and a were chosen to look + // good, but the offsets for the T, E, and X were taken from the + // definition of \TeX in TeX (see TeXbook pg. 356) + var k = makeSpan( + ["k"], [buildCommon.mathsym("K", group.mode)]); + var a = makeSpan( + ["a"], [buildCommon.mathsym("A", group.mode)]); + + a.height = (a.height + 0.2) * 0.75; + a.depth = (a.height - 0.2) * 0.75; + + var t = makeSpan( + ["t"], [buildCommon.mathsym("T", group.mode)]); + var e = makeSpan( + ["e"], [buildCommon.mathsym("E", group.mode)]); + + e.height = (e.height - 0.2155); + e.depth = (e.depth + 0.2155); + + var x = makeSpan( + ["x"], [buildCommon.mathsym("X", group.mode)]); + + return makeSpan( + ["katex-logo", "mord"], [k, a, t, e, x], options.getColor()); +}; + +groupTypes.overline = function(group, options, prev) { + // Overlines are handled in the TeXbook pg 443, Rule 9. + + // Build the inner group in the cramped style. 
+ var innerGroup = buildGroup(group.value.body, + options.withStyle(options.style.cramp())); + + var ruleWidth = fontMetrics.metrics.defaultRuleThickness / + options.style.sizeMultiplier; + + // Create the line above the body + var line = makeSpan( + [options.style.reset(), Style.TEXT.cls(), "overline-line"]); + line.height = ruleWidth; + line.maxFontSize = 1.0; + + // Generate the vlist, with the appropriate kerns + var vlist = buildCommon.makeVList([ + {type: "elem", elem: innerGroup}, + {type: "kern", size: 3 * ruleWidth}, + {type: "elem", elem: line}, + {type: "kern", size: ruleWidth}, + ], "firstBaseline", null, options); + + return makeSpan(["overline", "mord"], [vlist], options.getColor()); +}; + +groupTypes.underline = function(group, options, prev) { + // Underlines are handled in the TeXbook pg 443, Rule 10. + + // Build the inner group. + var innerGroup = buildGroup(group.value.body, options); + + var ruleWidth = fontMetrics.metrics.defaultRuleThickness / + options.style.sizeMultiplier; + + // Create the line above the body + var line = makeSpan( + [options.style.reset(), Style.TEXT.cls(), "underline-line"]); + line.height = ruleWidth; + line.maxFontSize = 1.0; + + // Generate the vlist, with the appropriate kerns + var vlist = buildCommon.makeVList([ + {type: "kern", size: ruleWidth}, + {type: "elem", elem: line}, + {type: "kern", size: 3 * ruleWidth}, + {type: "elem", elem: innerGroup}, + ], "top", innerGroup.height, options); + + return makeSpan(["underline", "mord"], [vlist], options.getColor()); +}; + +groupTypes.sqrt = function(group, options, prev) { + // Square roots are handled in the TeXbook pg. 443, Rule 11. 
+ + // First, we do the same steps as in overline to build the inner group + // and line + var inner = buildGroup(group.value.body, + options.withStyle(options.style.cramp())); + + var ruleWidth = fontMetrics.metrics.defaultRuleThickness / + options.style.sizeMultiplier; + + var line = makeSpan( + [options.style.reset(), Style.TEXT.cls(), "sqrt-line"], [], + options.getColor()); + line.height = ruleWidth; + line.maxFontSize = 1.0; + + var phi = ruleWidth; + if (options.style.id < Style.TEXT.id) { + phi = fontMetrics.metrics.xHeight; + } + + // Calculate the clearance between the body and line + var lineClearance = ruleWidth + phi / 4; + + var innerHeight = + (inner.height + inner.depth) * options.style.sizeMultiplier; + var minDelimiterHeight = innerHeight + lineClearance + ruleWidth; + + // Create a \surd delimiter of the required minimum size + var delim = makeSpan(["sqrt-sign"], [ + delimiter.customSizedDelim("\\surd", minDelimiterHeight, + false, options, group.mode)], + options.getColor()); + + var delimDepth = (delim.height + delim.depth) - ruleWidth; + + // Adjust the clearance based on the delimiter size + if (delimDepth > inner.height + inner.depth + lineClearance) { + lineClearance = + (lineClearance + delimDepth - inner.height - inner.depth) / 2; + } + + // Shift the delimiter so that its top lines up with the top of the line + var delimShift = -(inner.height + lineClearance + ruleWidth) + delim.height; + delim.style.top = delimShift + "em"; + delim.height -= delimShift; + delim.depth += delimShift; + + // We add a special case here, because even when `inner` is empty, we + // still get a line. So, we use a simple heuristic to decide if we + // should omit the body entirely. (note this doesn't work for something + // like `\sqrt{\rlap{x}}`, but if someone is doing that they deserve for + // it not to work. 
+ var body; + if (inner.height === 0 && inner.depth === 0) { + body = makeSpan(); + } else { + body = buildCommon.makeVList([ + {type: "elem", elem: inner}, + {type: "kern", size: lineClearance}, + {type: "elem", elem: line}, + {type: "kern", size: ruleWidth}, + ], "firstBaseline", null, options); + } + + if (!group.value.index) { + return makeSpan(["sqrt", "mord"], [delim, body]); + } else { + // Handle the optional root index + + // The index is always in scriptscript style + var root = buildGroup( + group.value.index, + options.withStyle(Style.SCRIPTSCRIPT)); + var rootWrap = makeSpan( + [options.style.reset(), Style.SCRIPTSCRIPT.cls()], + [root]); + + // Figure out the height and depth of the inner part + var innerRootHeight = Math.max(delim.height, body.height); + var innerRootDepth = Math.max(delim.depth, body.depth); + + // The amount the index is shifted by. This is taken from the TeX + // source, in the definition of `\r@@t`. + var toShift = 0.6 * (innerRootHeight - innerRootDepth); + + // Build a VList with the superscript shifted up correctly + var rootVList = buildCommon.makeVList( + [{type: "elem", elem: rootWrap}], + "shift", -toShift, options); + // Add a class surrounding it so we can add on the appropriate + // kerning + var rootVListWrap = makeSpan(["root"], [rootVList]); + + return makeSpan(["sqrt", "mord"], [rootVListWrap, delim, body]); + } +}; + +groupTypes.sizing = function(group, options, prev) { + // Handle sizing operators like \Huge. Real TeX doesn't actually allow + // these functions inside of math expressions, so we do some special + // handling. 
+ var inner = buildExpression(group.value.value, + options.withSize(group.value.size), prev); + + var span = makeSpan(["mord"], + [makeSpan(["sizing", "reset-" + options.size, group.value.size, + options.style.cls()], + inner)]); + + // Calculate the correct maxFontSize manually + var fontSize = buildCommon.sizingMultiplier[group.value.size]; + span.maxFontSize = fontSize * options.style.sizeMultiplier; + + return span; +}; + +groupTypes.styling = function(group, options, prev) { + // Style changes are handled in the TeXbook on pg. 442, Rule 3. + + // Figure out what style we're changing to. + var style = { + "display": Style.DISPLAY, + "text": Style.TEXT, + "script": Style.SCRIPT, + "scriptscript": Style.SCRIPTSCRIPT, + }; + + var newStyle = style[group.value.style]; + + // Build the inner expression in the new style. + var inner = buildExpression( + group.value.value, options.withStyle(newStyle), prev); + + return makeSpan([options.style.reset(), newStyle.cls()], inner); +}; + +groupTypes.font = function(group, options, prev) { + var font = group.value.font; + return buildGroup(group.value.body, options.withFont(font), prev); +}; + +groupTypes.delimsizing = function(group, options, prev) { + var delim = group.value.value; + + if (delim === ".") { + // Empty delimiters still count as elements, even though they don't + // show anything. + return makeSpan([groupToType[group.value.delimType]]); + } + + // Use delimiter.sizedDelim to generate the delimiter. 
+ return makeSpan( + [groupToType[group.value.delimType]], + [delimiter.sizedDelim( + delim, group.value.size, options, group.mode)]); +}; + +groupTypes.leftright = function(group, options, prev) { + // Build the inner expression + var inner = buildExpression(group.value.body, options.reset()); + + var innerHeight = 0; + var innerDepth = 0; + + // Calculate its height and depth + for (var i = 0; i < inner.length; i++) { + innerHeight = Math.max(inner[i].height, innerHeight); + innerDepth = Math.max(inner[i].depth, innerDepth); + } + + // The size of delimiters is the same, regardless of what style we are + // in. Thus, to correctly calculate the size of delimiter we need around + // a group, we scale down the inner size based on the size. + innerHeight *= options.style.sizeMultiplier; + innerDepth *= options.style.sizeMultiplier; + + var leftDelim; + if (group.value.left === ".") { + // Empty delimiters in \left and \right make null delimiter spaces. + leftDelim = makeNullDelimiter(options); + } else { + // Otherwise, use leftRightDelim to generate the correct sized + // delimiter. + leftDelim = delimiter.leftRightDelim( + group.value.left, innerHeight, innerDepth, options, + group.mode); + } + // Add it to the beginning of the expression + inner.unshift(leftDelim); + + var rightDelim; + // Same for the right delimiter + if (group.value.right === ".") { + rightDelim = makeNullDelimiter(options); + } else { + rightDelim = delimiter.leftRightDelim( + group.value.right, innerHeight, innerDepth, options, + group.mode); + } + // Add it to the end of the expression. 
+ inner.push(rightDelim); + + return makeSpan( + ["minner", options.style.cls()], inner, options.getColor()); +}; + +groupTypes.rule = function(group, options, prev) { + // Make an empty span for the rule + var rule = makeSpan(["mord", "rule"], [], options.getColor()); + + // Calculate the shift, width, and height of the rule, and account for units + var shift = 0; + if (group.value.shift) { + shift = group.value.shift.number; + if (group.value.shift.unit === "ex") { + shift *= fontMetrics.metrics.xHeight; + } + } + + var width = group.value.width.number; + if (group.value.width.unit === "ex") { + width *= fontMetrics.metrics.xHeight; + } + + var height = group.value.height.number; + if (group.value.height.unit === "ex") { + height *= fontMetrics.metrics.xHeight; + } + + // The sizes of rules are absolute, so make it larger if we are in a + // smaller style. + shift /= options.style.sizeMultiplier; + width /= options.style.sizeMultiplier; + height /= options.style.sizeMultiplier; + + // Style the rule to the right size + rule.style.borderRightWidth = width + "em"; + rule.style.borderTopWidth = height + "em"; + rule.style.bottom = shift + "em"; + + // Record the height and width + rule.width = width; + rule.height = height + shift; + rule.depth = -shift; + + return rule; +}; + +groupTypes.accent = function(group, options, prev) { + // Accents are handled in the TeXbook pg. 443, rule 12. + var base = group.value.base; + + var supsubGroup; + if (group.type === "supsub") { + // If our base is a character box, and we have superscripts and + // subscripts, the supsub will defer to us. In particular, we want + // to attach the superscripts and subscripts to the inner body (so + // that the position of the superscripts and subscripts won't be + // affected by the height of the accent). We accomplish this by + // sticking the base of the accent into the base of the supsub, and + // rendering that, while keeping track of where the accent is. 
+ + // The supsub group is the group that was passed in + var supsub = group; + // The real accent group is the base of the supsub group + group = supsub.value.base; + // The character box is the base of the accent group + base = group.value.base; + // Stick the character box into the base of the supsub group + supsub.value.base = base; + + // Rerender the supsub group with its new base, and store that + // result. + supsubGroup = buildGroup( + supsub, options.reset(), prev); + } + + // Build the base group + var body = buildGroup( + base, options.withStyle(options.style.cramp())); + + // Calculate the skew of the accent. This is based on the line "If the + // nucleus is not a single character, let s = 0; otherwise set s to the + // kern amount for the nucleus followed by the \skewchar of its font." + // Note that our skew metrics are just the kern between each character + // and the skewchar. + var skew; + if (isCharacterBox(base)) { + // If the base is a character box, then we want the skew of the + // innermost character. To do that, we find the innermost character: + var baseChar = getBaseElem(base); + // Then, we render its group to get the symbol inside it + var baseGroup = buildGroup( + baseChar, options.withStyle(options.style.cramp())); + // Finally, we pull the skew off of the symbol. + skew = baseGroup.skew; + // Note that we now throw away baseGroup, because the layers we + // removed with getBaseElem might contain things like \color which + // we can't get rid of. + // TODO(emily): Find a better way to get the skew + } else { + skew = 0; + } + + // calculate the amount of space between the body and the accent + var clearance = Math.min(body.height, fontMetrics.metrics.xHeight); + + // Build the accent + var accent = buildCommon.makeSymbol( + group.value.accent, "Main-Regular", "math", options.getColor()); + // Remove the italic correction of the accent, because it only serves to + // shift the accent over to a place we don't want. 
+ accent.italic = 0; + + // The \vec character that the fonts use is a combining character, and + // thus shows up much too far to the left. To account for this, we add a + // specific class which shifts the accent over to where we want it. + // TODO(emily): Fix this in a better way, like by changing the font + var vecClass = group.value.accent === "\\vec" ? "accent-vec" : null; + + var accentBody = makeSpan(["accent-body", vecClass], [ + makeSpan([], [accent])]); + + accentBody = buildCommon.makeVList([ + {type: "elem", elem: body}, + {type: "kern", size: -clearance}, + {type: "elem", elem: accentBody}, + ], "firstBaseline", null, options); + + // Shift the accent over by the skew. Note we shift by twice the skew + // because we are centering the accent, so by adding 2*skew to the left, + // we shift it to the right by 1*skew. + accentBody.children[1].style.marginLeft = 2 * skew + "em"; + + var accentWrap = makeSpan(["mord", "accent"], [accentBody]); + + if (supsubGroup) { + // Here, we replace the "base" child of the supsub with our newly + // generated accent. + supsubGroup.children[0] = accentWrap; + + // Since we don't rerun the height calculation after replacing the + // accent, we manually recalculate height. + supsubGroup.height = Math.max(accentWrap.height, supsubGroup.height); + + // Accents should always be ords, even when their innards are not. + supsubGroup.classes[0] = "mord"; + + return supsubGroup; + } else { + return accentWrap; + } +}; + +groupTypes.phantom = function(group, options, prev) { + var elements = buildExpression( + group.value.value, + options.withPhantom(), + prev + ); + + // \phantom isn't supposed to affect the elements it contains. + // See "color" for more details. + return new buildCommon.makeFragment(elements); +}; + +/** + * buildGroup is the function that takes a group and calls the correct groupType + * function for it. It also handles the interaction of size and style changes + * between parents and children. 
+ */ +var buildGroup = function(group, options, prev) { + if (!group) { + return makeSpan(); + } + + if (groupTypes[group.type]) { + // Call the groupTypes function + var groupNode = groupTypes[group.type](group, options, prev); + var multiplier; + + // If the style changed between the parent and the current group, + // account for the size difference + if (options.style !== options.parentStyle) { + multiplier = options.style.sizeMultiplier / + options.parentStyle.sizeMultiplier; + + groupNode.height *= multiplier; + groupNode.depth *= multiplier; + } + + // If the size changed between the parent and the current group, account + // for that size difference. + if (options.size !== options.parentSize) { + multiplier = buildCommon.sizingMultiplier[options.size] / + buildCommon.sizingMultiplier[options.parentSize]; + + groupNode.height *= multiplier; + groupNode.depth *= multiplier; + } + + return groupNode; + } else { + throw new ParseError( + "Got group of unknown type: '" + group.type + "'"); + } +}; + +/** + * Take an entire parse tree, and build it into an appropriate set of HTML + * nodes. + */ +var buildHTML = function(tree, options) { + // buildExpression is destructive, so we need to make a clone + // of the incoming tree so that it isn't accidentally changed + tree = JSON.parse(JSON.stringify(tree)); + + // Build the expression contained in the tree + var expression = buildExpression(tree, options); + var body = makeSpan(["base", options.style.cls()], expression); + + // Add struts, which ensure that the top of the HTML element falls at the + // height of the expression, and the bottom of the HTML element falls at the + // depth of the expression. 
+ var topStrut = makeSpan(["strut"]); + var bottomStrut = makeSpan(["strut", "bottom"]); + + topStrut.style.height = body.height + "em"; + bottomStrut.style.height = (body.height + body.depth) + "em"; + // We'd like to use `vertical-align: top` but in IE 9 this lowers the + // baseline of the box to the bottom of this strut (instead staying in the + // normal place) so we use an absolute value for vertical-align instead + bottomStrut.style.verticalAlign = -body.depth + "em"; + + // Wrap the struts and body together + var htmlNode = makeSpan(["katex-html"], [topStrut, bottomStrut, body]); + + htmlNode.setAttribute("aria-hidden", "true"); + + return htmlNode; +}; + +module.exports = buildHTML; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/buildMathML.js b/fastcdm/tokenize_latex/third_party/katex/src/buildMathML.js new file mode 100644 index 0000000000000000000000000000000000000000..7bf38e86ab56117e05322652eb6ece77cfd3055e --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/buildMathML.js @@ -0,0 +1,533 @@ +/** + * This file converts a parse tree into a cooresponding MathML tree. The main + * entry point is the `buildMathML` function, which takes a parse tree from the + * parser. + */ + +var buildCommon = require("./buildCommon"); +var fontMetrics = require("./fontMetrics"); +var mathMLTree = require("./mathMLTree"); +var ParseError = require("./ParseError"); +var symbols = require("./symbols"); +var utils = require("./utils"); + +var makeSpan = buildCommon.makeSpan; +var fontMap = buildCommon.fontMap; + +/** + * Takes a symbol and converts it into a MathML text node after performing + * optional replacement from symbols.js. + */ +var makeText = function(text, mode) { + if (symbols[mode][text] && symbols[mode][text].replace) { + text = symbols[mode][text].replace; + } + + return new mathMLTree.TextNode(text); +}; + +/** + * Returns the math variant as a string or null if none is required. 
+ */ +var getVariant = function(group, options) { + var font = options.font; + if (!font) { + return null; + } + + var mode = group.mode; + if (font === "mathit") { + return "italic"; + } + + var value = group.value; + if (utils.contains(["\\imath", "\\jmath"], value)) { + return null; + } + + if (symbols[mode][value] && symbols[mode][value].replace) { + value = symbols[mode][value].replace; + } + + var fontName = fontMap[font].fontName; + if (fontMetrics.getCharacterMetrics(value, fontName)) { + return fontMap[options.font].variant; + } + + return null; +}; + +/** + * Functions for handling the different types of groups found in the parse + * tree. Each function should take a parse group and return a MathML node. + */ +var groupTypes = {}; + +groupTypes.mathord = function(group, options) { + var node = new mathMLTree.MathNode( + "mi", + [makeText(group.value, group.mode)]); + + var variant = getVariant(group, options); + if (variant) { + node.setAttribute("mathvariant", variant); + } + return node; +}; + +groupTypes.textord = function(group, options) { + var text = makeText(group.value, group.mode); + + var variant = getVariant(group, options) || "normal"; + + var node; + if (/[0-9]/.test(group.value)) { + // TODO(kevinb) merge adjacent nodes + // do it as a post processing step + node = new mathMLTree.MathNode("mn", [text]); + if (options.font) { + node.setAttribute("mathvariant", variant); + } + } else { + node = new mathMLTree.MathNode("mi", [text]); + node.setAttribute("mathvariant", variant); + } + + return node; +}; + +groupTypes.bin = function(group) { + var node = new mathMLTree.MathNode( + "mo", [makeText(group.value, group.mode)]); + + return node; +}; + +groupTypes.rel = function(group) { + var node = new mathMLTree.MathNode( + "mo", [makeText(group.value, group.mode)]); + + return node; +}; + +groupTypes.open = function(group) { + var node = new mathMLTree.MathNode( + "mo", [makeText(group.value, group.mode)]); + + return node; +}; + +groupTypes.close 
= function(group) { + var node = new mathMLTree.MathNode( + "mo", [makeText(group.value, group.mode)]); + + return node; +}; + +groupTypes.inner = function(group) { + var node = new mathMLTree.MathNode( + "mo", [makeText(group.value, group.mode)]); + + return node; +}; + +groupTypes.punct = function(group) { + var node = new mathMLTree.MathNode( + "mo", [makeText(group.value, group.mode)]); + + node.setAttribute("separator", "true"); + + return node; +}; + +groupTypes.ordgroup = function(group, options) { + var inner = buildExpression(group.value, options); + + var node = new mathMLTree.MathNode("mrow", inner); + + return node; +}; + +groupTypes.text = function(group, options) { + var inner = buildExpression(group.value.body, options); + + var node = new mathMLTree.MathNode("mtext", inner); + + return node; +}; + +groupTypes.color = function(group, options) { + var inner = buildExpression(group.value.value, options); + + var node = new mathMLTree.MathNode("mstyle", inner); + + node.setAttribute("mathcolor", group.value.color); + + return node; +}; + +groupTypes.supsub = function(group, options) { + var children = [buildGroup(group.value.base, options)]; + + if (group.value.sub) { + children.push(buildGroup(group.value.sub, options)); + } + + if (group.value.sup) { + children.push(buildGroup(group.value.sup, options)); + } + + var nodeType; + if (!group.value.sub) { + nodeType = "msup"; + } else if (!group.value.sup) { + nodeType = "msub"; + } else { + nodeType = "msubsup"; + } + + var node = new mathMLTree.MathNode(nodeType, children); + + return node; +}; + +groupTypes.genfrac = function(group, options) { + var node = new mathMLTree.MathNode( + "mfrac", + [buildGroup(group.value.numer, options), + buildGroup(group.value.denom, options)]); + + if (!group.value.hasBarLine) { + node.setAttribute("linethickness", "0px"); + } + + if (group.value.leftDelim != null || group.value.rightDelim != null) { + var withDelims = []; + + if (group.value.leftDelim != null) { + var 
leftOp = new mathMLTree.MathNode( + "mo", [new mathMLTree.TextNode(group.value.leftDelim)]); + + leftOp.setAttribute("fence", "true"); + + withDelims.push(leftOp); + } + + withDelims.push(node); + + if (group.value.rightDelim != null) { + var rightOp = new mathMLTree.MathNode( + "mo", [new mathMLTree.TextNode(group.value.rightDelim)]); + + rightOp.setAttribute("fence", "true"); + + withDelims.push(rightOp); + } + + var outerNode = new mathMLTree.MathNode("mrow", withDelims); + + return outerNode; + } + + return node; +}; + +groupTypes.array = function(group, options) { + return new mathMLTree.MathNode( + "mtable", group.value.body.map(function(row) { + return new mathMLTree.MathNode( + "mtr", row.map(function(cell) { + return new mathMLTree.MathNode( + "mtd", [buildGroup(cell, options)]); + })); + })); +}; + +groupTypes.sqrt = function(group, options) { + var node; + if (group.value.index) { + node = new mathMLTree.MathNode( + "mroot", [ + buildGroup(group.value.body, options), + buildGroup(group.value.index, options), + ]); + } else { + node = new mathMLTree.MathNode( + "msqrt", [buildGroup(group.value.body, options)]); + } + + return node; +}; + +groupTypes.leftright = function(group, options) { + var inner = buildExpression(group.value.body, options); + + if (group.value.left !== ".") { + var leftNode = new mathMLTree.MathNode( + "mo", [makeText(group.value.left, group.mode)]); + + leftNode.setAttribute("fence", "true"); + + inner.unshift(leftNode); + } + + if (group.value.right !== ".") { + var rightNode = new mathMLTree.MathNode( + "mo", [makeText(group.value.right, group.mode)]); + + rightNode.setAttribute("fence", "true"); + + inner.push(rightNode); + } + + var outerNode = new mathMLTree.MathNode("mrow", inner); + + return outerNode; +}; + +groupTypes.accent = function(group, options) { + var accentNode = new mathMLTree.MathNode( + "mo", [makeText(group.value.accent, group.mode)]); + + var node = new mathMLTree.MathNode( + "mover", + 
[buildGroup(group.value.base, options), + accentNode]); + + node.setAttribute("accent", "true"); + + return node; +}; + +groupTypes.spacing = function(group) { + var node; + + if (group.value === "\\ " || group.value === "\\space" || + group.value === " " || group.value === "~") { + node = new mathMLTree.MathNode( + "mtext", [new mathMLTree.TextNode("\u00a0")]); + } else { + node = new mathMLTree.MathNode("mspace"); + + node.setAttribute( + "width", buildCommon.spacingFunctions[group.value].size); + } + + return node; +}; + +groupTypes.op = function(group) { + var node; + + // TODO(emily): handle big operators using the `largeop` attribute + + if (group.value.symbol) { + // This is a symbol. Just add the symbol. + node = new mathMLTree.MathNode( + "mo", [makeText(group.value.body, group.mode)]); + } else { + // This is a text operator. Add all of the characters from the + // operator's name. + // TODO(emily): Add a space in the middle of some of these + // operators, like \limsup. + node = new mathMLTree.MathNode( + "mi", [new mathMLTree.TextNode(group.value.body.slice(1))]); + } + + return node; +}; + +groupTypes.katex = function(group) { + var node = new mathMLTree.MathNode( + "mtext", [new mathMLTree.TextNode("KaTeX")]); + + return node; +}; + +groupTypes.font = function(group, options) { + var font = group.value.font; + return buildGroup(group.value.body, options.withFont(font)); +}; + +groupTypes.delimsizing = function(group) { + var children = []; + + if (group.value.value !== ".") { + children.push(makeText(group.value.value, group.mode)); + } + + var node = new mathMLTree.MathNode("mo", children); + + if (group.value.delimType === "open" || + group.value.delimType === "close") { + // Only some of the delimsizing functions act as fences, and they + // return "open" or "close" delimTypes. + node.setAttribute("fence", "true"); + } else { + // Explicitly disable fencing if it's not a fence, to override the + // defaults. 
+ node.setAttribute("fence", "false"); + } + + return node; +}; + +groupTypes.styling = function(group, options) { + var inner = buildExpression(group.value.value, options); + + var node = new mathMLTree.MathNode("mstyle", inner); + + var styleAttributes = { + "display": ["0", "true"], + "text": ["0", "false"], + "script": ["1", "false"], + "scriptscript": ["2", "false"], + }; + + var attr = styleAttributes[group.value.style]; + + node.setAttribute("scriptlevel", attr[0]); + node.setAttribute("displaystyle", attr[1]); + + return node; +}; + +groupTypes.sizing = function(group, options) { + var inner = buildExpression(group.value.value, options); + + var node = new mathMLTree.MathNode("mstyle", inner); + + // TODO(emily): This doesn't produce the correct size for nested size + // changes, because we don't keep state of what style we're currently + // in, so we can't reset the size to normal before changing it. Now + // that we're passing an options parameter we should be able to fix + // this. + node.setAttribute( + "mathsize", buildCommon.sizingMultiplier[group.value.size] + "em"); + + return node; +}; + +groupTypes.overline = function(group, options) { + var operator = new mathMLTree.MathNode( + "mo", [new mathMLTree.TextNode("\u203e")]); + operator.setAttribute("stretchy", "true"); + + var node = new mathMLTree.MathNode( + "mover", + [buildGroup(group.value.body, options), + operator]); + node.setAttribute("accent", "true"); + + return node; +}; + +groupTypes.underline = function(group, options) { + var operator = new mathMLTree.MathNode( + "mo", [new mathMLTree.TextNode("\u203e")]); + operator.setAttribute("stretchy", "true"); + + var node = new mathMLTree.MathNode( + "munder", + [buildGroup(group.value.body, options), + operator]); + node.setAttribute("accentunder", "true"); + + return node; +}; + +groupTypes.rule = function(group) { + // TODO(emily): Figure out if there's an actual way to draw black boxes + // in MathML. 
+ var node = new mathMLTree.MathNode("mrow"); + + return node; +}; + +groupTypes.llap = function(group, options) { + var node = new mathMLTree.MathNode( + "mpadded", [buildGroup(group.value.body, options)]); + + node.setAttribute("lspace", "-1width"); + node.setAttribute("width", "0px"); + + return node; +}; + +groupTypes.rlap = function(group, options) { + var node = new mathMLTree.MathNode( + "mpadded", [buildGroup(group.value.body, options)]); + + node.setAttribute("width", "0px"); + + return node; +}; + +groupTypes.phantom = function(group, options, prev) { + var inner = buildExpression(group.value.value, options); + return new mathMLTree.MathNode("mphantom", inner); +}; + +/** + * Takes a list of nodes, builds them, and returns a list of the generated + * MathML nodes. A little simpler than the HTML version because we don't do any + * previous-node handling. + */ +var buildExpression = function(expression, options) { + var groups = []; + for (var i = 0; i < expression.length; i++) { + var group = expression[i]; + groups.push(buildGroup(group, options)); + } + return groups; +}; + +/** + * Takes a group from the parser and calls the appropriate groupTypes function + * on it to produce a MathML node. + */ +var buildGroup = function(group, options) { + if (!group) { + return new mathMLTree.MathNode("mrow"); + } + + if (groupTypes[group.type]) { + // Call the groupTypes function + return groupTypes[group.type](group, options); + } else { + throw new ParseError( + "Got group of unknown type: '" + group.type + "'"); + } +}; + +/** + * Takes a full parse tree and settings and builds a MathML representation of + * it. In particular, we put the elements from building the parse tree into a + * tag so we can also include that TeX source as an annotation. + * + * Note that we actually return a domTree element with a `` inside it so + * we can do appropriate styling. 
+ */ +var buildMathML = function(tree, texExpression, options) { + var expression = buildExpression(tree, options); + + // Wrap up the expression in an mrow so it is presented in the semantics + // tag correctly. + var wrapper = new mathMLTree.MathNode("mrow", expression); + + // Build a TeX annotation of the source + var annotation = new mathMLTree.MathNode( + "annotation", [new mathMLTree.TextNode(texExpression)]); + + annotation.setAttribute("encoding", "application/x-tex"); + + var semantics = new mathMLTree.MathNode( + "semantics", [wrapper, annotation]); + + var math = new mathMLTree.MathNode("math", [semantics]); + + // You can't style nodes, so we wrap the node in a span. + return makeSpan(["katex-mathml"], [math]); +}; + +module.exports = buildMathML; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/buildTree.js b/fastcdm/tokenize_latex/third_party/katex/src/buildTree.js new file mode 100644 index 0000000000000000000000000000000000000000..4a8c2aeda5b7176e5780b8c4d3ac32b133a99829 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/buildTree.js @@ -0,0 +1,40 @@ +var buildHTML = require("./buildHTML"); +var buildMathML = require("./buildMathML"); +var buildCommon = require("./buildCommon"); +var Options = require("./Options"); +var Settings = require("./Settings"); +var Style = require("./Style"); + +var makeSpan = buildCommon.makeSpan; + +var buildTree = function(tree, expression, settings) { + settings = settings || new Settings({}); + + var startStyle = Style.TEXT; + if (settings.displayMode) { + startStyle = Style.DISPLAY; + } + + // Setup the default options + var options = new Options({ + style: startStyle, + size: "size5", + }); + + // `buildHTML` sometimes messes with the parse tree (like turning bins -> + // ords), so we build the MathML version first. 
+ var mathMLNode = buildMathML(tree, expression, options); + var htmlNode = buildHTML(tree, options); + + var katexNode = makeSpan(["katex"], [ + mathMLNode, htmlNode, + ]); + + if (settings.displayMode) { + return makeSpan(["katex-display"], [katexNode]); + } else { + return katexNode; + } +}; + +module.exports = buildTree; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/delimiter.js b/fastcdm/tokenize_latex/third_party/katex/src/delimiter.js new file mode 100644 index 0000000000000000000000000000000000000000..168319d41f21b528c02a8c24fdfb2d1e4da20955 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/delimiter.js @@ -0,0 +1,542 @@ +/** + * This file deals with creating delimiters of various sizes. The TeXbook + * discusses these routines on page 441-442, in the "Another subroutine sets box + * x to a specified variable delimiter" paragraph. + * + * There are three main routines here. `makeSmallDelim` makes a delimiter in the + * normal font, but in either text, script, or scriptscript style. + * `makeLargeDelim` makes a delimiter in textstyle, but in one of the Size1, + * Size2, Size3, or Size4 fonts. `makeStackedDelim` makes a delimiter out of + * smaller pieces that are stacked on top of one another. + * + * The functions take a parameter `center`, which determines if the delimiter + * should be centered around the axis. + * + * Then, there are three exposed functions. `sizedDelim` makes a delimiter in + * one of the given sizes. This is used for things like `\bigl`. + * `customSizedDelim` makes a delimiter with a given total height+depth. It is + * called in places like `\sqrt`. `leftRightDelim` makes an appropriate + * delimiter which surrounds an expression of a given height an depth. It is + * used in `\left` and `\right`. 
+ */ + +var ParseError = require("./ParseError"); +var Style = require("./Style"); + +var buildCommon = require("./buildCommon"); +var fontMetrics = require("./fontMetrics"); +var symbols = require("./symbols"); +var utils = require("./utils"); + +var makeSpan = buildCommon.makeSpan; + +/** + * Get the metrics for a given symbol and font, after transformation (i.e. + * after following replacement from symbols.js) + */ +var getMetrics = function(symbol, font) { + if (symbols.math[symbol] && symbols.math[symbol].replace) { + return fontMetrics.getCharacterMetrics( + symbols.math[symbol].replace, font); + } else { + return fontMetrics.getCharacterMetrics( + symbol, font); + } +}; + +/** + * Builds a symbol in the given font size (note size is an integer) + */ +var mathrmSize = function(value, size, mode) { + return buildCommon.makeSymbol(value, "Size" + size + "-Regular", mode); +}; + +/** + * Puts a delimiter span in a given style, and adds appropriate height, depth, + * and maxFontSizes. + */ +var styleWrap = function(delim, toStyle, options) { + var span = makeSpan( + ["style-wrap", options.style.reset(), toStyle.cls()], [delim]); + + var multiplier = toStyle.sizeMultiplier / options.style.sizeMultiplier; + + span.height *= multiplier; + span.depth *= multiplier; + span.maxFontSize = toStyle.sizeMultiplier; + + return span; +}; + +/** + * Makes a small delimiter. This is a delimiter that comes in the Main-Regular + * font, but is restyled to either be in textstyle, scriptstyle, or + * scriptscriptstyle. + */ +var makeSmallDelim = function(delim, style, center, options, mode) { + var text = buildCommon.makeSymbol(delim, "Main-Regular", mode); + + var span = styleWrap(text, style, options); + + if (center) { + var shift = + (1 - options.style.sizeMultiplier / style.sizeMultiplier) * + fontMetrics.metrics.axisHeight; + + span.style.top = shift + "em"; + span.height -= shift; + span.depth += shift; + } + + return span; +}; + +/** + * Makes a large delimiter. 
This is a delimiter that comes in the Size1, Size2, + * Size3, or Size4 fonts. It is always rendered in textstyle. + */ +var makeLargeDelim = function(delim, size, center, options, mode) { + var inner = mathrmSize(delim, size, mode); + + var span = styleWrap( + makeSpan(["delimsizing", "size" + size], + [inner], options.getColor()), + Style.TEXT, options); + + if (center) { + var shift = (1 - options.style.sizeMultiplier) * + fontMetrics.metrics.axisHeight; + + span.style.top = shift + "em"; + span.height -= shift; + span.depth += shift; + } + + return span; +}; + +/** + * Make an inner span with the given offset and in the given font. This is used + * in `makeStackedDelim` to make the stacking pieces for the delimiter. + */ +var makeInner = function(symbol, font, mode) { + var sizeClass; + // Apply the correct CSS class to choose the right font. + if (font === "Size1-Regular") { + sizeClass = "delim-size1"; + } else if (font === "Size4-Regular") { + sizeClass = "delim-size4"; + } + + var inner = makeSpan( + ["delimsizinginner", sizeClass], + [makeSpan([], [buildCommon.makeSymbol(symbol, font, mode)])]); + + // Since this will be passed into `makeVList` in the end, wrap the element + // in the appropriate tag that VList uses. + return {type: "elem", elem: inner}; +}; + +/** + * Make a stacked delimiter out of a given delimiter, with the total height at + * least `heightTotal`. This routine is mentioned on page 442 of the TeXbook. + */ +var makeStackedDelim = function(delim, heightTotal, center, options, mode) { + // There are four parts, the top, an optional middle, a repeated part, and a + // bottom. + var top; + var middle; + var repeat; + var bottom; + top = repeat = bottom = delim; + middle = null; + // Also keep track of what font the delimiters are in + var font = "Size1-Regular"; + + // We set the parts and font based on the symbol. 
Note that we use + // '\u23d0' instead of '|' and '\u2016' instead of '\\|' for the + // repeats of the arrows + if (delim === "\\uparrow") { + repeat = bottom = "\u23d0"; + } else if (delim === "\\Uparrow") { + repeat = bottom = "\u2016"; + } else if (delim === "\\downarrow") { + top = repeat = "\u23d0"; + } else if (delim === "\\Downarrow") { + top = repeat = "\u2016"; + } else if (delim === "\\updownarrow") { + top = "\\uparrow"; + repeat = "\u23d0"; + bottom = "\\downarrow"; + } else if (delim === "\\Updownarrow") { + top = "\\Uparrow"; + repeat = "\u2016"; + bottom = "\\Downarrow"; + } else if (delim === "[" || delim === "\\lbrack") { + top = "\u23a1"; + repeat = "\u23a2"; + bottom = "\u23a3"; + font = "Size4-Regular"; + } else if (delim === "]" || delim === "\\rbrack") { + top = "\u23a4"; + repeat = "\u23a5"; + bottom = "\u23a6"; + font = "Size4-Regular"; + } else if (delim === "\\lfloor") { + repeat = top = "\u23a2"; + bottom = "\u23a3"; + font = "Size4-Regular"; + } else if (delim === "\\lceil") { + top = "\u23a1"; + repeat = bottom = "\u23a2"; + font = "Size4-Regular"; + } else if (delim === "\\rfloor") { + repeat = top = "\u23a5"; + bottom = "\u23a6"; + font = "Size4-Regular"; + } else if (delim === "\\rceil") { + top = "\u23a4"; + repeat = bottom = "\u23a5"; + font = "Size4-Regular"; + } else if (delim === "(") { + top = "\u239b"; + repeat = "\u239c"; + bottom = "\u239d"; + font = "Size4-Regular"; + } else if (delim === ")") { + top = "\u239e"; + repeat = "\u239f"; + bottom = "\u23a0"; + font = "Size4-Regular"; + } else if (delim === "\\{" || delim === "\\lbrace") { + top = "\u23a7"; + middle = "\u23a8"; + bottom = "\u23a9"; + repeat = "\u23aa"; + font = "Size4-Regular"; + } else if (delim === "\\}" || delim === "\\rbrace") { + top = "\u23ab"; + middle = "\u23ac"; + bottom = "\u23ad"; + repeat = "\u23aa"; + font = "Size4-Regular"; + } else if (delim === "\\lgroup") { + top = "\u23a7"; + bottom = "\u23a9"; + repeat = "\u23aa"; + font = "Size4-Regular"; + 
} else if (delim === "\\rgroup") { + top = "\u23ab"; + bottom = "\u23ad"; + repeat = "\u23aa"; + font = "Size4-Regular"; + } else if (delim === "\\lmoustache") { + top = "\u23a7"; + bottom = "\u23ad"; + repeat = "\u23aa"; + font = "Size4-Regular"; + } else if (delim === "\\rmoustache") { + top = "\u23ab"; + bottom = "\u23a9"; + repeat = "\u23aa"; + font = "Size4-Regular"; + } else if (delim === "\\surd") { + top = "\ue001"; + bottom = "\u23b7"; + repeat = "\ue000"; + font = "Size4-Regular"; + } + + // Get the metrics of the four sections + var topMetrics = getMetrics(top, font); + var topHeightTotal = topMetrics.height + topMetrics.depth; + var repeatMetrics = getMetrics(repeat, font); + var repeatHeightTotal = repeatMetrics.height + repeatMetrics.depth; + var bottomMetrics = getMetrics(bottom, font); + var bottomHeightTotal = bottomMetrics.height + bottomMetrics.depth; + var middleHeightTotal = 0; + var middleFactor = 1; + if (middle !== null) { + var middleMetrics = getMetrics(middle, font); + middleHeightTotal = middleMetrics.height + middleMetrics.depth; + middleFactor = 2; // repeat symmetrically above and below middle + } + + // Calcuate the minimal height that the delimiter can have. + // It is at least the size of the top, bottom, and optional middle combined. + var minHeight = topHeightTotal + bottomHeightTotal + middleHeightTotal; + + // Compute the number of copies of the repeat symbol we will need + var repeatCount = Math.ceil( + (heightTotal - minHeight) / (middleFactor * repeatHeightTotal)); + + // Compute the total height of the delimiter including all the symbols + var realHeightTotal = + minHeight + repeatCount * middleFactor * repeatHeightTotal; + + // The center of the delimiter is placed at the center of the axis. Note + // that in this context, "center" means that the delimiter should be + // centered around the axis in the current style, while normally it is + // centered around the axis in textstyle. 
+ var axisHeight = fontMetrics.metrics.axisHeight; + if (center) { + axisHeight *= options.style.sizeMultiplier; + } + // Calculate the depth + var depth = realHeightTotal / 2 - axisHeight; + + // Now, we start building the pieces that will go into the vlist + + // Keep a list of the inner pieces + var inners = []; + + // Add the bottom symbol + inners.push(makeInner(bottom, font, mode)); + + var i; + if (middle === null) { + // Add that many symbols + for (i = 0; i < repeatCount; i++) { + inners.push(makeInner(repeat, font, mode)); + } + } else { + // When there is a middle bit, we need the middle part and two repeated + // sections + for (i = 0; i < repeatCount; i++) { + inners.push(makeInner(repeat, font, mode)); + } + inners.push(makeInner(middle, font, mode)); + for (i = 0; i < repeatCount; i++) { + inners.push(makeInner(repeat, font, mode)); + } + } + + // Add the top symbol + inners.push(makeInner(top, font, mode)); + + // Finally, build the vlist + var inner = buildCommon.makeVList(inners, "bottom", depth, options); + + return styleWrap( + makeSpan(["delimsizing", "mult"], [inner], options.getColor()), + Style.TEXT, options); +}; + +// There are three kinds of delimiters, delimiters that stack when they become +// too large +var stackLargeDelimiters = [ + "(", ")", "[", "\\lbrack", "]", "\\rbrack", + "\\{", "\\lbrace", "\\}", "\\rbrace", + "\\lfloor", "\\rfloor", "\\lceil", "\\rceil", + "\\surd", +]; + +// delimiters that always stack +var stackAlwaysDelimiters = [ + "\\uparrow", "\\downarrow", "\\updownarrow", + "\\Uparrow", "\\Downarrow", "\\Updownarrow", + "|", "\\|", "\\vert", "\\Vert", + "\\lvert", "\\rvert", "\\lVert", "\\rVert", + "\\lgroup", "\\rgroup", "\\lmoustache", "\\rmoustache", +]; + +// and delimiters that never stack +var stackNeverDelimiters = [ + "<", ">", "\\langle", "\\rangle", "/", "\\backslash", "\\lt", "\\gt", +]; + +// Metrics of the different sizes. 
Found by looking at TeX's output of +// $\bigl| // \Bigl| \biggl| \Biggl| \showlists$ +// Used to create stacked delimiters of appropriate sizes in makeSizedDelim. +var sizeToMaxHeight = [0, 1.2, 1.8, 2.4, 3.0]; + +/** + * Used to create a delimiter of a specific size, where `size` is 1, 2, 3, or 4. + */ +var makeSizedDelim = function(delim, size, options, mode) { + // < and > turn into \langle and \rangle in delimiters + if (delim === "<" || delim === "\\lt") { + delim = "\\langle"; + } else if (delim === ">" || delim === "\\gt") { + delim = "\\rangle"; + } + + // Sized delimiters are never centered. + if (utils.contains(stackLargeDelimiters, delim) || + utils.contains(stackNeverDelimiters, delim)) { + return makeLargeDelim(delim, size, false, options, mode); + } else if (utils.contains(stackAlwaysDelimiters, delim)) { + return makeStackedDelim( + delim, sizeToMaxHeight[size], false, options, mode); + } else { + throw new ParseError("Illegal delimiter: '" + delim + "'"); + } +}; + +/** + * There are three different sequences of delimiter sizes that the delimiters + * follow depending on the kind of delimiter. This is used when creating custom + * sized delimiters to decide whether to create a small, large, or stacked + * delimiter. + * + * In real TeX, these sequences aren't explicitly defined, but are instead + * defined inside the font metrics. Since there are only three sequences that + * are possible for the delimiters that TeX defines, it is easier to just encode + * them explicitly here. 
+ */ + +// Delimiters that never stack try small delimiters and large delimiters only +var stackNeverDelimiterSequence = [ + {type: "small", style: Style.SCRIPTSCRIPT}, + {type: "small", style: Style.SCRIPT}, + {type: "small", style: Style.TEXT}, + {type: "large", size: 1}, + {type: "large", size: 2}, + {type: "large", size: 3}, + {type: "large", size: 4}, +]; + +// Delimiters that always stack try the small delimiters first, then stack +var stackAlwaysDelimiterSequence = [ + {type: "small", style: Style.SCRIPTSCRIPT}, + {type: "small", style: Style.SCRIPT}, + {type: "small", style: Style.TEXT}, + {type: "stack"}, +]; + +// Delimiters that stack when large try the small and then large delimiters, and +// stack afterwards +var stackLargeDelimiterSequence = [ + {type: "small", style: Style.SCRIPTSCRIPT}, + {type: "small", style: Style.SCRIPT}, + {type: "small", style: Style.TEXT}, + {type: "large", size: 1}, + {type: "large", size: 2}, + {type: "large", size: 3}, + {type: "large", size: 4}, + {type: "stack"}, +]; + +/** + * Get the font used in a delimiter based on what kind of delimiter it is. + */ +var delimTypeToFont = function(type) { + if (type.type === "small") { + return "Main-Regular"; + } else if (type.type === "large") { + return "Size" + type.size + "-Regular"; + } else if (type.type === "stack") { + return "Size4-Regular"; + } +}; + +/** + * Traverse a sequence of types of delimiters to decide what kind of delimiter + * should be used to create a delimiter of the given height+depth. + */ +var traverseSequence = function(delim, height, sequence, options) { + // Here, we choose the index we should start at in the sequences. In smaller + // sizes (which correspond to larger numbers in style.size) we start earlier + // in the sequence. 
Thus, scriptscript starts at index 3-3=0, script starts + // at index 3-2=1, text starts at 3-1=2, and display starts at min(2,3-0)=2 + var start = Math.min(2, 3 - options.style.size); + for (var i = start; i < sequence.length; i++) { + if (sequence[i].type === "stack") { + // This is always the last delimiter, so we just break the loop now. + break; + } + + var metrics = getMetrics(delim, delimTypeToFont(sequence[i])); + var heightDepth = metrics.height + metrics.depth; + + // Small delimiters are scaled down versions of the same font, so we + // account for the style change size. + + if (sequence[i].type === "small") { + heightDepth *= sequence[i].style.sizeMultiplier; + } + + // Check if the delimiter at this size works for the given height. + if (heightDepth > height) { + return sequence[i]; + } + } + + // If we reached the end of the sequence, return the last sequence element. + return sequence[sequence.length - 1]; +}; + +/** + * Make a delimiter of a given height+depth, with optional centering. Here, we + * traverse the sequences, and create a delimiter that the sequence tells us to. + */ +var makeCustomSizedDelim = function(delim, height, center, options, mode) { + if (delim === "<" || delim === "\\lt") { + delim = "\\langle"; + } else if (delim === ">" || delim === "\\gt") { + delim = "\\rangle"; + } + + // Decide what sequence to use + var sequence; + if (utils.contains(stackNeverDelimiters, delim)) { + sequence = stackNeverDelimiterSequence; + } else if (utils.contains(stackLargeDelimiters, delim)) { + sequence = stackLargeDelimiterSequence; + } else { + sequence = stackAlwaysDelimiterSequence; + } + + // Look through the sequence + var delimType = traverseSequence(delim, height, sequence, options); + + // Depending on the sequence element we decided on, call the appropriate + // function. 
+ if (delimType.type === "small") { + return makeSmallDelim(delim, delimType.style, center, options, mode); + } else if (delimType.type === "large") { + return makeLargeDelim(delim, delimType.size, center, options, mode); + } else if (delimType.type === "stack") { + return makeStackedDelim(delim, height, center, options, mode); + } +}; + +/** + * Make a delimiter for use with `\left` and `\right`, given a height and depth + * of an expression that the delimiters surround. + */ +var makeLeftRightDelim = function(delim, height, depth, options, mode) { + // We always center \left/\right delimiters, so the axis is always shifted + var axisHeight = + fontMetrics.metrics.axisHeight * options.style.sizeMultiplier; + + // Taken from TeX source, tex.web, function make_left_right + var delimiterFactor = 901; + var delimiterExtend = 5.0 / fontMetrics.metrics.ptPerEm; + + var maxDistFromAxis = Math.max( + height - axisHeight, depth + axisHeight); + + var totalHeight = Math.max( + // In real TeX, calculations are done using integral values which are + // 65536 per pt, or 655360 per em. So, the division here truncates in + // TeX but doesn't here, producing different results. 
If we wanted to + // exactly match TeX's calculation, we could do + // Math.floor(655360 * maxDistFromAxis / 500) * + // delimiterFactor / 655360 + // (To see the difference, compare + // x^{x^{\left(\rule{0.1em}{0.68em}\right)}} + // in TeX and KaTeX) + maxDistFromAxis / 500 * delimiterFactor, + 2 * maxDistFromAxis - delimiterExtend); + + // Finally, we defer to `makeCustomSizedDelim` with our calculated total + // height + return makeCustomSizedDelim(delim, totalHeight, true, options, mode); +}; + +module.exports = { + sizedDelim: makeSizedDelim, + customSizedDelim: makeCustomSizedDelim, + leftRightDelim: makeLeftRightDelim, +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/domTree.js b/fastcdm/tokenize_latex/third_party/katex/src/domTree.js new file mode 100644 index 0000000000000000000000000000000000000000..e0d8e925a618b5127ebc23a96dc8962640fd9e58 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/domTree.js @@ -0,0 +1,269 @@ +/** + * These objects store the data about the DOM nodes we create, as well as some + * extra data. They can then be transformed into real DOM nodes with the + * `toNode` function or HTML markup using `toMarkup`. They are useful for both + * storing extra properties on the nodes, as well as providing a way to easily + * work with the DOM. + * + * Similar functions for working with MathML nodes exist in mathMLTree.js. + */ + +var utils = require("./utils"); + +/** + * Create an HTML className based on a list of classes. In addition to joining + * with spaces, we also remove null or empty classes. + */ +var createClass = function(classes) { + classes = classes.slice(); + for (var i = classes.length - 1; i >= 0; i--) { + if (!classes[i]) { + classes.splice(i, 1); + } + } + + return classes.join(" "); +}; + +/** + * This node represents a span node, with a className, a list of children, and + * an inline style. It also contains information about its height, depth, and + * maxFontSize. 
+ */ +function span(classes, children, height, depth, maxFontSize, style) { + this.classes = classes || []; + this.children = children || []; + this.height = height || 0; + this.depth = depth || 0; + this.maxFontSize = maxFontSize || 0; + this.style = style || {}; + this.attributes = {}; +} + +/** + * Sets an arbitrary attribute on the span. Warning: use this wisely. Not all + * browsers support attributes the same, and having too many custom attributes + * is probably bad. + */ +span.prototype.setAttribute = function(attribute, value) { + this.attributes[attribute] = value; +}; + +/** + * Convert the span into an HTML node + */ +span.prototype.toNode = function() { + var span = document.createElement("span"); + + // Apply the class + span.className = createClass(this.classes); + + // Apply inline styles + for (var style in this.style) { + if (Object.prototype.hasOwnProperty.call(this.style, style)) { + span.style[style] = this.style[style]; + } + } + + // Apply attributes + for (var attr in this.attributes) { + if (Object.prototype.hasOwnProperty.call(this.attributes, attr)) { + span.setAttribute(attr, this.attributes[attr]); + } + } + + // Append the children, also as HTML nodes + for (var i = 0; i < this.children.length; i++) { + span.appendChild(this.children[i].toNode()); + } + + return span; +}; + +/** + * Convert the span into an HTML markup string + */ +span.prototype.toMarkup = function() { + var markup = " 0) { + span = document.createElement("span"); + span.style.marginRight = this.italic + "em"; + } + + if (this.classes.length > 0) { + span = span || document.createElement("span"); + span.className = createClass(this.classes); + } + + for (var style in this.style) { + if (this.style.hasOwnProperty(style)) { + span = span || document.createElement("span"); + span.style[style] = this.style[style]; + } + } + + if (span) { + span.appendChild(node); + return span; + } else { + return node; + } +}; + +/** + * Creates markup for a symbol node. 
+ */ +symbolNode.prototype.toMarkup = function() { + // TODO(alpert): More duplication than I'd like from + // span.prototype.toMarkup and symbolNode.prototype.toNode... + var needsSpan = false; + + var markup = " 0) { + styles += "margin-right:" + this.italic + "em;"; + } + for (var style in this.style) { + if (this.style.hasOwnProperty(style)) { + styles += utils.hyphenate(style) + ":" + this.style[style] + ";"; + } + } + + if (styles) { + needsSpan = true; + markup += " style=\"" + utils.escape(styles) + "\""; + } + + var escaped = utils.escape(this.value); + if (needsSpan) { + markup += ">"; + markup += escaped; + markup += ""; + return markup; + } else { + return escaped; + } +}; + +module.exports = { + span: span, + documentFragment: documentFragment, + symbolNode: symbolNode, +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/environments.js b/fastcdm/tokenize_latex/third_party/katex/src/environments.js new file mode 100644 index 0000000000000000000000000000000000000000..01d62b5e599b01d28109bd3cc1103b457d03b892 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/environments.js @@ -0,0 +1,295 @@ +/* eslint no-constant-condition:0 */ +var fontMetrics = require("./fontMetrics"); +var parseData = require("./parseData"); +var ParseError = require("./ParseError"); + +var ParseNode = parseData.ParseNode; + +/** + * Parse the body of the environment, with rows delimited by \\ and + * columns delimited by &, and create a nested list in row-major order + * with one group per cell. 
+ */ +var q = 0 ; +function parseArray(parser, result) { + var row = []; + var body = [row]; + var rowGaps = []; + + while (true) { + + // if (q == 1) console.error(parser.nextToken.text); + try { + var cell = parser.parseExpression(false, null); + } catch (e) { + // console.error(e); + exit(); + } + // if (q == 1) exit(); + row.push(new ParseNode("ordgroup", cell, parser.mode)); + var next = parser.nextToken.text; + if (next === "&") { + parser.consume(); + } else if (next === "\\end" || next == "}") { + break; + } else if (next === "\\\\" || next === "\\cr") { + + var cr = parser.parseFunction(); + + rowGaps.push(cr.value.size); + row = []; + body.push(row); + } else { + // TODO: Clean up the following hack once #385 got merged + var pos = Math.min(parser.pos + 1, parser.lexer._input.length); + throw new ParseError("Expected & or \\\\ or \\end", + parser.lexer, pos); + } + } + result.body = body; + result.rowGaps = rowGaps; + // if (q == 1) exit(); + var node = new ParseNode(result.type, result, parser.mode); + return node; +} + +/* + * An environment definition is very similar to a function definition: + * it is declared with a name or a list of names, a set of properties + * and a handler containing the actual implementation. + * + * The properties include: + * - numArgs: The number of arguments after the \begin{name} function. + * - argTypes: (optional) Just like for a function + * - allowedInText: (optional) Whether or not the environment is allowed inside + * text mode (default false) (not enforced yet) + * - numOptionalArgs: (optional) Just like for a function + * A bare number instead of that object indicates the numArgs value. + * + * The handler function will receive two arguments + * - context: information and references provided by the parser + * - args: an array of arguments passed to \begin{name} + * The context contains the following properties: + * - envName: the name of the environment, one of the listed names. 
+ * - parser: the parser object + * - lexer: the lexer object + * - positions: the positions associated with these arguments from args. + * The handler must return a ParseResult. + */ + +function defineEnvironment(names, props, handler) { + if (typeof names === "string") { + names = [names]; + } + if (typeof props === "number") { + props = { numArgs: props }; + } + // Set default values of environments + var data = { + numArgs: props.numArgs || 0, + argTypes: props.argTypes, + greediness: 1, + allowedInText: !!props.allowedInText, + numOptionalArgs: props.numOptionalArgs || 0, + handler: handler, + }; + for (var i = 0; i < names.length; ++i) { + module.exports[names[i]] = data; + } +} + +// Arrays are part of LaTeX, defined in lttab.dtx so its documentation +// is part of the source2e.pdf file of LaTeX2e source documentation. +defineEnvironment("array", { + numArgs: 1, +}, function(context, args) { + var colalign = args[0]; + colalign = colalign.value.map ? colalign.value : [colalign]; + var cols = colalign.map(function(node) { + var ca = node.value; + if ("lcr".indexOf(ca) !== -1) { + return { + type: "align", + align: ca, + }; + } else if (ca === "|") { + return { + type: "separator", + separator: "|", + }; + } + // throw new ParseError( + // "Unknown column alignment: " + node.value, + // context.lexer, context.positions[1]); + }); + var res = { + type: "array", + cols: cols, + hskipBeforeAndAfter: true, // \@preamble in lttab.dtx + }; + res = parseArray(context.parser, res); + return res; +}); + +defineEnvironment("tabular", { + numArgs: 1, +}, function(context, args) { + var colalign = args[0]; + colalign = colalign.value.map ? 
colalign.value : [colalign]; + var cols = colalign.map(function(node) { + var ca = node.value; + if ("lcr".indexOf(ca) !== -1) { + return { + type: "align", + align: ca, + }; + } else if (ca === "|") { + return { + type: "separator", + separator: "|", + }; + } + // throw new ParseError( + // "Unknown column alignment: " + node.value, + // context.lexer, context.positions[1]); + }); + var res = { + type: "array", + style: "tabular", + cols: cols, + hskipBeforeAndAfter: true, // \@preamble in lttab.dtx + }; + res = parseArray(context.parser, res); + return res; +}); + +// The matrix environments of amsmath builds on the array environment +// of LaTeX, which is discussed above. +defineEnvironment([ + "matrix", + "pmatrix", + "bmatrix", + "Bmatrix", + "vmatrix", + "Vmatrix", +], { +}, function(context) { + var delimiters = { + "matrix": null, + "pmatrix": ["(", ")"], + "bmatrix": ["[", "]"], + "Bmatrix": ["\\{", "\\}"], + "vmatrix": ["|", "|"], + "Vmatrix": ["\\Vert", "\\Vert"], + }[context.envName]; + var res = { + type: "array", + hskipBeforeAndAfter: false, // \hskip -\arraycolsep in amsmath + }; + q = 1; + res = parseArray(context.parser, res); + + if (delimiters) { + res = new ParseNode("leftright", { + body: [res], + left: delimiters[0], + right: delimiters[1], + }, context.mode); + } + return res; +}); + +// A cases environment (in amsmath.sty) is almost equivalent to +// \def\arraystretch{1.2}% +// \left\{\begin{array}{@{}l@{\quad}l@{}} … \end{array}\right. 
// Handler shared by the `picture` and `cases` environments: a two-column,
// left-aligned array with arraystretch 1.2, wrapped in a left brace with an
// empty (".") right delimiter. The first column is followed by one quad of
// space; the second has no padding.
function bracedTwoColumnHandler(context) {
    var firstColumn = {
        type: "align",
        align: "l",
        pregap: 0,
        postgap: fontMetrics.metrics.quad,
    };
    var secondColumn = {
        type: "align",
        align: "l",
        pregap: 0,
        postgap: 0,
    };

    var table = parseArray(context.parser, {
        type: "array",
        arraystretch: 1.2,
        cols: [firstColumn, secondColumn],
    });

    return new ParseNode("leftright", {
        body: [table],
        left: "\\{",
        right: ".",
    }, context.mode);
}

// Registered separately so each name keeps its own definition record.
defineEnvironment("picture", {
}, bracedTwoColumnHandler);

defineEnvironment("cases", {
}, bracedTwoColumnHandler);

// An aligned environment is like the align* environment
// except it operates within math mode.
// Note that we assume \normallineskiplimit to be zero,
// so that \strut@ is the same as \strut.
+defineEnvironment("aligned", { +}, function(context) { + var res = { + type: "array", + cols: [], + }; + res = parseArray(context.parser, res); + var emptyGroup = new ParseNode("ordgroup", [], context.mode); + var numCols = 0; + res.value.body.forEach(function(row) { + var i; + for (i = 1; i < row.length; i += 2) { + row[i].value.unshift(emptyGroup); + } + if (numCols < row.length) { + numCols = row.length; + } + }); + for (var i = 0; i < numCols; ++i) { + var align = "r"; + var pregap = 0; + if (i % 2 === 1) { + align = "l"; + } else if (i > 0) { + pregap = 2; // one \qquad between columns + } + res.value.cols[i] = { + type: "align", + align: align, + pregap: pregap, + postgap: 0, + }; + } + return res; +}); diff --git a/fastcdm/tokenize_latex/third_party/katex/src/fontMetrics.js b/fastcdm/tokenize_latex/third_party/katex/src/fontMetrics.js new file mode 100644 index 0000000000000000000000000000000000000000..db9e44bfa9b43f958076452e906bd422aa8d6785 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/fontMetrics.js @@ -0,0 +1,147 @@ +/* eslint no-unused-vars:0 */ + +var Style = require("./Style"); + +/** + * This file contains metrics regarding fonts and individual symbols. The sigma + * and xi variables, as well as the metricMap map contain data extracted from + * TeX, TeX font metrics, and the TTF files. These data are then exposed via the + * `metrics` variable and the getCharacterMetrics function. + */ + +// These font metrics are extracted from TeX by using +// \font\a=cmmi10 +// \showthe\fontdimenX\a +// where X is the corresponding variable number. These correspond to the font +// parameters of the symbol fonts. In TeX, there are actually three sets of +// dimensions, one for each of textstyle, scriptstyle, and scriptscriptstyle, +// but we only use the textstyle ones, and scale certain dimensions accordingly. +// See the TeXbook, page 441. 
+var sigma1 = 0.025; +var sigma2 = 0; +var sigma3 = 0; +var sigma4 = 0; +var sigma5 = 0.431; +var sigma6 = 1; +var sigma7 = 0; +var sigma8 = 0.677; +var sigma9 = 0.394; +var sigma10 = 0.444; +var sigma11 = 0.686; +var sigma12 = 0.345; +var sigma13 = 0.413; +var sigma14 = 0.363; +var sigma15 = 0.289; +var sigma16 = 0.150; +var sigma17 = 0.247; +var sigma18 = 0.386; +var sigma19 = 0.050; +var sigma20 = 2.390; +var sigma21 = 1.01; +var sigma21Script = 0.81; +var sigma21ScriptScript = 0.71; +var sigma22 = 0.250; + +// These font metrics are extracted from TeX by using +// \font\a=cmex10 +// \showthe\fontdimenX\a +// where X is the corresponding variable number. These correspond to the font +// parameters of the extension fonts (family 3). See the TeXbook, page 441. +var xi1 = 0; +var xi2 = 0; +var xi3 = 0; +var xi4 = 0; +var xi5 = 0.431; +var xi6 = 1; +var xi7 = 0; +var xi8 = 0.04; +var xi9 = 0.111; +var xi10 = 0.166; +var xi11 = 0.2; +var xi12 = 0.6; +var xi13 = 0.1; + +// This value determines how large a pt is, for metrics which are defined in +// terms of pts. +// This value is also used in katex.less; if you change it make sure the values +// match. +var ptPerEm = 10.0; + +// The space between adjacent `|` columns in an array definition. From +// `\showthe\doublerulesep` in LaTeX. +var doubleRuleSep = 2.0 / ptPerEm; + +/** + * This is just a mapping from common names to real metrics + */ +var metrics = { + xHeight: sigma5, + quad: sigma6, + num1: sigma8, + num2: sigma9, + num3: sigma10, + denom1: sigma11, + denom2: sigma12, + sup1: sigma13, + sup2: sigma14, + sup3: sigma15, + sub1: sigma16, + sub2: sigma17, + supDrop: sigma18, + subDrop: sigma19, + axisHeight: sigma22, + defaultRuleThickness: xi8, + bigOpSpacing1: xi9, + bigOpSpacing2: xi10, + bigOpSpacing3: xi11, + bigOpSpacing4: xi12, + bigOpSpacing5: xi13, + ptPerEm: ptPerEm, + emPerEx: sigma5 / sigma6, + doubleRuleSep: doubleRuleSep, + + // TODO(alpert): Missing parallel structure here. 
We should probably add + // style-specific metrics for all of these. + delim1: sigma20, + getDelim2: function(style) { + if (style.size === Style.TEXT.size) { + return sigma21; + } else if (style.size === Style.SCRIPT.size) { + return sigma21Script; + } else if (style.size === Style.SCRIPTSCRIPT.size) { + return sigma21ScriptScript; + } + throw new Error("Unexpected style size: " + style.size); + }, +}; + +// This map contains a mapping from font name and character code to character +// metrics, including height, depth, italic correction, and skew (kern from the +// character to the corresponding \skewchar) +// This map is generated via `make metrics`. It should not be changed manually. +var metricMap = require("./fontMetricsData"); + +/** + * This function is a convenience function for looking up information in the + * metricMap table. It takes a character as a string, and a style. + * + * Note: the `width` property may be undefined if fontMetricsData.js wasn't + * built using `Make extended_metrics`. 
+ */ +var getCharacterMetrics = function(character, style) { + var metrics = metricMap[style][character.charCodeAt(0)]; + if (metrics) { + return { + depth: metrics[0], + height: metrics[1], + italic: metrics[2], + skew: metrics[3], + width: metrics[4], + }; + } +}; + +module.exports = { + metrics: metrics, + getCharacterMetrics: getCharacterMetrics, +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/fontMetricsData.js b/fastcdm/tokenize_latex/third_party/katex/src/fontMetricsData.js new file mode 100644 index 0000000000000000000000000000000000000000..957f55b87fc89401ef636857ef98c66c592d64f4 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/fontMetricsData.js @@ -0,0 +1,1752 @@ +module.exports = { + "AMS-Regular": { + "65": [0, 0.68889, 0, 0], + "66": [0, 0.68889, 0, 0], + "67": [0, 0.68889, 0, 0], + "68": [0, 0.68889, 0, 0], + "69": [0, 0.68889, 0, 0], + "70": [0, 0.68889, 0, 0], + "71": [0, 0.68889, 0, 0], + "72": [0, 0.68889, 0, 0], + "73": [0, 0.68889, 0, 0], + "74": [0.16667, 0.68889, 0, 0], + "75": [0, 0.68889, 0, 0], + "76": [0, 0.68889, 0, 0], + "77": [0, 0.68889, 0, 0], + "78": [0, 0.68889, 0, 0], + "79": [0.16667, 0.68889, 0, 0], + "80": [0, 0.68889, 0, 0], + "81": [0.16667, 0.68889, 0, 0], + "82": [0, 0.68889, 0, 0], + "83": [0, 0.68889, 0, 0], + "84": [0, 0.68889, 0, 0], + "85": [0, 0.68889, 0, 0], + "86": [0, 0.68889, 0, 0], + "87": [0, 0.68889, 0, 0], + "88": [0, 0.68889, 0, 0], + "89": [0, 0.68889, 0, 0], + "90": [0, 0.68889, 0, 0], + "107": [0, 0.68889, 0, 0], + "165": [0, 0.675, 0.025, 0], + "174": [0.15559, 0.69224, 0, 0], + "240": [0, 0.68889, 0, 0], + "295": [0, 0.68889, 0, 0], + "710": [0, 0.825, 0, 0], + "732": [0, 0.9, 0, 0], + "770": [0, 0.825, 0, 0], + "771": [0, 0.9, 0, 0], + "989": [0.08167, 0.58167, 0, 0], + "1008": [0, 0.43056, 0.04028, 0], + "8245": [0, 0.54986, 0, 0], + "8463": [0, 0.68889, 0, 0], + "8487": [0, 0.68889, 0, 0], + "8498": [0, 0.68889, 0, 0], + "8502": [0, 0.68889, 0, 0], + "8503": [0, 0.68889, 
0, 0], + "8504": [0, 0.68889, 0, 0], + "8513": [0, 0.68889, 0, 0], + "8592": [-0.03598, 0.46402, 0, 0], + "8594": [-0.03598, 0.46402, 0, 0], + "8602": [-0.13313, 0.36687, 0, 0], + "8603": [-0.13313, 0.36687, 0, 0], + "8606": [0.01354, 0.52239, 0, 0], + "8608": [0.01354, 0.52239, 0, 0], + "8610": [0.01354, 0.52239, 0, 0], + "8611": [0.01354, 0.52239, 0, 0], + "8619": [0, 0.54986, 0, 0], + "8620": [0, 0.54986, 0, 0], + "8621": [-0.13313, 0.37788, 0, 0], + "8622": [-0.13313, 0.36687, 0, 0], + "8624": [0, 0.69224, 0, 0], + "8625": [0, 0.69224, 0, 0], + "8630": [0, 0.43056, 0, 0], + "8631": [0, 0.43056, 0, 0], + "8634": [0.08198, 0.58198, 0, 0], + "8635": [0.08198, 0.58198, 0, 0], + "8638": [0.19444, 0.69224, 0, 0], + "8639": [0.19444, 0.69224, 0, 0], + "8642": [0.19444, 0.69224, 0, 0], + "8643": [0.19444, 0.69224, 0, 0], + "8644": [0.1808, 0.675, 0, 0], + "8646": [0.1808, 0.675, 0, 0], + "8647": [0.1808, 0.675, 0, 0], + "8648": [0.19444, 0.69224, 0, 0], + "8649": [0.1808, 0.675, 0, 0], + "8650": [0.19444, 0.69224, 0, 0], + "8651": [0.01354, 0.52239, 0, 0], + "8652": [0.01354, 0.52239, 0, 0], + "8653": [-0.13313, 0.36687, 0, 0], + "8654": [-0.13313, 0.36687, 0, 0], + "8655": [-0.13313, 0.36687, 0, 0], + "8666": [0.13667, 0.63667, 0, 0], + "8667": [0.13667, 0.63667, 0, 0], + "8669": [-0.13313, 0.37788, 0, 0], + "8672": [-0.064, 0.437, 0, 0], + "8674": [-0.064, 0.437, 0, 0], + "8705": [0, 0.825, 0, 0], + "8708": [0, 0.68889, 0, 0], + "8709": [0.08167, 0.58167, 0, 0], + "8717": [0, 0.43056, 0, 0], + "8722": [-0.03598, 0.46402, 0, 0], + "8724": [0.08198, 0.69224, 0, 0], + "8726": [0.08167, 0.58167, 0, 0], + "8733": [0, 0.69224, 0, 0], + "8736": [0, 0.69224, 0, 0], + "8737": [0, 0.69224, 0, 0], + "8738": [0.03517, 0.52239, 0, 0], + "8739": [0.08167, 0.58167, 0, 0], + "8740": [0.25142, 0.74111, 0, 0], + "8741": [0.08167, 0.58167, 0, 0], + "8742": [0.25142, 0.74111, 0, 0], + "8756": [0, 0.69224, 0, 0], + "8757": [0, 0.69224, 0, 0], + "8764": [-0.13313, 0.36687, 0, 0], + 
"8765": [-0.13313, 0.37788, 0, 0], + "8769": [-0.13313, 0.36687, 0, 0], + "8770": [-0.03625, 0.46375, 0, 0], + "8774": [0.30274, 0.79383, 0, 0], + "8776": [-0.01688, 0.48312, 0, 0], + "8778": [0.08167, 0.58167, 0, 0], + "8782": [0.06062, 0.54986, 0, 0], + "8783": [0.06062, 0.54986, 0, 0], + "8785": [0.08198, 0.58198, 0, 0], + "8786": [0.08198, 0.58198, 0, 0], + "8787": [0.08198, 0.58198, 0, 0], + "8790": [0, 0.69224, 0, 0], + "8791": [0.22958, 0.72958, 0, 0], + "8796": [0.08198, 0.91667, 0, 0], + "8806": [0.25583, 0.75583, 0, 0], + "8807": [0.25583, 0.75583, 0, 0], + "8808": [0.25142, 0.75726, 0, 0], + "8809": [0.25142, 0.75726, 0, 0], + "8812": [0.25583, 0.75583, 0, 0], + "8814": [0.20576, 0.70576, 0, 0], + "8815": [0.20576, 0.70576, 0, 0], + "8816": [0.30274, 0.79383, 0, 0], + "8817": [0.30274, 0.79383, 0, 0], + "8818": [0.22958, 0.72958, 0, 0], + "8819": [0.22958, 0.72958, 0, 0], + "8822": [0.1808, 0.675, 0, 0], + "8823": [0.1808, 0.675, 0, 0], + "8828": [0.13667, 0.63667, 0, 0], + "8829": [0.13667, 0.63667, 0, 0], + "8830": [0.22958, 0.72958, 0, 0], + "8831": [0.22958, 0.72958, 0, 0], + "8832": [0.20576, 0.70576, 0, 0], + "8833": [0.20576, 0.70576, 0, 0], + "8840": [0.30274, 0.79383, 0, 0], + "8841": [0.30274, 0.79383, 0, 0], + "8842": [0.13597, 0.63597, 0, 0], + "8843": [0.13597, 0.63597, 0, 0], + "8847": [0.03517, 0.54986, 0, 0], + "8848": [0.03517, 0.54986, 0, 0], + "8858": [0.08198, 0.58198, 0, 0], + "8859": [0.08198, 0.58198, 0, 0], + "8861": [0.08198, 0.58198, 0, 0], + "8862": [0, 0.675, 0, 0], + "8863": [0, 0.675, 0, 0], + "8864": [0, 0.675, 0, 0], + "8865": [0, 0.675, 0, 0], + "8872": [0, 0.69224, 0, 0], + "8873": [0, 0.69224, 0, 0], + "8874": [0, 0.69224, 0, 0], + "8876": [0, 0.68889, 0, 0], + "8877": [0, 0.68889, 0, 0], + "8878": [0, 0.68889, 0, 0], + "8879": [0, 0.68889, 0, 0], + "8882": [0.03517, 0.54986, 0, 0], + "8883": [0.03517, 0.54986, 0, 0], + "8884": [0.13667, 0.63667, 0, 0], + "8885": [0.13667, 0.63667, 0, 0], + "8888": [0, 0.54986, 0, 0], + 
"8890": [0.19444, 0.43056, 0, 0], + "8891": [0.19444, 0.69224, 0, 0], + "8892": [0.19444, 0.69224, 0, 0], + "8901": [0, 0.54986, 0, 0], + "8903": [0.08167, 0.58167, 0, 0], + "8905": [0.08167, 0.58167, 0, 0], + "8906": [0.08167, 0.58167, 0, 0], + "8907": [0, 0.69224, 0, 0], + "8908": [0, 0.69224, 0, 0], + "8909": [-0.03598, 0.46402, 0, 0], + "8910": [0, 0.54986, 0, 0], + "8911": [0, 0.54986, 0, 0], + "8912": [0.03517, 0.54986, 0, 0], + "8913": [0.03517, 0.54986, 0, 0], + "8914": [0, 0.54986, 0, 0], + "8915": [0, 0.54986, 0, 0], + "8916": [0, 0.69224, 0, 0], + "8918": [0.0391, 0.5391, 0, 0], + "8919": [0.0391, 0.5391, 0, 0], + "8920": [0.03517, 0.54986, 0, 0], + "8921": [0.03517, 0.54986, 0, 0], + "8922": [0.38569, 0.88569, 0, 0], + "8923": [0.38569, 0.88569, 0, 0], + "8926": [0.13667, 0.63667, 0, 0], + "8927": [0.13667, 0.63667, 0, 0], + "8928": [0.30274, 0.79383, 0, 0], + "8929": [0.30274, 0.79383, 0, 0], + "8934": [0.23222, 0.74111, 0, 0], + "8935": [0.23222, 0.74111, 0, 0], + "8936": [0.23222, 0.74111, 0, 0], + "8937": [0.23222, 0.74111, 0, 0], + "8938": [0.20576, 0.70576, 0, 0], + "8939": [0.20576, 0.70576, 0, 0], + "8940": [0.30274, 0.79383, 0, 0], + "8941": [0.30274, 0.79383, 0, 0], + "8994": [0.19444, 0.69224, 0, 0], + "8995": [0.19444, 0.69224, 0, 0], + "9416": [0.15559, 0.69224, 0, 0], + "9484": [0, 0.69224, 0, 0], + "9488": [0, 0.69224, 0, 0], + "9492": [0, 0.37788, 0, 0], + "9496": [0, 0.37788, 0, 0], + "9585": [0.19444, 0.68889, 0, 0], + "9586": [0.19444, 0.74111, 0, 0], + "9632": [0, 0.675, 0, 0], + "9633": [0, 0.675, 0, 0], + "9650": [0, 0.54986, 0, 0], + "9651": [0, 0.54986, 0, 0], + "9654": [0.03517, 0.54986, 0, 0], + "9660": [0, 0.54986, 0, 0], + "9661": [0, 0.54986, 0, 0], + "9664": [0.03517, 0.54986, 0, 0], + "9674": [0.11111, 0.69224, 0, 0], + "9733": [0.19444, 0.69224, 0, 0], + "10003": [0, 0.69224, 0, 0], + "10016": [0, 0.69224, 0, 0], + "10731": [0.11111, 0.69224, 0, 0], + "10846": [0.19444, 0.75583, 0, 0], + "10877": [0.13667, 0.63667, 0, 0], 
+ "10878": [0.13667, 0.63667, 0, 0], + "10885": [0.25583, 0.75583, 0, 0], + "10886": [0.25583, 0.75583, 0, 0], + "10887": [0.13597, 0.63597, 0, 0], + "10888": [0.13597, 0.63597, 0, 0], + "10889": [0.26167, 0.75726, 0, 0], + "10890": [0.26167, 0.75726, 0, 0], + "10891": [0.48256, 0.98256, 0, 0], + "10892": [0.48256, 0.98256, 0, 0], + "10901": [0.13667, 0.63667, 0, 0], + "10902": [0.13667, 0.63667, 0, 0], + "10933": [0.25142, 0.75726, 0, 0], + "10934": [0.25142, 0.75726, 0, 0], + "10935": [0.26167, 0.75726, 0, 0], + "10936": [0.26167, 0.75726, 0, 0], + "10937": [0.26167, 0.75726, 0, 0], + "10938": [0.26167, 0.75726, 0, 0], + "10949": [0.25583, 0.75583, 0, 0], + "10950": [0.25583, 0.75583, 0, 0], + "10955": [0.28481, 0.79383, 0, 0], + "10956": [0.28481, 0.79383, 0, 0], + "57350": [0.08167, 0.58167, 0, 0], + "57351": [0.08167, 0.58167, 0, 0], + "57352": [0.08167, 0.58167, 0, 0], + "57353": [0, 0.43056, 0.04028, 0], + "57356": [0.25142, 0.75726, 0, 0], + "57357": [0.25142, 0.75726, 0, 0], + "57358": [0.41951, 0.91951, 0, 0], + "57359": [0.30274, 0.79383, 0, 0], + "57360": [0.30274, 0.79383, 0, 0], + "57361": [0.41951, 0.91951, 0, 0], + "57366": [0.25142, 0.75726, 0, 0], + "57367": [0.25142, 0.75726, 0, 0], + "57368": [0.25142, 0.75726, 0, 0], + "57369": [0.25142, 0.75726, 0, 0], + "57370": [0.13597, 0.63597, 0, 0], + "57371": [0.13597, 0.63597, 0, 0], + }, + "Caligraphic-Regular": { + "48": [0, 0.43056, 0, 0], + "49": [0, 0.43056, 0, 0], + "50": [0, 0.43056, 0, 0], + "51": [0.19444, 0.43056, 0, 0], + "52": [0.19444, 0.43056, 0, 0], + "53": [0.19444, 0.43056, 0, 0], + "54": [0, 0.64444, 0, 0], + "55": [0.19444, 0.43056, 0, 0], + "56": [0, 0.64444, 0, 0], + "57": [0.19444, 0.43056, 0, 0], + "65": [0, 0.68333, 0, 0.19445], + "66": [0, 0.68333, 0.03041, 0.13889], + "67": [0, 0.68333, 0.05834, 0.13889], + "68": [0, 0.68333, 0.02778, 0.08334], + "69": [0, 0.68333, 0.08944, 0.11111], + "70": [0, 0.68333, 0.09931, 0.11111], + "71": [0.09722, 0.68333, 0.0593, 0.11111], + "72": 
[0, 0.68333, 0.00965, 0.11111], + "73": [0, 0.68333, 0.07382, 0], + "74": [0.09722, 0.68333, 0.18472, 0.16667], + "75": [0, 0.68333, 0.01445, 0.05556], + "76": [0, 0.68333, 0, 0.13889], + "77": [0, 0.68333, 0, 0.13889], + "78": [0, 0.68333, 0.14736, 0.08334], + "79": [0, 0.68333, 0.02778, 0.11111], + "80": [0, 0.68333, 0.08222, 0.08334], + "81": [0.09722, 0.68333, 0, 0.11111], + "82": [0, 0.68333, 0, 0.08334], + "83": [0, 0.68333, 0.075, 0.13889], + "84": [0, 0.68333, 0.25417, 0], + "85": [0, 0.68333, 0.09931, 0.08334], + "86": [0, 0.68333, 0.08222, 0], + "87": [0, 0.68333, 0.08222, 0.08334], + "88": [0, 0.68333, 0.14643, 0.13889], + "89": [0.09722, 0.68333, 0.08222, 0.08334], + "90": [0, 0.68333, 0.07944, 0.13889], + }, + "Fraktur-Regular": { + "33": [0, 0.69141, 0, 0], + "34": [0, 0.69141, 0, 0], + "38": [0, 0.69141, 0, 0], + "39": [0, 0.69141, 0, 0], + "40": [0.24982, 0.74947, 0, 0], + "41": [0.24982, 0.74947, 0, 0], + "42": [0, 0.62119, 0, 0], + "43": [0.08319, 0.58283, 0, 0], + "44": [0, 0.10803, 0, 0], + "45": [0.08319, 0.58283, 0, 0], + "46": [0, 0.10803, 0, 0], + "47": [0.24982, 0.74947, 0, 0], + "48": [0, 0.47534, 0, 0], + "49": [0, 0.47534, 0, 0], + "50": [0, 0.47534, 0, 0], + "51": [0.18906, 0.47534, 0, 0], + "52": [0.18906, 0.47534, 0, 0], + "53": [0.18906, 0.47534, 0, 0], + "54": [0, 0.69141, 0, 0], + "55": [0.18906, 0.47534, 0, 0], + "56": [0, 0.69141, 0, 0], + "57": [0.18906, 0.47534, 0, 0], + "58": [0, 0.47534, 0, 0], + "59": [0.12604, 0.47534, 0, 0], + "61": [-0.13099, 0.36866, 0, 0], + "63": [0, 0.69141, 0, 0], + "65": [0, 0.69141, 0, 0], + "66": [0, 0.69141, 0, 0], + "67": [0, 0.69141, 0, 0], + "68": [0, 0.69141, 0, 0], + "69": [0, 0.69141, 0, 0], + "70": [0.12604, 0.69141, 0, 0], + "71": [0, 0.69141, 0, 0], + "72": [0.06302, 0.69141, 0, 0], + "73": [0, 0.69141, 0, 0], + "74": [0.12604, 0.69141, 0, 0], + "75": [0, 0.69141, 0, 0], + "76": [0, 0.69141, 0, 0], + "77": [0, 0.69141, 0, 0], + "78": [0, 0.69141, 0, 0], + "79": [0, 0.69141, 0, 0], + 
"80": [0.18906, 0.69141, 0, 0], + "81": [0.03781, 0.69141, 0, 0], + "82": [0, 0.69141, 0, 0], + "83": [0, 0.69141, 0, 0], + "84": [0, 0.69141, 0, 0], + "85": [0, 0.69141, 0, 0], + "86": [0, 0.69141, 0, 0], + "87": [0, 0.69141, 0, 0], + "88": [0, 0.69141, 0, 0], + "89": [0.18906, 0.69141, 0, 0], + "90": [0.12604, 0.69141, 0, 0], + "91": [0.24982, 0.74947, 0, 0], + "93": [0.24982, 0.74947, 0, 0], + "94": [0, 0.69141, 0, 0], + "97": [0, 0.47534, 0, 0], + "98": [0, 0.69141, 0, 0], + "99": [0, 0.47534, 0, 0], + "100": [0, 0.62119, 0, 0], + "101": [0, 0.47534, 0, 0], + "102": [0.18906, 0.69141, 0, 0], + "103": [0.18906, 0.47534, 0, 0], + "104": [0.18906, 0.69141, 0, 0], + "105": [0, 0.69141, 0, 0], + "106": [0, 0.69141, 0, 0], + "107": [0, 0.69141, 0, 0], + "108": [0, 0.69141, 0, 0], + "109": [0, 0.47534, 0, 0], + "110": [0, 0.47534, 0, 0], + "111": [0, 0.47534, 0, 0], + "112": [0.18906, 0.52396, 0, 0], + "113": [0.18906, 0.47534, 0, 0], + "114": [0, 0.47534, 0, 0], + "115": [0, 0.47534, 0, 0], + "116": [0, 0.62119, 0, 0], + "117": [0, 0.47534, 0, 0], + "118": [0, 0.52396, 0, 0], + "119": [0, 0.52396, 0, 0], + "120": [0.18906, 0.47534, 0, 0], + "121": [0.18906, 0.47534, 0, 0], + "122": [0.18906, 0.47534, 0, 0], + "8216": [0, 0.69141, 0, 0], + "8217": [0, 0.69141, 0, 0], + "58112": [0, 0.62119, 0, 0], + "58113": [0, 0.62119, 0, 0], + "58114": [0.18906, 0.69141, 0, 0], + "58115": [0.18906, 0.69141, 0, 0], + "58116": [0.18906, 0.47534, 0, 0], + "58117": [0, 0.69141, 0, 0], + "58118": [0, 0.62119, 0, 0], + "58119": [0, 0.47534, 0, 0], + }, + "Main-Bold": { + "33": [0, 0.69444, 0, 0], + "34": [0, 0.69444, 0, 0], + "35": [0.19444, 0.69444, 0, 0], + "36": [0.05556, 0.75, 0, 0], + "37": [0.05556, 0.75, 0, 0], + "38": [0, 0.69444, 0, 0], + "39": [0, 0.69444, 0, 0], + "40": [0.25, 0.75, 0, 0], + "41": [0.25, 0.75, 0, 0], + "42": [0, 0.75, 0, 0], + "43": [0.13333, 0.63333, 0, 0], + "44": [0.19444, 0.15556, 0, 0], + "45": [0, 0.44444, 0, 0], + "46": [0, 0.15556, 0, 0], + "47": 
[0.25, 0.75, 0, 0], + "48": [0, 0.64444, 0, 0], + "49": [0, 0.64444, 0, 0], + "50": [0, 0.64444, 0, 0], + "51": [0, 0.64444, 0, 0], + "52": [0, 0.64444, 0, 0], + "53": [0, 0.64444, 0, 0], + "54": [0, 0.64444, 0, 0], + "55": [0, 0.64444, 0, 0], + "56": [0, 0.64444, 0, 0], + "57": [0, 0.64444, 0, 0], + "58": [0, 0.44444, 0, 0], + "59": [0.19444, 0.44444, 0, 0], + "60": [0.08556, 0.58556, 0, 0], + "61": [-0.10889, 0.39111, 0, 0], + "62": [0.08556, 0.58556, 0, 0], + "63": [0, 0.69444, 0, 0], + "64": [0, 0.69444, 0, 0], + "65": [0, 0.68611, 0, 0], + "66": [0, 0.68611, 0, 0], + "67": [0, 0.68611, 0, 0], + "68": [0, 0.68611, 0, 0], + "69": [0, 0.68611, 0, 0], + "70": [0, 0.68611, 0, 0], + "71": [0, 0.68611, 0, 0], + "72": [0, 0.68611, 0, 0], + "73": [0, 0.68611, 0, 0], + "74": [0, 0.68611, 0, 0], + "75": [0, 0.68611, 0, 0], + "76": [0, 0.68611, 0, 0], + "77": [0, 0.68611, 0, 0], + "78": [0, 0.68611, 0, 0], + "79": [0, 0.68611, 0, 0], + "80": [0, 0.68611, 0, 0], + "81": [0.19444, 0.68611, 0, 0], + "82": [0, 0.68611, 0, 0], + "83": [0, 0.68611, 0, 0], + "84": [0, 0.68611, 0, 0], + "85": [0, 0.68611, 0, 0], + "86": [0, 0.68611, 0.01597, 0], + "87": [0, 0.68611, 0.01597, 0], + "88": [0, 0.68611, 0, 0], + "89": [0, 0.68611, 0.02875, 0], + "90": [0, 0.68611, 0, 0], + "91": [0.25, 0.75, 0, 0], + "92": [0.25, 0.75, 0, 0], + "93": [0.25, 0.75, 0, 0], + "94": [0, 0.69444, 0, 0], + "95": [0.31, 0.13444, 0.03194, 0], + "96": [0, 0.69444, 0, 0], + "97": [0, 0.44444, 0, 0], + "98": [0, 0.69444, 0, 0], + "99": [0, 0.44444, 0, 0], + "100": [0, 0.69444, 0, 0], + "101": [0, 0.44444, 0, 0], + "102": [0, 0.69444, 0.10903, 0], + "103": [0.19444, 0.44444, 0.01597, 0], + "104": [0, 0.69444, 0, 0], + "105": [0, 0.69444, 0, 0], + "106": [0.19444, 0.69444, 0, 0], + "107": [0, 0.69444, 0, 0], + "108": [0, 0.69444, 0, 0], + "109": [0, 0.44444, 0, 0], + "110": [0, 0.44444, 0, 0], + "111": [0, 0.44444, 0, 0], + "112": [0.19444, 0.44444, 0, 0], + "113": [0.19444, 0.44444, 0, 0], + "114": [0, 0.44444, 
0, 0], + "115": [0, 0.44444, 0, 0], + "116": [0, 0.63492, 0, 0], + "117": [0, 0.44444, 0, 0], + "118": [0, 0.44444, 0.01597, 0], + "119": [0, 0.44444, 0.01597, 0], + "120": [0, 0.44444, 0, 0], + "121": [0.19444, 0.44444, 0.01597, 0], + "122": [0, 0.44444, 0, 0], + "123": [0.25, 0.75, 0, 0], + "124": [0.25, 0.75, 0, 0], + "125": [0.25, 0.75, 0, 0], + "126": [0.35, 0.34444, 0, 0], + "168": [0, 0.69444, 0, 0], + "172": [0, 0.44444, 0, 0], + "175": [0, 0.59611, 0, 0], + "176": [0, 0.69444, 0, 0], + "177": [0.13333, 0.63333, 0, 0], + "180": [0, 0.69444, 0, 0], + "215": [0.13333, 0.63333, 0, 0], + "247": [0.13333, 0.63333, 0, 0], + "305": [0, 0.44444, 0, 0], + "567": [0.19444, 0.44444, 0, 0], + "710": [0, 0.69444, 0, 0], + "711": [0, 0.63194, 0, 0], + "713": [0, 0.59611, 0, 0], + "714": [0, 0.69444, 0, 0], + "715": [0, 0.69444, 0, 0], + "728": [0, 0.69444, 0, 0], + "729": [0, 0.69444, 0, 0], + "730": [0, 0.69444, 0, 0], + "732": [0, 0.69444, 0, 0], + "768": [0, 0.69444, 0, 0], + "769": [0, 0.69444, 0, 0], + "770": [0, 0.69444, 0, 0], + "771": [0, 0.69444, 0, 0], + "772": [0, 0.59611, 0, 0], + "774": [0, 0.69444, 0, 0], + "775": [0, 0.69444, 0, 0], + "776": [0, 0.69444, 0, 0], + "778": [0, 0.69444, 0, 0], + "779": [0, 0.69444, 0, 0], + "780": [0, 0.63194, 0, 0], + "824": [0.19444, 0.69444, 0, 0], + "915": [0, 0.68611, 0, 0], + "916": [0, 0.68611, 0, 0], + "920": [0, 0.68611, 0, 0], + "923": [0, 0.68611, 0, 0], + "926": [0, 0.68611, 0, 0], + "928": [0, 0.68611, 0, 0], + "931": [0, 0.68611, 0, 0], + "933": [0, 0.68611, 0, 0], + "934": [0, 0.68611, 0, 0], + "936": [0, 0.68611, 0, 0], + "937": [0, 0.68611, 0, 0], + "8211": [0, 0.44444, 0.03194, 0], + "8212": [0, 0.44444, 0.03194, 0], + "8216": [0, 0.69444, 0, 0], + "8217": [0, 0.69444, 0, 0], + "8220": [0, 0.69444, 0, 0], + "8221": [0, 0.69444, 0, 0], + "8224": [0.19444, 0.69444, 0, 0], + "8225": [0.19444, 0.69444, 0, 0], + "8242": [0, 0.55556, 0, 0], + "8407": [0, 0.72444, 0.15486, 0], + "8463": [0, 0.69444, 0, 0], + "8465": 
[0, 0.69444, 0, 0], + "8467": [0, 0.69444, 0, 0], + "8472": [0.19444, 0.44444, 0, 0], + "8476": [0, 0.69444, 0, 0], + "8501": [0, 0.69444, 0, 0], + "8592": [-0.10889, 0.39111, 0, 0], + "8593": [0.19444, 0.69444, 0, 0], + "8594": [-0.10889, 0.39111, 0, 0], + "8595": [0.19444, 0.69444, 0, 0], + "8596": [-0.10889, 0.39111, 0, 0], + "8597": [0.25, 0.75, 0, 0], + "8598": [0.19444, 0.69444, 0, 0], + "8599": [0.19444, 0.69444, 0, 0], + "8600": [0.19444, 0.69444, 0, 0], + "8601": [0.19444, 0.69444, 0, 0], + "8636": [-0.10889, 0.39111, 0, 0], + "8637": [-0.10889, 0.39111, 0, 0], + "8640": [-0.10889, 0.39111, 0, 0], + "8641": [-0.10889, 0.39111, 0, 0], + "8656": [-0.10889, 0.39111, 0, 0], + "8657": [0.19444, 0.69444, 0, 0], + "8658": [-0.10889, 0.39111, 0, 0], + "8659": [0.19444, 0.69444, 0, 0], + "8660": [-0.10889, 0.39111, 0, 0], + "8661": [0.25, 0.75, 0, 0], + "8704": [0, 0.69444, 0, 0], + "8706": [0, 0.69444, 0.06389, 0], + "8707": [0, 0.69444, 0, 0], + "8709": [0.05556, 0.75, 0, 0], + "8711": [0, 0.68611, 0, 0], + "8712": [0.08556, 0.58556, 0, 0], + "8715": [0.08556, 0.58556, 0, 0], + "8722": [0.13333, 0.63333, 0, 0], + "8723": [0.13333, 0.63333, 0, 0], + "8725": [0.25, 0.75, 0, 0], + "8726": [0.25, 0.75, 0, 0], + "8727": [-0.02778, 0.47222, 0, 0], + "8728": [-0.02639, 0.47361, 0, 0], + "8729": [-0.02639, 0.47361, 0, 0], + "8730": [0.18, 0.82, 0, 0], + "8733": [0, 0.44444, 0, 0], + "8734": [0, 0.44444, 0, 0], + "8736": [0, 0.69224, 0, 0], + "8739": [0.25, 0.75, 0, 0], + "8741": [0.25, 0.75, 0, 0], + "8743": [0, 0.55556, 0, 0], + "8744": [0, 0.55556, 0, 0], + "8745": [0, 0.55556, 0, 0], + "8746": [0, 0.55556, 0, 0], + "8747": [0.19444, 0.69444, 0.12778, 0], + "8764": [-0.10889, 0.39111, 0, 0], + "8768": [0.19444, 0.69444, 0, 0], + "8771": [0.00222, 0.50222, 0, 0], + "8776": [0.02444, 0.52444, 0, 0], + "8781": [0.00222, 0.50222, 0, 0], + "8801": [0.00222, 0.50222, 0, 0], + "8804": [0.19667, 0.69667, 0, 0], + "8805": [0.19667, 0.69667, 0, 0], + "8810": [0.08556, 0.58556, 
0, 0], + "8811": [0.08556, 0.58556, 0, 0], + "8826": [0.08556, 0.58556, 0, 0], + "8827": [0.08556, 0.58556, 0, 0], + "8834": [0.08556, 0.58556, 0, 0], + "8835": [0.08556, 0.58556, 0, 0], + "8838": [0.19667, 0.69667, 0, 0], + "8839": [0.19667, 0.69667, 0, 0], + "8846": [0, 0.55556, 0, 0], + "8849": [0.19667, 0.69667, 0, 0], + "8850": [0.19667, 0.69667, 0, 0], + "8851": [0, 0.55556, 0, 0], + "8852": [0, 0.55556, 0, 0], + "8853": [0.13333, 0.63333, 0, 0], + "8854": [0.13333, 0.63333, 0, 0], + "8855": [0.13333, 0.63333, 0, 0], + "8856": [0.13333, 0.63333, 0, 0], + "8857": [0.13333, 0.63333, 0, 0], + "8866": [0, 0.69444, 0, 0], + "8867": [0, 0.69444, 0, 0], + "8868": [0, 0.69444, 0, 0], + "8869": [0, 0.69444, 0, 0], + "8900": [-0.02639, 0.47361, 0, 0], + "8901": [-0.02639, 0.47361, 0, 0], + "8902": [-0.02778, 0.47222, 0, 0], + "8968": [0.25, 0.75, 0, 0], + "8969": [0.25, 0.75, 0, 0], + "8970": [0.25, 0.75, 0, 0], + "8971": [0.25, 0.75, 0, 0], + "8994": [-0.13889, 0.36111, 0, 0], + "8995": [-0.13889, 0.36111, 0, 0], + "9651": [0.19444, 0.69444, 0, 0], + "9657": [-0.02778, 0.47222, 0, 0], + "9661": [0.19444, 0.69444, 0, 0], + "9667": [-0.02778, 0.47222, 0, 0], + "9711": [0.19444, 0.69444, 0, 0], + "9824": [0.12963, 0.69444, 0, 0], + "9825": [0.12963, 0.69444, 0, 0], + "9826": [0.12963, 0.69444, 0, 0], + "9827": [0.12963, 0.69444, 0, 0], + "9837": [0, 0.75, 0, 0], + "9838": [0.19444, 0.69444, 0, 0], + "9839": [0.19444, 0.69444, 0, 0], + "10216": [0.25, 0.75, 0, 0], + "10217": [0.25, 0.75, 0, 0], + "10815": [0, 0.68611, 0, 0], + "10927": [0.19667, 0.69667, 0, 0], + "10928": [0.19667, 0.69667, 0, 0], + }, + "Main-Italic": { + "33": [0, 0.69444, 0.12417, 0], + "34": [0, 0.69444, 0.06961, 0], + "35": [0.19444, 0.69444, 0.06616, 0], + "37": [0.05556, 0.75, 0.13639, 0], + "38": [0, 0.69444, 0.09694, 0], + "39": [0, 0.69444, 0.12417, 0], + "40": [0.25, 0.75, 0.16194, 0], + "41": [0.25, 0.75, 0.03694, 0], + "42": [0, 0.75, 0.14917, 0], + "43": [0.05667, 0.56167, 0.03694, 0], + 
"44": [0.19444, 0.10556, 0, 0], + "45": [0, 0.43056, 0.02826, 0], + "46": [0, 0.10556, 0, 0], + "47": [0.25, 0.75, 0.16194, 0], + "48": [0, 0.64444, 0.13556, 0], + "49": [0, 0.64444, 0.13556, 0], + "50": [0, 0.64444, 0.13556, 0], + "51": [0, 0.64444, 0.13556, 0], + "52": [0.19444, 0.64444, 0.13556, 0], + "53": [0, 0.64444, 0.13556, 0], + "54": [0, 0.64444, 0.13556, 0], + "55": [0.19444, 0.64444, 0.13556, 0], + "56": [0, 0.64444, 0.13556, 0], + "57": [0, 0.64444, 0.13556, 0], + "58": [0, 0.43056, 0.0582, 0], + "59": [0.19444, 0.43056, 0.0582, 0], + "61": [-0.13313, 0.36687, 0.06616, 0], + "63": [0, 0.69444, 0.1225, 0], + "64": [0, 0.69444, 0.09597, 0], + "65": [0, 0.68333, 0, 0], + "66": [0, 0.68333, 0.10257, 0], + "67": [0, 0.68333, 0.14528, 0], + "68": [0, 0.68333, 0.09403, 0], + "69": [0, 0.68333, 0.12028, 0], + "70": [0, 0.68333, 0.13305, 0], + "71": [0, 0.68333, 0.08722, 0], + "72": [0, 0.68333, 0.16389, 0], + "73": [0, 0.68333, 0.15806, 0], + "74": [0, 0.68333, 0.14028, 0], + "75": [0, 0.68333, 0.14528, 0], + "76": [0, 0.68333, 0, 0], + "77": [0, 0.68333, 0.16389, 0], + "78": [0, 0.68333, 0.16389, 0], + "79": [0, 0.68333, 0.09403, 0], + "80": [0, 0.68333, 0.10257, 0], + "81": [0.19444, 0.68333, 0.09403, 0], + "82": [0, 0.68333, 0.03868, 0], + "83": [0, 0.68333, 0.11972, 0], + "84": [0, 0.68333, 0.13305, 0], + "85": [0, 0.68333, 0.16389, 0], + "86": [0, 0.68333, 0.18361, 0], + "87": [0, 0.68333, 0.18361, 0], + "88": [0, 0.68333, 0.15806, 0], + "89": [0, 0.68333, 0.19383, 0], + "90": [0, 0.68333, 0.14528, 0], + "91": [0.25, 0.75, 0.1875, 0], + "93": [0.25, 0.75, 0.10528, 0], + "94": [0, 0.69444, 0.06646, 0], + "95": [0.31, 0.12056, 0.09208, 0], + "97": [0, 0.43056, 0.07671, 0], + "98": [0, 0.69444, 0.06312, 0], + "99": [0, 0.43056, 0.05653, 0], + "100": [0, 0.69444, 0.10333, 0], + "101": [0, 0.43056, 0.07514, 0], + "102": [0.19444, 0.69444, 0.21194, 0], + "103": [0.19444, 0.43056, 0.08847, 0], + "104": [0, 0.69444, 0.07671, 0], + "105": [0, 0.65536, 0.1019, 0], 
+ "106": [0.19444, 0.65536, 0.14467, 0], + "107": [0, 0.69444, 0.10764, 0], + "108": [0, 0.69444, 0.10333, 0], + "109": [0, 0.43056, 0.07671, 0], + "110": [0, 0.43056, 0.07671, 0], + "111": [0, 0.43056, 0.06312, 0], + "112": [0.19444, 0.43056, 0.06312, 0], + "113": [0.19444, 0.43056, 0.08847, 0], + "114": [0, 0.43056, 0.10764, 0], + "115": [0, 0.43056, 0.08208, 0], + "116": [0, 0.61508, 0.09486, 0], + "117": [0, 0.43056, 0.07671, 0], + "118": [0, 0.43056, 0.10764, 0], + "119": [0, 0.43056, 0.10764, 0], + "120": [0, 0.43056, 0.12042, 0], + "121": [0.19444, 0.43056, 0.08847, 0], + "122": [0, 0.43056, 0.12292, 0], + "126": [0.35, 0.31786, 0.11585, 0], + "163": [0, 0.69444, 0, 0], + "305": [0, 0.43056, 0, 0.02778], + "567": [0.19444, 0.43056, 0, 0.08334], + "768": [0, 0.69444, 0, 0], + "769": [0, 0.69444, 0.09694, 0], + "770": [0, 0.69444, 0.06646, 0], + "771": [0, 0.66786, 0.11585, 0], + "772": [0, 0.56167, 0.10333, 0], + "774": [0, 0.69444, 0.10806, 0], + "775": [0, 0.66786, 0.11752, 0], + "776": [0, 0.66786, 0.10474, 0], + "778": [0, 0.69444, 0, 0], + "779": [0, 0.69444, 0.1225, 0], + "780": [0, 0.62847, 0.08295, 0], + "915": [0, 0.68333, 0.13305, 0], + "916": [0, 0.68333, 0, 0], + "920": [0, 0.68333, 0.09403, 0], + "923": [0, 0.68333, 0, 0], + "926": [0, 0.68333, 0.15294, 0], + "928": [0, 0.68333, 0.16389, 0], + "931": [0, 0.68333, 0.12028, 0], + "933": [0, 0.68333, 0.11111, 0], + "934": [0, 0.68333, 0.05986, 0], + "936": [0, 0.68333, 0.11111, 0], + "937": [0, 0.68333, 0.10257, 0], + "8211": [0, 0.43056, 0.09208, 0], + "8212": [0, 0.43056, 0.09208, 0], + "8216": [0, 0.69444, 0.12417, 0], + "8217": [0, 0.69444, 0.12417, 0], + "8220": [0, 0.69444, 0.1685, 0], + "8221": [0, 0.69444, 0.06961, 0], + "8463": [0, 0.68889, 0, 0], + }, + "Main-Regular": { + "32": [0, 0, 0, 0], + "33": [0, 0.69444, 0, 0], + "34": [0, 0.69444, 0, 0], + "35": [0.19444, 0.69444, 0, 0], + "36": [0.05556, 0.75, 0, 0], + "37": [0.05556, 0.75, 0, 0], + "38": [0, 0.69444, 0, 0], + "39": [0, 0.69444, 
0, 0], + "40": [0.25, 0.75, 0, 0], + "41": [0.25, 0.75, 0, 0], + "42": [0, 0.75, 0, 0], + "43": [0.08333, 0.58333, 0, 0], + "44": [0.19444, 0.10556, 0, 0], + "45": [0, 0.43056, 0, 0], + "46": [0, 0.10556, 0, 0], + "47": [0.25, 0.75, 0, 0], + "48": [0, 0.64444, 0, 0], + "49": [0, 0.64444, 0, 0], + "50": [0, 0.64444, 0, 0], + "51": [0, 0.64444, 0, 0], + "52": [0, 0.64444, 0, 0], + "53": [0, 0.64444, 0, 0], + "54": [0, 0.64444, 0, 0], + "55": [0, 0.64444, 0, 0], + "56": [0, 0.64444, 0, 0], + "57": [0, 0.64444, 0, 0], + "58": [0, 0.43056, 0, 0], + "59": [0.19444, 0.43056, 0, 0], + "60": [0.0391, 0.5391, 0, 0], + "61": [-0.13313, 0.36687, 0, 0], + "62": [0.0391, 0.5391, 0, 0], + "63": [0, 0.69444, 0, 0], + "64": [0, 0.69444, 0, 0], + "65": [0, 0.68333, 0, 0], + "66": [0, 0.68333, 0, 0], + "67": [0, 0.68333, 0, 0], + "68": [0, 0.68333, 0, 0], + "69": [0, 0.68333, 0, 0], + "70": [0, 0.68333, 0, 0], + "71": [0, 0.68333, 0, 0], + "72": [0, 0.68333, 0, 0], + "73": [0, 0.68333, 0, 0], + "74": [0, 0.68333, 0, 0], + "75": [0, 0.68333, 0, 0], + "76": [0, 0.68333, 0, 0], + "77": [0, 0.68333, 0, 0], + "78": [0, 0.68333, 0, 0], + "79": [0, 0.68333, 0, 0], + "80": [0, 0.68333, 0, 0], + "81": [0.19444, 0.68333, 0, 0], + "82": [0, 0.68333, 0, 0], + "83": [0, 0.68333, 0, 0], + "84": [0, 0.68333, 0, 0], + "85": [0, 0.68333, 0, 0], + "86": [0, 0.68333, 0.01389, 0], + "87": [0, 0.68333, 0.01389, 0], + "88": [0, 0.68333, 0, 0], + "89": [0, 0.68333, 0.025, 0], + "90": [0, 0.68333, 0, 0], + "91": [0.25, 0.75, 0, 0], + "92": [0.25, 0.75, 0, 0], + "93": [0.25, 0.75, 0, 0], + "94": [0, 0.69444, 0, 0], + "95": [0.31, 0.12056, 0.02778, 0], + "96": [0, 0.69444, 0, 0], + "97": [0, 0.43056, 0, 0], + "98": [0, 0.69444, 0, 0], + "99": [0, 0.43056, 0, 0], + "100": [0, 0.69444, 0, 0], + "101": [0, 0.43056, 0, 0], + "102": [0, 0.69444, 0.07778, 0], + "103": [0.19444, 0.43056, 0.01389, 0], + "104": [0, 0.69444, 0, 0], + "105": [0, 0.66786, 0, 0], + "106": [0.19444, 0.66786, 0, 0], + "107": [0, 0.69444, 0, 
0], + "108": [0, 0.69444, 0, 0], + "109": [0, 0.43056, 0, 0], + "110": [0, 0.43056, 0, 0], + "111": [0, 0.43056, 0, 0], + "112": [0.19444, 0.43056, 0, 0], + "113": [0.19444, 0.43056, 0, 0], + "114": [0, 0.43056, 0, 0], + "115": [0, 0.43056, 0, 0], + "116": [0, 0.61508, 0, 0], + "117": [0, 0.43056, 0, 0], + "118": [0, 0.43056, 0.01389, 0], + "119": [0, 0.43056, 0.01389, 0], + "120": [0, 0.43056, 0, 0], + "121": [0.19444, 0.43056, 0.01389, 0], + "122": [0, 0.43056, 0, 0], + "123": [0.25, 0.75, 0, 0], + "124": [0.25, 0.75, 0, 0], + "125": [0.25, 0.75, 0, 0], + "126": [0.35, 0.31786, 0, 0], + "160": [0, 0, 0, 0], + "168": [0, 0.66786, 0, 0], + "172": [0, 0.43056, 0, 0], + "175": [0, 0.56778, 0, 0], + "176": [0, 0.69444, 0, 0], + "177": [0.08333, 0.58333, 0, 0], + "180": [0, 0.69444, 0, 0], + "215": [0.08333, 0.58333, 0, 0], + "247": [0.08333, 0.58333, 0, 0], + "305": [0, 0.43056, 0, 0], + "567": [0.19444, 0.43056, 0, 0], + "710": [0, 0.69444, 0, 0], + "711": [0, 0.62847, 0, 0], + "713": [0, 0.56778, 0, 0], + "714": [0, 0.69444, 0, 0], + "715": [0, 0.69444, 0, 0], + "728": [0, 0.69444, 0, 0], + "729": [0, 0.66786, 0, 0], + "730": [0, 0.69444, 0, 0], + "732": [0, 0.66786, 0, 0], + "768": [0, 0.69444, 0, 0], + "769": [0, 0.69444, 0, 0], + "770": [0, 0.69444, 0, 0], + "771": [0, 0.66786, 0, 0], + "772": [0, 0.56778, 0, 0], + "774": [0, 0.69444, 0, 0], + "775": [0, 0.66786, 0, 0], + "776": [0, 0.66786, 0, 0], + "778": [0, 0.69444, 0, 0], + "779": [0, 0.69444, 0, 0], + "780": [0, 0.62847, 0, 0], + "824": [0.19444, 0.69444, 0, 0], + "915": [0, 0.68333, 0, 0], + "916": [0, 0.68333, 0, 0], + "920": [0, 0.68333, 0, 0], + "923": [0, 0.68333, 0, 0], + "926": [0, 0.68333, 0, 0], + "928": [0, 0.68333, 0, 0], + "931": [0, 0.68333, 0, 0], + "933": [0, 0.68333, 0, 0], + "934": [0, 0.68333, 0, 0], + "936": [0, 0.68333, 0, 0], + "937": [0, 0.68333, 0, 0], + "8211": [0, 0.43056, 0.02778, 0], + "8212": [0, 0.43056, 0.02778, 0], + "8216": [0, 0.69444, 0, 0], + "8217": [0, 0.69444, 0, 0], + 
"8220": [0, 0.69444, 0, 0], + "8221": [0, 0.69444, 0, 0], + "8224": [0.19444, 0.69444, 0, 0], + "8225": [0.19444, 0.69444, 0, 0], + "8230": [0, 0.12, 0, 0], + "8242": [0, 0.55556, 0, 0], + "8407": [0, 0.71444, 0.15382, 0], + "8463": [0, 0.68889, 0, 0], + "8465": [0, 0.69444, 0, 0], + "8467": [0, 0.69444, 0, 0.11111], + "8472": [0.19444, 0.43056, 0, 0.11111], + "8476": [0, 0.69444, 0, 0], + "8501": [0, 0.69444, 0, 0], + "8592": [-0.13313, 0.36687, 0, 0], + "8593": [0.19444, 0.69444, 0, 0], + "8594": [-0.13313, 0.36687, 0, 0], + "8595": [0.19444, 0.69444, 0, 0], + "8596": [-0.13313, 0.36687, 0, 0], + "8597": [0.25, 0.75, 0, 0], + "8598": [0.19444, 0.69444, 0, 0], + "8599": [0.19444, 0.69444, 0, 0], + "8600": [0.19444, 0.69444, 0, 0], + "8601": [0.19444, 0.69444, 0, 0], + "8614": [0.011, 0.511, 0, 0], + "8617": [0.011, 0.511, 0, 0], + "8618": [0.011, 0.511, 0, 0], + "8636": [-0.13313, 0.36687, 0, 0], + "8637": [-0.13313, 0.36687, 0, 0], + "8640": [-0.13313, 0.36687, 0, 0], + "8641": [-0.13313, 0.36687, 0, 0], + "8652": [0.011, 0.671, 0, 0], + "8656": [-0.13313, 0.36687, 0, 0], + "8657": [0.19444, 0.69444, 0, 0], + "8658": [-0.13313, 0.36687, 0, 0], + "8659": [0.19444, 0.69444, 0, 0], + "8660": [-0.13313, 0.36687, 0, 0], + "8661": [0.25, 0.75, 0, 0], + "8704": [0, 0.69444, 0, 0], + "8706": [0, 0.69444, 0.05556, 0.08334], + "8707": [0, 0.69444, 0, 0], + "8709": [0.05556, 0.75, 0, 0], + "8711": [0, 0.68333, 0, 0], + "8712": [0.0391, 0.5391, 0, 0], + "8715": [0.0391, 0.5391, 0, 0], + "8722": [0.08333, 0.58333, 0, 0], + "8723": [0.08333, 0.58333, 0, 0], + "8725": [0.25, 0.75, 0, 0], + "8726": [0.25, 0.75, 0, 0], + "8727": [-0.03472, 0.46528, 0, 0], + "8728": [-0.05555, 0.44445, 0, 0], + "8729": [-0.05555, 0.44445, 0, 0], + "8730": [0.2, 0.8, 0, 0], + "8733": [0, 0.43056, 0, 0], + "8734": [0, 0.43056, 0, 0], + "8736": [0, 0.69224, 0, 0], + "8739": [0.25, 0.75, 0, 0], + "8741": [0.25, 0.75, 0, 0], + "8743": [0, 0.55556, 0, 0], + "8744": [0, 0.55556, 0, 0], + "8745": [0, 
0.55556, 0, 0], + "8746": [0, 0.55556, 0, 0], + "8747": [0.19444, 0.69444, 0.11111, 0], + "8764": [-0.13313, 0.36687, 0, 0], + "8768": [0.19444, 0.69444, 0, 0], + "8771": [-0.03625, 0.46375, 0, 0], + "8773": [-0.022, 0.589, 0, 0], + "8776": [-0.01688, 0.48312, 0, 0], + "8781": [-0.03625, 0.46375, 0, 0], + "8784": [-0.133, 0.67, 0, 0], + "8800": [0.215, 0.716, 0, 0], + "8801": [-0.03625, 0.46375, 0, 0], + "8804": [0.13597, 0.63597, 0, 0], + "8805": [0.13597, 0.63597, 0, 0], + "8810": [0.0391, 0.5391, 0, 0], + "8811": [0.0391, 0.5391, 0, 0], + "8826": [0.0391, 0.5391, 0, 0], + "8827": [0.0391, 0.5391, 0, 0], + "8834": [0.0391, 0.5391, 0, 0], + "8835": [0.0391, 0.5391, 0, 0], + "8838": [0.13597, 0.63597, 0, 0], + "8839": [0.13597, 0.63597, 0, 0], + "8846": [0, 0.55556, 0, 0], + "8849": [0.13597, 0.63597, 0, 0], + "8850": [0.13597, 0.63597, 0, 0], + "8851": [0, 0.55556, 0, 0], + "8852": [0, 0.55556, 0, 0], + "8853": [0.08333, 0.58333, 0, 0], + "8854": [0.08333, 0.58333, 0, 0], + "8855": [0.08333, 0.58333, 0, 0], + "8856": [0.08333, 0.58333, 0, 0], + "8857": [0.08333, 0.58333, 0, 0], + "8866": [0, 0.69444, 0, 0], + "8867": [0, 0.69444, 0, 0], + "8868": [0, 0.69444, 0, 0], + "8869": [0, 0.69444, 0, 0], + "8872": [0.249, 0.75, 0, 0], + "8900": [-0.05555, 0.44445, 0, 0], + "8901": [-0.05555, 0.44445, 0, 0], + "8902": [-0.03472, 0.46528, 0, 0], + "8904": [0.005, 0.505, 0, 0], + "8942": [0.03, 0.9, 0, 0], + "8943": [-0.19, 0.31, 0, 0], + "8945": [-0.1, 0.82, 0, 0], + "8968": [0.25, 0.75, 0, 0], + "8969": [0.25, 0.75, 0, 0], + "8970": [0.25, 0.75, 0, 0], + "8971": [0.25, 0.75, 0, 0], + "8994": [-0.14236, 0.35764, 0, 0], + "8995": [-0.14236, 0.35764, 0, 0], + "9136": [0.244, 0.744, 0, 0], + "9137": [0.244, 0.744, 0, 0], + "9651": [0.19444, 0.69444, 0, 0], + "9657": [-0.03472, 0.46528, 0, 0], + "9661": [0.19444, 0.69444, 0, 0], + "9667": [-0.03472, 0.46528, 0, 0], + "9711": [0.19444, 0.69444, 0, 0], + "9824": [0.12963, 0.69444, 0, 0], + "9825": [0.12963, 0.69444, 0, 0], + 
"9826": [0.12963, 0.69444, 0, 0], + "9827": [0.12963, 0.69444, 0, 0], + "9837": [0, 0.75, 0, 0], + "9838": [0.19444, 0.69444, 0, 0], + "9839": [0.19444, 0.69444, 0, 0], + "10216": [0.25, 0.75, 0, 0], + "10217": [0.25, 0.75, 0, 0], + "10222": [0.244, 0.744, 0, 0], + "10223": [0.244, 0.744, 0, 0], + "10229": [0.011, 0.511, 0, 0], + "10230": [0.011, 0.511, 0, 0], + "10231": [0.011, 0.511, 0, 0], + "10232": [0.024, 0.525, 0, 0], + "10233": [0.024, 0.525, 0, 0], + "10234": [0.024, 0.525, 0, 0], + "10236": [0.011, 0.511, 0, 0], + "10815": [0, 0.68333, 0, 0], + "10927": [0.13597, 0.63597, 0, 0], + "10928": [0.13597, 0.63597, 0, 0], + }, + "Math-BoldItalic": { + "47": [0.19444, 0.69444, 0, 0], + "65": [0, 0.68611, 0, 0], + "66": [0, 0.68611, 0.04835, 0], + "67": [0, 0.68611, 0.06979, 0], + "68": [0, 0.68611, 0.03194, 0], + "69": [0, 0.68611, 0.05451, 0], + "70": [0, 0.68611, 0.15972, 0], + "71": [0, 0.68611, 0, 0], + "72": [0, 0.68611, 0.08229, 0], + "73": [0, 0.68611, 0.07778, 0], + "74": [0, 0.68611, 0.10069, 0], + "75": [0, 0.68611, 0.06979, 0], + "76": [0, 0.68611, 0, 0], + "77": [0, 0.68611, 0.11424, 0], + "78": [0, 0.68611, 0.11424, 0], + "79": [0, 0.68611, 0.03194, 0], + "80": [0, 0.68611, 0.15972, 0], + "81": [0.19444, 0.68611, 0, 0], + "82": [0, 0.68611, 0.00421, 0], + "83": [0, 0.68611, 0.05382, 0], + "84": [0, 0.68611, 0.15972, 0], + "85": [0, 0.68611, 0.11424, 0], + "86": [0, 0.68611, 0.25555, 0], + "87": [0, 0.68611, 0.15972, 0], + "88": [0, 0.68611, 0.07778, 0], + "89": [0, 0.68611, 0.25555, 0], + "90": [0, 0.68611, 0.06979, 0], + "97": [0, 0.44444, 0, 0], + "98": [0, 0.69444, 0, 0], + "99": [0, 0.44444, 0, 0], + "100": [0, 0.69444, 0, 0], + "101": [0, 0.44444, 0, 0], + "102": [0.19444, 0.69444, 0.11042, 0], + "103": [0.19444, 0.44444, 0.03704, 0], + "104": [0, 0.69444, 0, 0], + "105": [0, 0.69326, 0, 0], + "106": [0.19444, 0.69326, 0.0622, 0], + "107": [0, 0.69444, 0.01852, 0], + "108": [0, 0.69444, 0.0088, 0], + "109": [0, 0.44444, 0, 0], + "110": [0, 
0.44444, 0, 0], + "111": [0, 0.44444, 0, 0], + "112": [0.19444, 0.44444, 0, 0], + "113": [0.19444, 0.44444, 0.03704, 0], + "114": [0, 0.44444, 0.03194, 0], + "115": [0, 0.44444, 0, 0], + "116": [0, 0.63492, 0, 0], + "117": [0, 0.44444, 0, 0], + "118": [0, 0.44444, 0.03704, 0], + "119": [0, 0.44444, 0.02778, 0], + "120": [0, 0.44444, 0, 0], + "121": [0.19444, 0.44444, 0.03704, 0], + "122": [0, 0.44444, 0.04213, 0], + "915": [0, 0.68611, 0.15972, 0], + "916": [0, 0.68611, 0, 0], + "920": [0, 0.68611, 0.03194, 0], + "923": [0, 0.68611, 0, 0], + "926": [0, 0.68611, 0.07458, 0], + "928": [0, 0.68611, 0.08229, 0], + "931": [0, 0.68611, 0.05451, 0], + "933": [0, 0.68611, 0.15972, 0], + "934": [0, 0.68611, 0, 0], + "936": [0, 0.68611, 0.11653, 0], + "937": [0, 0.68611, 0.04835, 0], + "945": [0, 0.44444, 0, 0], + "946": [0.19444, 0.69444, 0.03403, 0], + "947": [0.19444, 0.44444, 0.06389, 0], + "948": [0, 0.69444, 0.03819, 0], + "949": [0, 0.44444, 0, 0], + "950": [0.19444, 0.69444, 0.06215, 0], + "951": [0.19444, 0.44444, 0.03704, 0], + "952": [0, 0.69444, 0.03194, 0], + "953": [0, 0.44444, 0, 0], + "954": [0, 0.44444, 0, 0], + "955": [0, 0.69444, 0, 0], + "956": [0.19444, 0.44444, 0, 0], + "957": [0, 0.44444, 0.06898, 0], + "958": [0.19444, 0.69444, 0.03021, 0], + "959": [0, 0.44444, 0, 0], + "960": [0, 0.44444, 0.03704, 0], + "961": [0.19444, 0.44444, 0, 0], + "962": [0.09722, 0.44444, 0.07917, 0], + "963": [0, 0.44444, 0.03704, 0], + "964": [0, 0.44444, 0.13472, 0], + "965": [0, 0.44444, 0.03704, 0], + "966": [0.19444, 0.44444, 0, 0], + "967": [0.19444, 0.44444, 0, 0], + "968": [0.19444, 0.69444, 0.03704, 0], + "969": [0, 0.44444, 0.03704, 0], + "977": [0, 0.69444, 0, 0], + "981": [0.19444, 0.69444, 0, 0], + "982": [0, 0.44444, 0.03194, 0], + "1009": [0.19444, 0.44444, 0, 0], + "1013": [0, 0.44444, 0, 0], + }, + "Math-Italic": { + "47": [0.19444, 0.69444, 0, 0], + "65": [0, 0.68333, 0, 0.13889], + "66": [0, 0.68333, 0.05017, 0.08334], + "67": [0, 0.68333, 0.07153, 
0.08334], + "68": [0, 0.68333, 0.02778, 0.05556], + "69": [0, 0.68333, 0.05764, 0.08334], + "70": [0, 0.68333, 0.13889, 0.08334], + "71": [0, 0.68333, 0, 0.08334], + "72": [0, 0.68333, 0.08125, 0.05556], + "73": [0, 0.68333, 0.07847, 0.11111], + "74": [0, 0.68333, 0.09618, 0.16667], + "75": [0, 0.68333, 0.07153, 0.05556], + "76": [0, 0.68333, 0, 0.02778], + "77": [0, 0.68333, 0.10903, 0.08334], + "78": [0, 0.68333, 0.10903, 0.08334], + "79": [0, 0.68333, 0.02778, 0.08334], + "80": [0, 0.68333, 0.13889, 0.08334], + "81": [0.19444, 0.68333, 0, 0.08334], + "82": [0, 0.68333, 0.00773, 0.08334], + "83": [0, 0.68333, 0.05764, 0.08334], + "84": [0, 0.68333, 0.13889, 0.08334], + "85": [0, 0.68333, 0.10903, 0.02778], + "86": [0, 0.68333, 0.22222, 0], + "87": [0, 0.68333, 0.13889, 0], + "88": [0, 0.68333, 0.07847, 0.08334], + "89": [0, 0.68333, 0.22222, 0], + "90": [0, 0.68333, 0.07153, 0.08334], + "97": [0, 0.43056, 0, 0], + "98": [0, 0.69444, 0, 0], + "99": [0, 0.43056, 0, 0.05556], + "100": [0, 0.69444, 0, 0.16667], + "101": [0, 0.43056, 0, 0.05556], + "102": [0.19444, 0.69444, 0.10764, 0.16667], + "103": [0.19444, 0.43056, 0.03588, 0.02778], + "104": [0, 0.69444, 0, 0], + "105": [0, 0.65952, 0, 0], + "106": [0.19444, 0.65952, 0.05724, 0], + "107": [0, 0.69444, 0.03148, 0], + "108": [0, 0.69444, 0.01968, 0.08334], + "109": [0, 0.43056, 0, 0], + "110": [0, 0.43056, 0, 0], + "111": [0, 0.43056, 0, 0.05556], + "112": [0.19444, 0.43056, 0, 0.08334], + "113": [0.19444, 0.43056, 0.03588, 0.08334], + "114": [0, 0.43056, 0.02778, 0.05556], + "115": [0, 0.43056, 0, 0.05556], + "116": [0, 0.61508, 0, 0.08334], + "117": [0, 0.43056, 0, 0.02778], + "118": [0, 0.43056, 0.03588, 0.02778], + "119": [0, 0.43056, 0.02691, 0.08334], + "120": [0, 0.43056, 0, 0.02778], + "121": [0.19444, 0.43056, 0.03588, 0.05556], + "122": [0, 0.43056, 0.04398, 0.05556], + "915": [0, 0.68333, 0.13889, 0.08334], + "916": [0, 0.68333, 0, 0.16667], + "920": [0, 0.68333, 0.02778, 0.08334], + "923": [0, 0.68333, 
0, 0.16667], + "926": [0, 0.68333, 0.07569, 0.08334], + "928": [0, 0.68333, 0.08125, 0.05556], + "931": [0, 0.68333, 0.05764, 0.08334], + "933": [0, 0.68333, 0.13889, 0.05556], + "934": [0, 0.68333, 0, 0.08334], + "936": [0, 0.68333, 0.11, 0.05556], + "937": [0, 0.68333, 0.05017, 0.08334], + "945": [0, 0.43056, 0.0037, 0.02778], + "946": [0.19444, 0.69444, 0.05278, 0.08334], + "947": [0.19444, 0.43056, 0.05556, 0], + "948": [0, 0.69444, 0.03785, 0.05556], + "949": [0, 0.43056, 0, 0.08334], + "950": [0.19444, 0.69444, 0.07378, 0.08334], + "951": [0.19444, 0.43056, 0.03588, 0.05556], + "952": [0, 0.69444, 0.02778, 0.08334], + "953": [0, 0.43056, 0, 0.05556], + "954": [0, 0.43056, 0, 0], + "955": [0, 0.69444, 0, 0], + "956": [0.19444, 0.43056, 0, 0.02778], + "957": [0, 0.43056, 0.06366, 0.02778], + "958": [0.19444, 0.69444, 0.04601, 0.11111], + "959": [0, 0.43056, 0, 0.05556], + "960": [0, 0.43056, 0.03588, 0], + "961": [0.19444, 0.43056, 0, 0.08334], + "962": [0.09722, 0.43056, 0.07986, 0.08334], + "963": [0, 0.43056, 0.03588, 0], + "964": [0, 0.43056, 0.1132, 0.02778], + "965": [0, 0.43056, 0.03588, 0.02778], + "966": [0.19444, 0.43056, 0, 0.08334], + "967": [0.19444, 0.43056, 0, 0.05556], + "968": [0.19444, 0.69444, 0.03588, 0.11111], + "969": [0, 0.43056, 0.03588, 0], + "977": [0, 0.69444, 0, 0.08334], + "981": [0.19444, 0.69444, 0, 0.08334], + "982": [0, 0.43056, 0.02778, 0], + "1009": [0.19444, 0.43056, 0, 0.08334], + "1013": [0, 0.43056, 0, 0.05556], + }, + "Math-Regular": { + "65": [0, 0.68333, 0, 0.13889], + "66": [0, 0.68333, 0.05017, 0.08334], + "67": [0, 0.68333, 0.07153, 0.08334], + "68": [0, 0.68333, 0.02778, 0.05556], + "69": [0, 0.68333, 0.05764, 0.08334], + "70": [0, 0.68333, 0.13889, 0.08334], + "71": [0, 0.68333, 0, 0.08334], + "72": [0, 0.68333, 0.08125, 0.05556], + "73": [0, 0.68333, 0.07847, 0.11111], + "74": [0, 0.68333, 0.09618, 0.16667], + "75": [0, 0.68333, 0.07153, 0.05556], + "76": [0, 0.68333, 0, 0.02778], + "77": [0, 0.68333, 0.10903, 
0.08334], + "78": [0, 0.68333, 0.10903, 0.08334], + "79": [0, 0.68333, 0.02778, 0.08334], + "80": [0, 0.68333, 0.13889, 0.08334], + "81": [0.19444, 0.68333, 0, 0.08334], + "82": [0, 0.68333, 0.00773, 0.08334], + "83": [0, 0.68333, 0.05764, 0.08334], + "84": [0, 0.68333, 0.13889, 0.08334], + "85": [0, 0.68333, 0.10903, 0.02778], + "86": [0, 0.68333, 0.22222, 0], + "87": [0, 0.68333, 0.13889, 0], + "88": [0, 0.68333, 0.07847, 0.08334], + "89": [0, 0.68333, 0.22222, 0], + "90": [0, 0.68333, 0.07153, 0.08334], + "97": [0, 0.43056, 0, 0], + "98": [0, 0.69444, 0, 0], + "99": [0, 0.43056, 0, 0.05556], + "100": [0, 0.69444, 0, 0.16667], + "101": [0, 0.43056, 0, 0.05556], + "102": [0.19444, 0.69444, 0.10764, 0.16667], + "103": [0.19444, 0.43056, 0.03588, 0.02778], + "104": [0, 0.69444, 0, 0], + "105": [0, 0.65952, 0, 0], + "106": [0.19444, 0.65952, 0.05724, 0], + "107": [0, 0.69444, 0.03148, 0], + "108": [0, 0.69444, 0.01968, 0.08334], + "109": [0, 0.43056, 0, 0], + "110": [0, 0.43056, 0, 0], + "111": [0, 0.43056, 0, 0.05556], + "112": [0.19444, 0.43056, 0, 0.08334], + "113": [0.19444, 0.43056, 0.03588, 0.08334], + "114": [0, 0.43056, 0.02778, 0.05556], + "115": [0, 0.43056, 0, 0.05556], + "116": [0, 0.61508, 0, 0.08334], + "117": [0, 0.43056, 0, 0.02778], + "118": [0, 0.43056, 0.03588, 0.02778], + "119": [0, 0.43056, 0.02691, 0.08334], + "120": [0, 0.43056, 0, 0.02778], + "121": [0.19444, 0.43056, 0.03588, 0.05556], + "122": [0, 0.43056, 0.04398, 0.05556], + "915": [0, 0.68333, 0.13889, 0.08334], + "916": [0, 0.68333, 0, 0.16667], + "920": [0, 0.68333, 0.02778, 0.08334], + "923": [0, 0.68333, 0, 0.16667], + "926": [0, 0.68333, 0.07569, 0.08334], + "928": [0, 0.68333, 0.08125, 0.05556], + "931": [0, 0.68333, 0.05764, 0.08334], + "933": [0, 0.68333, 0.13889, 0.05556], + "934": [0, 0.68333, 0, 0.08334], + "936": [0, 0.68333, 0.11, 0.05556], + "937": [0, 0.68333, 0.05017, 0.08334], + "945": [0, 0.43056, 0.0037, 0.02778], + "946": [0.19444, 0.69444, 0.05278, 0.08334], + "947": 
[0.19444, 0.43056, 0.05556, 0], + "948": [0, 0.69444, 0.03785, 0.05556], + "949": [0, 0.43056, 0, 0.08334], + "950": [0.19444, 0.69444, 0.07378, 0.08334], + "951": [0.19444, 0.43056, 0.03588, 0.05556], + "952": [0, 0.69444, 0.02778, 0.08334], + "953": [0, 0.43056, 0, 0.05556], + "954": [0, 0.43056, 0, 0], + "955": [0, 0.69444, 0, 0], + "956": [0.19444, 0.43056, 0, 0.02778], + "957": [0, 0.43056, 0.06366, 0.02778], + "958": [0.19444, 0.69444, 0.04601, 0.11111], + "959": [0, 0.43056, 0, 0.05556], + "960": [0, 0.43056, 0.03588, 0], + "961": [0.19444, 0.43056, 0, 0.08334], + "962": [0.09722, 0.43056, 0.07986, 0.08334], + "963": [0, 0.43056, 0.03588, 0], + "964": [0, 0.43056, 0.1132, 0.02778], + "965": [0, 0.43056, 0.03588, 0.02778], + "966": [0.19444, 0.43056, 0, 0.08334], + "967": [0.19444, 0.43056, 0, 0.05556], + "968": [0.19444, 0.69444, 0.03588, 0.11111], + "969": [0, 0.43056, 0.03588, 0], + "977": [0, 0.69444, 0, 0.08334], + "981": [0.19444, 0.69444, 0, 0.08334], + "982": [0, 0.43056, 0.02778, 0], + "1009": [0.19444, 0.43056, 0, 0.08334], + "1013": [0, 0.43056, 0, 0.05556], + }, + "SansSerif-Regular": { + "33": [0, 0.69444, 0, 0], + "34": [0, 0.69444, 0, 0], + "35": [0.19444, 0.69444, 0, 0], + "36": [0.05556, 0.75, 0, 0], + "37": [0.05556, 0.75, 0, 0], + "38": [0, 0.69444, 0, 0], + "39": [0, 0.69444, 0, 0], + "40": [0.25, 0.75, 0, 0], + "41": [0.25, 0.75, 0, 0], + "42": [0, 0.75, 0, 0], + "43": [0.08333, 0.58333, 0, 0], + "44": [0.125, 0.08333, 0, 0], + "45": [0, 0.44444, 0, 0], + "46": [0, 0.08333, 0, 0], + "47": [0.25, 0.75, 0, 0], + "48": [0, 0.65556, 0, 0], + "49": [0, 0.65556, 0, 0], + "50": [0, 0.65556, 0, 0], + "51": [0, 0.65556, 0, 0], + "52": [0, 0.65556, 0, 0], + "53": [0, 0.65556, 0, 0], + "54": [0, 0.65556, 0, 0], + "55": [0, 0.65556, 0, 0], + "56": [0, 0.65556, 0, 0], + "57": [0, 0.65556, 0, 0], + "58": [0, 0.44444, 0, 0], + "59": [0.125, 0.44444, 0, 0], + "61": [-0.13, 0.37, 0, 0], + "63": [0, 0.69444, 0, 0], + "64": [0, 0.69444, 0, 0], + "65": [0, 
0.69444, 0, 0], + "66": [0, 0.69444, 0, 0], + "67": [0, 0.69444, 0, 0], + "68": [0, 0.69444, 0, 0], + "69": [0, 0.69444, 0, 0], + "70": [0, 0.69444, 0, 0], + "71": [0, 0.69444, 0, 0], + "72": [0, 0.69444, 0, 0], + "73": [0, 0.69444, 0, 0], + "74": [0, 0.69444, 0, 0], + "75": [0, 0.69444, 0, 0], + "76": [0, 0.69444, 0, 0], + "77": [0, 0.69444, 0, 0], + "78": [0, 0.69444, 0, 0], + "79": [0, 0.69444, 0, 0], + "80": [0, 0.69444, 0, 0], + "81": [0.125, 0.69444, 0, 0], + "82": [0, 0.69444, 0, 0], + "83": [0, 0.69444, 0, 0], + "84": [0, 0.69444, 0, 0], + "85": [0, 0.69444, 0, 0], + "86": [0, 0.69444, 0.01389, 0], + "87": [0, 0.69444, 0.01389, 0], + "88": [0, 0.69444, 0, 0], + "89": [0, 0.69444, 0.025, 0], + "90": [0, 0.69444, 0, 0], + "91": [0.25, 0.75, 0, 0], + "93": [0.25, 0.75, 0, 0], + "94": [0, 0.69444, 0, 0], + "95": [0.35, 0.09444, 0.02778, 0], + "97": [0, 0.44444, 0, 0], + "98": [0, 0.69444, 0, 0], + "99": [0, 0.44444, 0, 0], + "100": [0, 0.69444, 0, 0], + "101": [0, 0.44444, 0, 0], + "102": [0, 0.69444, 0.06944, 0], + "103": [0.19444, 0.44444, 0.01389, 0], + "104": [0, 0.69444, 0, 0], + "105": [0, 0.67937, 0, 0], + "106": [0.19444, 0.67937, 0, 0], + "107": [0, 0.69444, 0, 0], + "108": [0, 0.69444, 0, 0], + "109": [0, 0.44444, 0, 0], + "110": [0, 0.44444, 0, 0], + "111": [0, 0.44444, 0, 0], + "112": [0.19444, 0.44444, 0, 0], + "113": [0.19444, 0.44444, 0, 0], + "114": [0, 0.44444, 0.01389, 0], + "115": [0, 0.44444, 0, 0], + "116": [0, 0.57143, 0, 0], + "117": [0, 0.44444, 0, 0], + "118": [0, 0.44444, 0.01389, 0], + "119": [0, 0.44444, 0.01389, 0], + "120": [0, 0.44444, 0, 0], + "121": [0.19444, 0.44444, 0.01389, 0], + "122": [0, 0.44444, 0, 0], + "126": [0.35, 0.32659, 0, 0], + "305": [0, 0.44444, 0, 0], + "567": [0.19444, 0.44444, 0, 0], + "768": [0, 0.69444, 0, 0], + "769": [0, 0.69444, 0, 0], + "770": [0, 0.69444, 0, 0], + "771": [0, 0.67659, 0, 0], + "772": [0, 0.60889, 0, 0], + "774": [0, 0.69444, 0, 0], + "775": [0, 0.67937, 0, 0], + "776": [0, 0.67937, 0, 
0], + "778": [0, 0.69444, 0, 0], + "779": [0, 0.69444, 0, 0], + "780": [0, 0.63194, 0, 0], + "915": [0, 0.69444, 0, 0], + "916": [0, 0.69444, 0, 0], + "920": [0, 0.69444, 0, 0], + "923": [0, 0.69444, 0, 0], + "926": [0, 0.69444, 0, 0], + "928": [0, 0.69444, 0, 0], + "931": [0, 0.69444, 0, 0], + "933": [0, 0.69444, 0, 0], + "934": [0, 0.69444, 0, 0], + "936": [0, 0.69444, 0, 0], + "937": [0, 0.69444, 0, 0], + "8211": [0, 0.44444, 0.02778, 0], + "8212": [0, 0.44444, 0.02778, 0], + "8216": [0, 0.69444, 0, 0], + "8217": [0, 0.69444, 0, 0], + "8220": [0, 0.69444, 0, 0], + "8221": [0, 0.69444, 0, 0], + }, + "Script-Regular": { + "65": [0, 0.7, 0.22925, 0], + "66": [0, 0.7, 0.04087, 0], + "67": [0, 0.7, 0.1689, 0], + "68": [0, 0.7, 0.09371, 0], + "69": [0, 0.7, 0.18583, 0], + "70": [0, 0.7, 0.13634, 0], + "71": [0, 0.7, 0.17322, 0], + "72": [0, 0.7, 0.29694, 0], + "73": [0, 0.7, 0.19189, 0], + "74": [0.27778, 0.7, 0.19189, 0], + "75": [0, 0.7, 0.31259, 0], + "76": [0, 0.7, 0.19189, 0], + "77": [0, 0.7, 0.15981, 0], + "78": [0, 0.7, 0.3525, 0], + "79": [0, 0.7, 0.08078, 0], + "80": [0, 0.7, 0.08078, 0], + "81": [0, 0.7, 0.03305, 0], + "82": [0, 0.7, 0.06259, 0], + "83": [0, 0.7, 0.19189, 0], + "84": [0, 0.7, 0.29087, 0], + "85": [0, 0.7, 0.25815, 0], + "86": [0, 0.7, 0.27523, 0], + "87": [0, 0.7, 0.27523, 0], + "88": [0, 0.7, 0.26006, 0], + "89": [0, 0.7, 0.2939, 0], + "90": [0, 0.7, 0.24037, 0], + }, + "Size1-Regular": { + "40": [0.35001, 0.85, 0, 0], + "41": [0.35001, 0.85, 0, 0], + "47": [0.35001, 0.85, 0, 0], + "91": [0.35001, 0.85, 0, 0], + "92": [0.35001, 0.85, 0, 0], + "93": [0.35001, 0.85, 0, 0], + "123": [0.35001, 0.85, 0, 0], + "125": [0.35001, 0.85, 0, 0], + "710": [0, 0.72222, 0, 0], + "732": [0, 0.72222, 0, 0], + "770": [0, 0.72222, 0, 0], + "771": [0, 0.72222, 0, 0], + "8214": [-0.00099, 0.601, 0, 0], + "8593": [1e-05, 0.6, 0, 0], + "8595": [1e-05, 0.6, 0, 0], + "8657": [1e-05, 0.6, 0, 0], + "8659": [1e-05, 0.6, 0, 0], + "8719": [0.25001, 0.75, 0, 0], + 
"8720": [0.25001, 0.75, 0, 0], + "8721": [0.25001, 0.75, 0, 0], + "8730": [0.35001, 0.85, 0, 0], + "8739": [-0.00599, 0.606, 0, 0], + "8741": [-0.00599, 0.606, 0, 0], + "8747": [0.30612, 0.805, 0.19445, 0], + "8748": [0.306, 0.805, 0.19445, 0], + "8749": [0.306, 0.805, 0.19445, 0], + "8750": [0.30612, 0.805, 0.19445, 0], + "8896": [0.25001, 0.75, 0, 0], + "8897": [0.25001, 0.75, 0, 0], + "8898": [0.25001, 0.75, 0, 0], + "8899": [0.25001, 0.75, 0, 0], + "8968": [0.35001, 0.85, 0, 0], + "8969": [0.35001, 0.85, 0, 0], + "8970": [0.35001, 0.85, 0, 0], + "8971": [0.35001, 0.85, 0, 0], + "9168": [-0.00099, 0.601, 0, 0], + "10216": [0.35001, 0.85, 0, 0], + "10217": [0.35001, 0.85, 0, 0], + "10752": [0.25001, 0.75, 0, 0], + "10753": [0.25001, 0.75, 0, 0], + "10754": [0.25001, 0.75, 0, 0], + "10756": [0.25001, 0.75, 0, 0], + "10758": [0.25001, 0.75, 0, 0], + }, + "Size2-Regular": { + "40": [0.65002, 1.15, 0, 0], + "41": [0.65002, 1.15, 0, 0], + "47": [0.65002, 1.15, 0, 0], + "91": [0.65002, 1.15, 0, 0], + "92": [0.65002, 1.15, 0, 0], + "93": [0.65002, 1.15, 0, 0], + "123": [0.65002, 1.15, 0, 0], + "125": [0.65002, 1.15, 0, 0], + "710": [0, 0.75, 0, 0], + "732": [0, 0.75, 0, 0], + "770": [0, 0.75, 0, 0], + "771": [0, 0.75, 0, 0], + "8719": [0.55001, 1.05, 0, 0], + "8720": [0.55001, 1.05, 0, 0], + "8721": [0.55001, 1.05, 0, 0], + "8730": [0.65002, 1.15, 0, 0], + "8747": [0.86225, 1.36, 0.44445, 0], + "8748": [0.862, 1.36, 0.44445, 0], + "8749": [0.862, 1.36, 0.44445, 0], + "8750": [0.86225, 1.36, 0.44445, 0], + "8896": [0.55001, 1.05, 0, 0], + "8897": [0.55001, 1.05, 0, 0], + "8898": [0.55001, 1.05, 0, 0], + "8899": [0.55001, 1.05, 0, 0], + "8968": [0.65002, 1.15, 0, 0], + "8969": [0.65002, 1.15, 0, 0], + "8970": [0.65002, 1.15, 0, 0], + "8971": [0.65002, 1.15, 0, 0], + "10216": [0.65002, 1.15, 0, 0], + "10217": [0.65002, 1.15, 0, 0], + "10752": [0.55001, 1.05, 0, 0], + "10753": [0.55001, 1.05, 0, 0], + "10754": [0.55001, 1.05, 0, 0], + "10756": [0.55001, 1.05, 0, 0], + 
"10758": [0.55001, 1.05, 0, 0], + }, + "Size3-Regular": { + "40": [0.95003, 1.45, 0, 0], + "41": [0.95003, 1.45, 0, 0], + "47": [0.95003, 1.45, 0, 0], + "91": [0.95003, 1.45, 0, 0], + "92": [0.95003, 1.45, 0, 0], + "93": [0.95003, 1.45, 0, 0], + "123": [0.95003, 1.45, 0, 0], + "125": [0.95003, 1.45, 0, 0], + "710": [0, 0.75, 0, 0], + "732": [0, 0.75, 0, 0], + "770": [0, 0.75, 0, 0], + "771": [0, 0.75, 0, 0], + "8730": [0.95003, 1.45, 0, 0], + "8968": [0.95003, 1.45, 0, 0], + "8969": [0.95003, 1.45, 0, 0], + "8970": [0.95003, 1.45, 0, 0], + "8971": [0.95003, 1.45, 0, 0], + "10216": [0.95003, 1.45, 0, 0], + "10217": [0.95003, 1.45, 0, 0], + }, + "Size4-Regular": { + "40": [1.25003, 1.75, 0, 0], + "41": [1.25003, 1.75, 0, 0], + "47": [1.25003, 1.75, 0, 0], + "91": [1.25003, 1.75, 0, 0], + "92": [1.25003, 1.75, 0, 0], + "93": [1.25003, 1.75, 0, 0], + "123": [1.25003, 1.75, 0, 0], + "125": [1.25003, 1.75, 0, 0], + "710": [0, 0.825, 0, 0], + "732": [0, 0.825, 0, 0], + "770": [0, 0.825, 0, 0], + "771": [0, 0.825, 0, 0], + "8730": [1.25003, 1.75, 0, 0], + "8968": [1.25003, 1.75, 0, 0], + "8969": [1.25003, 1.75, 0, 0], + "8970": [1.25003, 1.75, 0, 0], + "8971": [1.25003, 1.75, 0, 0], + "9115": [0.64502, 1.155, 0, 0], + "9116": [1e-05, 0.6, 0, 0], + "9117": [0.64502, 1.155, 0, 0], + "9118": [0.64502, 1.155, 0, 0], + "9119": [1e-05, 0.6, 0, 0], + "9120": [0.64502, 1.155, 0, 0], + "9121": [0.64502, 1.155, 0, 0], + "9122": [-0.00099, 0.601, 0, 0], + "9123": [0.64502, 1.155, 0, 0], + "9124": [0.64502, 1.155, 0, 0], + "9125": [-0.00099, 0.601, 0, 0], + "9126": [0.64502, 1.155, 0, 0], + "9127": [1e-05, 0.9, 0, 0], + "9128": [0.65002, 1.15, 0, 0], + "9129": [0.90001, 0, 0, 0], + "9130": [0, 0.3, 0, 0], + "9131": [1e-05, 0.9, 0, 0], + "9132": [0.65002, 1.15, 0, 0], + "9133": [0.90001, 0, 0, 0], + "9143": [0.88502, 0.915, 0, 0], + "10216": [1.25003, 1.75, 0, 0], + "10217": [1.25003, 1.75, 0, 0], + "57344": [-0.00499, 0.605, 0, 0], + "57345": [-0.00499, 0.605, 0, 0], + "57680": [0, 
0.12, 0, 0], + "57681": [0, 0.12, 0, 0], + "57682": [0, 0.12, 0, 0], + "57683": [0, 0.12, 0, 0], + }, + "Typewriter-Regular": { + "33": [0, 0.61111, 0, 0], + "34": [0, 0.61111, 0, 0], + "35": [0, 0.61111, 0, 0], + "36": [0.08333, 0.69444, 0, 0], + "37": [0.08333, 0.69444, 0, 0], + "38": [0, 0.61111, 0, 0], + "39": [0, 0.61111, 0, 0], + "40": [0.08333, 0.69444, 0, 0], + "41": [0.08333, 0.69444, 0, 0], + "42": [0, 0.52083, 0, 0], + "43": [-0.08056, 0.53055, 0, 0], + "44": [0.13889, 0.125, 0, 0], + "45": [-0.08056, 0.53055, 0, 0], + "46": [0, 0.125, 0, 0], + "47": [0.08333, 0.69444, 0, 0], + "48": [0, 0.61111, 0, 0], + "49": [0, 0.61111, 0, 0], + "50": [0, 0.61111, 0, 0], + "51": [0, 0.61111, 0, 0], + "52": [0, 0.61111, 0, 0], + "53": [0, 0.61111, 0, 0], + "54": [0, 0.61111, 0, 0], + "55": [0, 0.61111, 0, 0], + "56": [0, 0.61111, 0, 0], + "57": [0, 0.61111, 0, 0], + "58": [0, 0.43056, 0, 0], + "59": [0.13889, 0.43056, 0, 0], + "60": [-0.05556, 0.55556, 0, 0], + "61": [-0.19549, 0.41562, 0, 0], + "62": [-0.05556, 0.55556, 0, 0], + "63": [0, 0.61111, 0, 0], + "64": [0, 0.61111, 0, 0], + "65": [0, 0.61111, 0, 0], + "66": [0, 0.61111, 0, 0], + "67": [0, 0.61111, 0, 0], + "68": [0, 0.61111, 0, 0], + "69": [0, 0.61111, 0, 0], + "70": [0, 0.61111, 0, 0], + "71": [0, 0.61111, 0, 0], + "72": [0, 0.61111, 0, 0], + "73": [0, 0.61111, 0, 0], + "74": [0, 0.61111, 0, 0], + "75": [0, 0.61111, 0, 0], + "76": [0, 0.61111, 0, 0], + "77": [0, 0.61111, 0, 0], + "78": [0, 0.61111, 0, 0], + "79": [0, 0.61111, 0, 0], + "80": [0, 0.61111, 0, 0], + "81": [0.13889, 0.61111, 0, 0], + "82": [0, 0.61111, 0, 0], + "83": [0, 0.61111, 0, 0], + "84": [0, 0.61111, 0, 0], + "85": [0, 0.61111, 0, 0], + "86": [0, 0.61111, 0, 0], + "87": [0, 0.61111, 0, 0], + "88": [0, 0.61111, 0, 0], + "89": [0, 0.61111, 0, 0], + "90": [0, 0.61111, 0, 0], + "91": [0.08333, 0.69444, 0, 0], + "92": [0.08333, 0.69444, 0, 0], + "93": [0.08333, 0.69444, 0, 0], + "94": [0, 0.61111, 0, 0], + "95": [0.09514, 0, 0, 0], + "96": 
[0, 0.61111, 0, 0], + "97": [0, 0.43056, 0, 0], + "98": [0, 0.61111, 0, 0], + "99": [0, 0.43056, 0, 0], + "100": [0, 0.61111, 0, 0], + "101": [0, 0.43056, 0, 0], + "102": [0, 0.61111, 0, 0], + "103": [0.22222, 0.43056, 0, 0], + "104": [0, 0.61111, 0, 0], + "105": [0, 0.61111, 0, 0], + "106": [0.22222, 0.61111, 0, 0], + "107": [0, 0.61111, 0, 0], + "108": [0, 0.61111, 0, 0], + "109": [0, 0.43056, 0, 0], + "110": [0, 0.43056, 0, 0], + "111": [0, 0.43056, 0, 0], + "112": [0.22222, 0.43056, 0, 0], + "113": [0.22222, 0.43056, 0, 0], + "114": [0, 0.43056, 0, 0], + "115": [0, 0.43056, 0, 0], + "116": [0, 0.55358, 0, 0], + "117": [0, 0.43056, 0, 0], + "118": [0, 0.43056, 0, 0], + "119": [0, 0.43056, 0, 0], + "120": [0, 0.43056, 0, 0], + "121": [0.22222, 0.43056, 0, 0], + "122": [0, 0.43056, 0, 0], + "123": [0.08333, 0.69444, 0, 0], + "124": [0.08333, 0.69444, 0, 0], + "125": [0.08333, 0.69444, 0, 0], + "126": [0, 0.61111, 0, 0], + "127": [0, 0.61111, 0, 0], + "305": [0, 0.43056, 0, 0], + "567": [0.22222, 0.43056, 0, 0], + "768": [0, 0.61111, 0, 0], + "769": [0, 0.61111, 0, 0], + "770": [0, 0.61111, 0, 0], + "771": [0, 0.61111, 0, 0], + "772": [0, 0.56555, 0, 0], + "774": [0, 0.61111, 0, 0], + "776": [0, 0.61111, 0, 0], + "778": [0, 0.61111, 0, 0], + "780": [0, 0.56597, 0, 0], + "915": [0, 0.61111, 0, 0], + "916": [0, 0.61111, 0, 0], + "920": [0, 0.61111, 0, 0], + "923": [0, 0.61111, 0, 0], + "926": [0, 0.61111, 0, 0], + "928": [0, 0.61111, 0, 0], + "931": [0, 0.61111, 0, 0], + "933": [0, 0.61111, 0, 0], + "934": [0, 0.61111, 0, 0], + "936": [0, 0.61111, 0, 0], + "937": [0, 0.61111, 0, 0], + "2018": [0, 0.61111, 0, 0], + "2019": [0, 0.61111, 0, 0], + "8242": [0, 0.61111, 0, 0], + }, +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/functions.js b/fastcdm/tokenize_latex/third_party/katex/src/functions.js new file mode 100644 index 0000000000000000000000000000000000000000..806f6982d0eddb6c6ac010e27d8acdb022b14d25 --- /dev/null +++ 
b/fastcdm/tokenize_latex/third_party/katex/src/functions.js @@ -0,0 +1,585 @@ +var utils = require("./utils"); +var ParseError = require("./ParseError"); + +/* This file contains a list of functions that we parse, identified by + * the calls to defineFunction. + * + * The first argument to defineFunction is a single name or a list of names. + * All functions named in such a list will share a single implementation. + * + * Each declared function can have associated properties, which + * include the following: + * + * - numArgs: The number of arguments the function takes. + * If this is the only property, it can be passed as a number + * instead of an element of a properties object. + * - argTypes: (optional) An array corresponding to each argument of the + * function, giving the type of argument that should be parsed. Its + * length should be equal to `numArgs + numOptionalArgs`. Valid + * types: + * - "size": A size-like thing, such as "1em" or "5ex" + * - "color": An html color, like "#abc" or "blue" + * - "original": The same type as the environment that the + * function being parsed is in (e.g. used for the + * bodies of functions like \color where the first + * argument is special and the second argument is + * parsed normally) + * Other possible types (probably shouldn't be used) + * - "text": Text-like (e.g. \text) + * - "math": Normal math + * If undefined, this will be treated as an appropriate length + * array of "original" strings + * - greediness: (optional) The greediness of the function to use ungrouped + * arguments. + * + * E.g. if you have an expression + * \sqrt \frac 1 2 + * since \frac has greediness=2 vs \sqrt's greediness=1, \frac + * will use the two arguments '1' and '2' as its two arguments, + * then that whole function will be used as the argument to + * \sqrt. 
On the other hand, the expressions + * \frac \frac 1 2 3 + * and + * \frac \sqrt 1 2 + * will fail because \frac and \frac have equal greediness + * and \sqrt has a lower greediness than \frac respectively. To + * make these parse, we would have to change them to: + * \frac {\frac 1 2} 3 + * and + * \frac {\sqrt 1} 2 + * + * The default value is `1` + * - allowedInText: (optional) Whether or not the function is allowed inside + * text mode (default false) + * - numOptionalArgs: (optional) The number of optional arguments the function + * should parse. If the optional arguments aren't found, + * `null` will be passed to the handler in their place. + * (default 0) + * + * The last argument is that implementation, the handler for the function(s). + * It is called to handle these functions and their arguments. + * It receives two arguments: + * - context contains information and references provided by the parser + * - args is an array of arguments obtained from TeX input + * The context contains the following properties: + * - funcName: the text (i.e. name) of the function, including \ + * - parser: the parser object + * - lexer: the lexer object + * - positions: the positions in the overall string of the function + * and the arguments. + * The latter three should only be used to produce error messages. + * + * The function should return an object with the following keys: + * - type: The type of element that this is. This is then used in + * buildHTML/buildMathML to determine which function + * should be called to build this node into a DOM node + * Any other data can be added to the object, which will be passed + * in to the function in buildHTML/buildMathML as `group.value`. 
+ */ + +function defineFunction(names, props, handler) { + if (typeof names === "string") { + names = [names]; + } + if (typeof props === "number") { + props = { numArgs: props }; + } + // Set default values of functions + var data = { + numArgs: props.numArgs, + argTypes: props.argTypes, + greediness: (props.greediness === undefined) ? 1 : props.greediness, + allowedInText: !!props.allowedInText, + numOptionalArgs: props.numOptionalArgs || 0, + handler: handler, + }; + for (var i = 0; i < names.length; ++i) { + module.exports[names[i]] = data; + } +} + +// A normal square root +defineFunction("\\sqrt", { + numArgs: 1, + numOptionalArgs: 1, +}, function(context, args) { + var index = args[0]; + var body = args[1]; + return { + type: "sqrt", + body: body, + index: index, + }; +}); + +// Some non-mathy text +defineFunction(["\\text", "\\mbox", "\\hbox", "\\vbox"], { + numArgs: 1, + argTypes: ["text"], + greediness: 2, +}, function(context, args) { + var body = args[0]; + // Since the corresponding buildHTML/buildMathML function expects a + // list of elements, we normalize for different kinds of arguments + // TODO(emily): maybe this should be done somewhere else + var inner; + if (body.type === "ordgroup") { + inner = body.value; + } else { + inner = [body]; + } + + return { + type: "text", + body: inner, + }; +}); + +// A two-argument custom color +defineFunction("\\color", { + numArgs: 2, + allowedInText: true, + greediness: 3, + argTypes: ["color", "original"], +}, function(context, args) { + var color = args[0]; + var body = args[1]; + // Normalize the different kinds of bodies (see \text above) + var inner; + if (body.type === "ordgroup") { + inner = body.value; + } else { + inner = [body]; + } + + return { + type: "color", + color: color.value, + value: inner, + }; +}); + +// An overline +defineFunction("\\overline", { + numArgs: 1, +}, function(context, args) { + var body = args[0]; + return { + type: "overline", + body: body, + }; +}); + +// An underline 
defineFunction("\\underline", {
    numArgs: 1,
}, function(context, args) {
    return {
        type: "underline",
        body: args[0],
    };
});

// \rule[shift]{width}{height}: the optional shift is args[0] and may be
// missing, in which case it is passed through as-is.
defineFunction("\\rule", {
    numArgs: 2,
    numOptionalArgs: 1,
    argTypes: ["size", "size", "size"],
}, function(context, args) {
    var shiftArg = args[0];
    var widthArg = args[1];
    var heightArg = args[2];
    return {
        type: "rule",
        shift: shiftArg && shiftArg.value,
        width: widthArg.value,
        height: heightArg.value,
    };
});

// The KaTeX logo takes no arguments
defineFunction("\\KaTeX", {
    numArgs: 0,
}, function(context) {
    return {
        type: "katex",
    };
});

// \phantom: the argument is handed on as a list (an "ordgroup" contributes
// its own value, anything else is wrapped in a one-element array).
defineFunction("\\phantom", {
    numArgs: 1,
}, function(context, args) {
    var arg = args[0];
    var elements = (arg.type === "ordgroup") ? arg.value : [arg];

    return {
        type: "phantom",
        value: elements,
    };
});

// Per-command data looked up by the delimiter handler further down:
// the atom type to produce and the size step (1-4).
var delimiterSizes = {
    "\\bigl": {type: "open", size: 1},
    "\\Bigl": {type: "open", size: 2},
    "\\biggl": {type: "open", size: 3},
    "\\Biggl": {type: "open", size: 4},
    "\\bigr": {type: "close", size: 1},
    "\\Bigr": {type: "close", size: 2},
    "\\biggr": {type: "close", size: 3},
    "\\Biggr": {type: "close", size: 4},
    "\\bigm": {type: "rel", size: 1},
    "\\Bigm": {type: "rel", size: 2},
    "\\biggm": {type: "rel", size: 3},
    "\\Biggm": {type: "rel", size: 4},
    "\\big": {type: "textord", size: 1},
    "\\Big": {type: "textord", size: 2},
    "\\bigg": {type: "textord", size: 3},
    "\\Bigg": {type: "textord", size: 4},
};

// Tokens accepted as the argument of a delimiter command
var delimiters = [
    "(", ")",
    "[", "\\lbrack", "]", "\\rbrack",
    "\\{", "\\lbrace", "\\}", "\\rbrace",
    "\\lfloor", "\\rfloor",
    "\\lceil", "\\rceil",
    "<", ">", "\\langle", "\\rangle", "\\lt", "\\gt",
    "\\lvert", "\\rvert", "\\lVert", "\\rVert",
    "\\lgroup", "\\rgroup",
    "\\lmoustache", "\\rmoustache",
    "/", "\\backslash",
    "|", "\\vert", "\\|", "\\Vert",
    "\\uparrow", "\\Uparrow",
    "\\downarrow", "\\Downarrow",
    "\\updownarrow", "\\Updownarrow",
    ".",
];

// Alternative spellings mapped onto the font command they stand for
var fontAliases = {
    "\\Bbb": "\\mathbb",
    "\\bold": "\\mathbf",
    "\\frak": "\\mathfrak",
};

// Colour commands taking a single argument. The colour name is the command
// name without its leading backslash, prefixed with "katex-".
defineFunction([
    "\\blue", "\\orange", "\\pink", "\\red",
    "\\green", "\\gray", "\\purple",
    "\\blueA", "\\blueB", "\\blueC", "\\blueD", "\\blueE",
    "\\tealA", "\\tealB", "\\tealC", "\\tealD", "\\tealE",
    "\\greenA", "\\greenB", "\\greenC", "\\greenD", "\\greenE",
    "\\goldA", "\\goldB", "\\goldC", "\\goldD", "\\goldE",
    "\\redA", "\\redB", "\\redC", "\\redD", "\\redE",
    "\\maroonA", "\\maroonB", "\\maroonC", "\\maroonD", "\\maroonE",
    "\\purpleA", "\\purpleB", "\\purpleC", "\\purpleD", "\\purpleE",
    "\\mintA", "\\mintB", "\\mintC",
    "\\grayA", "\\grayB", "\\grayC", "\\grayD", "\\grayE",
    "\\grayF", "\\grayG", "\\grayH", "\\grayI",
    "\\kaBlue", "\\kaGreen",
], {
    numArgs: 1,
    allowedInText: true,
    greediness: 3,
}, function(context, args) {
    var arg = args[0];
    var elements = (arg.type === "ordgroup") ? arg.value : [arg];

    return {
        type: "color",
        color: "katex-" + context.funcName.slice(1),
        value: elements,
    };
});

// There are 2 flags for operators; whether they produce limits in
// displaystyle, and whether they are symbols and should grow in
// displaystyle. These four groups cover the four possible choices.
+ +// No limits, not symbols +defineFunction([ + "\\arcsin", "\\arccos", "\\arctan", "\\arg", "\\cos", "\\cosh", + "\\cot", "\\coth", "\\csc", "\\deg", "\\dim", "\\exp", "\\hom", + "\\ker", "\\lg", "\\ln", "\\log", "\\sec", "\\sin", "\\sinh", + "\\tan", "\\tanh", +], { + numArgs: 0, +}, function(context) { + return { + type: "op", + limits: false, + symbol: false, + body: context.funcName, + }; +}); + +// Limits, not symbols +defineFunction([ + "\\det", "\\gcd", "\\inf", "\\lim", "\\liminf", "\\limsup", "\\max", + "\\min", "\\Pr", "\\sup", +], { + numArgs: 0, +}, function(context) { + return { + type: "op", + limits: true, + symbol: false, + body: context.funcName, + }; +}); + +// No limits, symbols +defineFunction([ + "\\int", "\\iint", "\\iiint", "\\oint", +], { + numArgs: 0, +}, function(context) { + return { + type: "op", + limits: false, + symbol: true, + body: context.funcName, + }; +}); + +// Limits, symbols +defineFunction([ + "\\coprod", "\\bigvee", "\\bigwedge", "\\biguplus", "\\bigcap", + "\\bigcup", "\\intop", "\\prod", "\\sum", "\\bigotimes", + "\\bigoplus", "\\bigodot", "\\bigsqcup", "\\smallint", +], { + numArgs: 0, +}, function(context) { + return { + type: "op", + limits: true, + symbol: true, + body: context.funcName, + }; +}); + +// Fractions +defineFunction([ + "\\dfrac", "\\frac", "\\tfrac", + "\\dbinom", "\\binom", "\\tbinom", +], { + numArgs: 2, + greediness: 2, +}, function(context, args) { + var numer = args[0]; + var denom = args[1]; + var hasBarLine; + var leftDelim = null; + var rightDelim = null; + var size = "auto"; + + switch (context.funcName) { + case "\\dfrac": + case "\\frac": + case "\\tfrac": + hasBarLine = true; + break; + case "\\dbinom": + case "\\binom": + case "\\tbinom": + hasBarLine = false; + leftDelim = "("; + rightDelim = ")"; + break; + default: + throw new Error("Unrecognized genfrac command"); + } + + switch (context.funcName) { + case "\\dfrac": + case "\\dbinom": + size = "display"; + break; + case "\\tfrac": + 
case "\\tbinom": + size = "text"; + break; + } + + return { + type: "genfrac", + numer: numer, + denom: denom, + hasBarLine: hasBarLine, + leftDelim: leftDelim, + rightDelim: rightDelim, + size: size, + }; +}); + +// Left and right overlap functions +defineFunction(["\\llap", "\\rlap"], { + numArgs: 1, + allowedInText: true, +}, function(context, args) { + var body = args[0]; + return { + type: context.funcName.slice(1), + body: body, + }; +}); + +// Delimiter functions +defineFunction([ + "\\bigl", "\\Bigl", "\\biggl", "\\Biggl", + "\\bigr", "\\Bigr", "\\biggr", "\\Biggr", + "\\bigm", "\\Bigm", "\\biggm", "\\Biggm", + "\\big", "\\Big", "\\bigg", "\\Bigg", + "\\left", "\\right" +], { + numArgs: 1, +}, function(context, args) { + var delim = args[0]; + if (!utils.contains(delimiters, delim.value)) { + throw new ParseError( + "Invalid delimiter: '" + delim.value + "' after '" + + context.funcName + "'", + context.lexer, context.positions[1]); + } + + // \left and \right are caught somewhere in Parser.js, which is + // why this data doesn't match what is in buildHTML. 
+ if (context.funcName === "\\left" || context.funcName === "\\right") { + return { + type: "leftright", + value: delim.value, + funcName: context.funcName + }; + } else { + return { + type: "delimsizing", + size: delimiterSizes[context.funcName].size, + delimType: delimiterSizes[context.funcName].type, + value: delim.value, + funcName: context.funcName + }; + } +}); + +// Sizing functions (handled in Parser.js explicitly, hence no handler) +defineFunction([ + "\\tiny", "\\scriptsize", "\\footnotesize", "\\small", + "\\normalsize", "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge", "\\textrm", "\\rm", "\\cal", "\\bf", "\\siptstyle", "\\boldmath", "\\it" +], 0, null); + +// Style changing functions (handled in Parser.js explicitly, hence no +// handler) +defineFunction([ + "\\displaystyle", "\\textstyle", "\\scriptstyle", + "\\scriptscriptstyle", +], 0, null); + +defineFunction([ + // styles + "\\mathrm", "\\mathit", "\\mathbf","\\mathop","\\stackrel", + + // families + "\\mathbb", "\\mathcal", "\\mathfrak", "\\mathscr", "\\mathsf", + "\\mathtt", + + "\\label", "\\comment", "\\hspace", "\\vspace", "\\atop", "\\fbox", "\\tag", "\\makebox", + "\\raisebox", "\\framebox", "\\circle", "\\line", "\\put", "\\vphantom", "\\textup", "\\noalign", + + // aliases + "\\Bbb", "\\bold", "\\frak", +], { + numArgs: 1, + greediness: 2, +}, function(context, args) { + var body = args[0]; + var func = context.funcName; + if (func in fontAliases) { + func = fontAliases[func]; + } + return { + type: "font", + font: func.slice(1), + body: body, + }; +}); + +// Accents +defineFunction([ + "\\acute", "\\grave", "\\ddot", "\\tilde", "\\bar", "\\breve", + "\\check", "\\hat", "\\vec", "\\dot", + // We don't support expanding accents yet + // "\\widetilde", "\\widehat" +], { + numArgs: 1, +}, function(context, args) { + var base = args[0]; + return { + type: "accent", + accent: context.funcName, + base: base, + }; +}); + +// Infix generalized fractions +defineFunction(["\\over", "\\choose"], 
{ + numArgs: 0, +}, function(context) { + var replaceWith; + switch (context.funcName) { + case "\\over": + replaceWith = "\\frac"; + break; + case "\\choose": + replaceWith = "\\binom"; + break; + default: + throw new Error("Unrecognized infix genfrac command"); + } + return { + type: "infix", + replaceWith: replaceWith, + }; +}); + +// Row breaks for aligned data +defineFunction(["\\\\", "\\cr"], { + numArgs: 0, + numOptionalArgs: 1, + argTypes: ["size"], +}, function(context, args) { + var size = args[0]; + return { + type: "cr", + size: size, + }; +}); + +// Environment delimiters +defineFunction(["\\begin", "\\end"], { + numArgs: 1, + argTypes: ["text"], +}, function(context, args) { + var nameGroup = args[0]; + if (nameGroup.type !== "ordgroup") { + throw new ParseError( + "Invalid environment name", + context.lexer, context.positions[1]); + } + var name = ""; + for (var i = 0; i < nameGroup.value.length; ++i) { + name += nameGroup.value[i].value; + } + return { + type: "environment", + name: name, + namepos: context.positions[1], + }; +}); diff --git a/fastcdm/tokenize_latex/third_party/katex/src/mathMLTree.js b/fastcdm/tokenize_latex/third_party/katex/src/mathMLTree.js new file mode 100644 index 0000000000000000000000000000000000000000..86e63562c6d732980326b852fbfd7cabcc891997 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/mathMLTree.js @@ -0,0 +1,102 @@ +/** + * These objects store data about MathML nodes. This is the MathML equivalent + * of the types in domTree.js. Since MathML handles its own rendering, and + * since we're mainly using MathML to improve accessibility, we don't manage + * any of the styling state that the plain DOM nodes do. + * + * The `toNode` and `toMarkup` functions work simlarly to how they do in + * domTree.js, creating namespaced DOM nodes and HTML text markup respectively. + */ + +var utils = require("./utils"); + +/** + * This node represents a general purpose MathML node of any type. 
The + * constructor requires the type of node to create (for example, `"mo"` or + * `"mspace"`, corresponding to `` and `` tags). + */ +function MathNode(type, children) { + this.type = type; + this.attributes = {}; + this.children = children || []; +} + +/** + * Sets an attribute on a MathML node. MathML depends on attributes to convey a + * semantic content, so this is used heavily. + */ +MathNode.prototype.setAttribute = function(name, value) { + this.attributes[name] = value; +}; + +/** + * Converts the math node into a MathML-namespaced DOM element. + */ +MathNode.prototype.toNode = function() { + var node = document.createElementNS( + "http://www.w3.org/1998/Math/MathML", this.type); + + for (var attr in this.attributes) { + if (Object.prototype.hasOwnProperty.call(this.attributes, attr)) { + node.setAttribute(attr, this.attributes[attr]); + } + } + + for (var i = 0; i < this.children.length; i++) { + node.appendChild(this.children[i].toNode()); + } + + return node; +}; + +/** + * Converts the math node into an HTML markup string. + */ +MathNode.prototype.toMarkup = function() { + var markup = "<" + this.type; + + // Add the attributes + for (var attr in this.attributes) { + if (Object.prototype.hasOwnProperty.call(this.attributes, attr)) { + markup += " " + attr + "=\""; + markup += utils.escape(this.attributes[attr]); + markup += "\""; + } + } + + markup += ">"; + + for (var i = 0; i < this.children.length; i++) { + markup += this.children[i].toMarkup(); + } + + markup += ""; + + return markup; +}; + +/** + * This node represents a piece of text. + */ +function TextNode(text) { + this.text = text; +} + +/** + * Converts the text node into a DOM text node. + */ +TextNode.prototype.toNode = function() { + return document.createTextNode(this.text); +}; + +/** + * Converts the text node into HTML markup (which is just the text itself). 
+ */ +TextNode.prototype.toMarkup = function() { + return utils.escape(this.text); +}; + +module.exports = { + MathNode: MathNode, + TextNode: TextNode, +}; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/parseData.js b/fastcdm/tokenize_latex/third_party/katex/src/parseData.js new file mode 100644 index 0000000000000000000000000000000000000000..be8dd67f442319f115946e8a0029cf3341c43e42 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/parseData.js @@ -0,0 +1,13 @@ +/** + * The resulting parse tree nodes of the parse tree. + */ +function ParseNode(type, value, mode) { + this.type = type; + this.value = value; + this.mode = mode; +} + +module.exports = { + ParseNode: ParseNode, +}; + diff --git a/fastcdm/tokenize_latex/third_party/katex/src/parseTree.js b/fastcdm/tokenize_latex/third_party/katex/src/parseTree.js new file mode 100644 index 0000000000000000000000000000000000000000..3adba824890d869e796d24524d1896294a12e697 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/parseTree.js @@ -0,0 +1,17 @@ +/** + * Provides a single function for parsing an expression using a Parser + * TODO(emily): Remove this + */ + +var Parser = require("./Parser"); + +/** + * Parses an expression using a Parser, then returns the parsed result. + */ +var parseTree = function(toParse, settings) { + var parser = new Parser(toParse, settings); + + return parser.parse(); +}; + +module.exports = parseTree; diff --git a/fastcdm/tokenize_latex/third_party/katex/src/symbols.js b/fastcdm/tokenize_latex/third_party/katex/src/symbols.js new file mode 100644 index 0000000000000000000000000000000000000000..9a2b130d16b18c5d14bb9f054f5ee16e91e5544b --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/symbols.js @@ -0,0 +1,687 @@ +/** + * This file holds a list of all no-argument functions and single-character + * symbols (like 'a' or ';'). 
+ * + * For each of the symbols, there are three properties they can have: + * - font (required): the font to be used for this symbol. Either "main" (the + normal font), or "ams" (the ams fonts). + * - group (required): the ParseNode group type the symbol should have (i.e. + "textord", "mathord", etc). + See https://github.com/Khan/KaTeX/wiki/Examining-TeX#group-types + * - replace: the character that this symbol or function should be + * replaced with (i.e. "\phi" has a replace value of "\u03d5", the phi + * character in the main font). + * + * The outermost map in the table indicates what mode the symbols should be + * accepted in (e.g. "math" or "text"). + */ + +module.exports = { + math: {}, + text: {}, +}; + +function defineSymbol(mode, font, group, replace, name) { + module.exports[mode][name] = { + font: font, + group: group, + replace: replace, + }; +} + +// Some abbreviations for commonly used strings. +// This helps minify the code, and also spotting typos using jshint. + +// modes: +var math = "math"; +var text = "text"; + +// fonts: +var main = "main"; +var ams = "ams"; + +// groups: +var accent = "accent"; +var bin = "bin"; +var close = "close"; +var inner = "inner"; +var mathord = "mathord"; +var op = "op"; +var open = "open"; +var punct = "punct"; +var rel = "rel"; +var spacing = "spacing"; +var textord = "textord"; + +// Now comes the symbol table + +// Relation Symbols +defineSymbol(math, main, rel, "\u2261", "\\equiv"); +defineSymbol(math, main, rel, "\u227a", "\\prec"); +defineSymbol(math, main, rel, "\u227b", "\\succ"); +defineSymbol(math, main, rel, "\u223c", "\\sim"); +defineSymbol(math, main, rel, "\u22a5", "\\perp"); +defineSymbol(math, main, rel, "\u2aaf", "\\preceq"); +defineSymbol(math, main, rel, "\u2ab0", "\\succeq"); +defineSymbol(math, main, rel, "\u2243", "\\simeq"); +defineSymbol(math, main, rel, "\u2223", "\\mid"); +defineSymbol(math, main, rel, "\u226a", "\\ll"); +defineSymbol(math, main, rel, "\u226b", "\\gg"); 
+defineSymbol(math, main, rel, "\u224d", "\\asymp"); +defineSymbol(math, main, rel, "\u2225", "\\parallel"); +defineSymbol(math, main, rel, "\u22c8", "\\bowtie"); +defineSymbol(math, main, rel, "\u2323", "\\smile"); +defineSymbol(math, main, rel, "\u2291", "\\sqsubseteq"); +defineSymbol(math, main, rel, "\u2292", "\\sqsupseteq"); +defineSymbol(math, main, rel, "\u2250", "\\doteq"); +defineSymbol(math, main, rel, "\u2322", "\\frown"); +defineSymbol(math, main, rel, "\u220b", "\\ni"); +defineSymbol(math, main, rel, "\u221d", "\\propto"); +defineSymbol(math, main, rel, "\u22a2", "\\vdash"); +defineSymbol(math, main, rel, "\u22a3", "\\dashv"); +defineSymbol(math, main, rel, "\u220b", "\\owns"); + +defineSymbol(math, main, rel, "\u220b", "\\widehat"); +defineSymbol(math, main, rel, "\u220b", "\\widetilde"); +defineSymbol(math, main, rel, "\u220b", "\\sp"); +defineSymbol(math, main, rel, "\u220b", "\\quad"); +// defineSymbol(math, main, rel, "\u220b", "\\cr"); +defineSymbol(math, main, rel, "\u220b", "\\\\sim"); +defineSymbol(math, main, rel, "\u220b", "\\nonumber"); +defineSymbol(math, main, rel, "\u220b", "\\dots"); +defineSymbol(math, main, rel, "\u220b", "\\cases"); +defineSymbol(math, main, rel, "\u220b", "\\mit"); +defineSymbol(math, main, rel, "\u220b", "\\smallskip"); +defineSymbol(math, main, rel, "\u220b", "\\slash"); +defineSymbol(math, main, rel, "\u220b", "\\d"); +defineSymbol(math, main, rel, "\u220b", "\\c"); +defineSymbol(math, main, rel, "\u220b", "\\b"); +defineSymbol(math, main, rel, "\u220b", "\\M"); +defineSymbol(math, main, rel, "\u220b", "\\S"); +defineSymbol(math, main, rel, "\u220b", "\\("); +defineSymbol(math, main, rel, "\u220b", "\\)"); +// defineSymbol(math, main, rel, "\u220b", "\\Comp"); +defineSymbol(math, main, rel, "\u220b", "\\thinspace"); +defineSymbol(math, main, rel, "\u220b", "\\hskip"); +defineSymbol(math, main, rel, "\u220b", "\\tt"); +defineSymbol(math, main, rel, "\u220b", "\\not"); +defineSymbol(math, main, rel, "\u220b", 
"\\boldmathr"); +defineSymbol(math, main, rel, "\u220b", "\\overleftarrow"); +defineSymbol(math, main, rel, "\u220b", "\\overrightarrow"); +defineSymbol(math, main, rel, "\u220b", "\\intf"); +defineSymbol(math, main, rel, "\u220b", "\\sf"); +defineSymbol(math, main, rel, "\u220b", "\\textbf"); +defineSymbol(math, main, rel, "\u220b", "\\L"); +defineSymbol(math, main, rel, "\u220b", "\\pii"); +defineSymbol(math, main, rel, "\u220b", "\\unitlength"); +defineSymbol(math, main, rel, "\u220b", "\\arowtor5linv"); +defineSymbol(math, main, rel, "\u220b", "\\hline"); +defineSymbol(math, main, rel, "\u220b", "\\mathbin"); +defineSymbol(math, main, rel, "\u220b", "\\nc"); +defineSymbol(math, main, rel, "\u220b", "\\underbrace"); +defineSymbol(math, main, rel, "\u220b", "\\o"); +defineSymbol(math, main, rel, "\u220b", "\\a"); +defineSymbol(math, main, rel, "\u220b", "\\b"); +defineSymbol(math, main, rel, "\u220b", "\\c"); +defineSymbol(math, main, rel, "\u220b", "\\d"); +defineSymbol(math, main, rel, "\u220b", "\\e"); +defineSymbol(math, main, rel, "\u220b", "\\f"); +defineSymbol(math, main, rel, "\u220b", "\\g"); +defineSymbol(math, main, rel, "\u220b", "\\h"); +defineSymbol(math, main, rel, "\u220b", "\\i"); +defineSymbol(math, main, rel, "\u220b", "\\j"); +defineSymbol(math, main, rel, "\u220b", "\\k"); +defineSymbol(math, main, rel, "\u220b", "\\l"); +defineSymbol(math, main, rel, "\u220b", "\\m"); +defineSymbol(math, main, rel, "\u220b", "\\n"); +defineSymbol(math, main, rel, "\u220b", "\\o"); +// defineSymbol(math, main, rel, "\u220b", "\\wedgee"); +defineSymbol(math, main, rel, "\u220b", "\\sb"); +defineSymbol(math, main, rel, "\u220b", "\\do"); +defineSymbol(math, main, rel, "\u220b", "\\em"); +// defineSymbol(math, main, rel, "\u220b", "\\diamonda"); + + +defineSymbol(math, main, rel, "\u220b", "\\dint"); +defineSymbol(math, main, rel, "\u220b", "\\intd"); + + +// Punctuation +defineSymbol(math, main, punct, "\u002e", "\\ldotp"); +defineSymbol(math, main, punct, 
"\u22c5", "\\cdotp"); + +// Misc Symbols +defineSymbol(math, main, textord, "\u0023", "\\#"); +defineSymbol(math, main, textord, "\u0026", "\\&"); +defineSymbol(math, main, textord, "\u2135", "\\aleph"); +defineSymbol(math, main, textord, "\u2200", "\\forall"); +defineSymbol(math, main, textord, "\u210f", "\\hbar"); +defineSymbol(math, main, textord, "\u2203", "\\eixsts"); +defineSymbol(math, main, textord, "\u2207", "\\nabla"); +defineSymbol(math, main, textord, "\u266d", "\\flat"); +defineSymbol(math, main, textord, "\u2113", "\\ell"); +defineSymbol(math, main, textord, "\u266e", "\\natural"); +defineSymbol(math, main, textord, "\u2663", "\\clubsuit"); +defineSymbol(math, main, textord, "\u2118", "\\wp"); +defineSymbol(math, main, textord, "\u266f", "\\sharp"); +defineSymbol(math, main, textord, "\u2662", "\\diamondsuit"); +defineSymbol(math, main, textord, "\u211c", "\\Re"); +defineSymbol(math, main, textord, "\u2661", "\\heartsuit"); +defineSymbol(math, main, textord, "\u2111", "\\Im"); +defineSymbol(math, main, textord, "\u2660", "\\spadesuit"); + +// Math and Text +defineSymbol(math, main, textord, "\u2020", "\\dag"); +defineSymbol(math, main, textord, "\u2021", "\\ddag"); + +// Large Delimiters +defineSymbol(math, main, close, "\u23b1", "\\rmoustache"); +defineSymbol(math, main, open, "\u23b0", "\\lmoustache"); +defineSymbol(math, main, close, "\u27ef", "\\rgroup"); +defineSymbol(math, main, open, "\u27ee", "\\lgroup"); + +// Binary Operators +defineSymbol(math, main, bin, "\u2213", "\\mp"); +defineSymbol(math, main, bin, "\u2296", "\\ominus"); +defineSymbol(math, main, bin, "\u228e", "\\uplus"); +defineSymbol(math, main, bin, "\u2293", "\\sqcap"); +defineSymbol(math, main, bin, "\u2217", "\\ast"); +defineSymbol(math, main, bin, "\u2294", "\\sqcup"); +defineSymbol(math, main, bin, "\u25ef", "\\bigcirc"); +defineSymbol(math, main, bin, "\u2219", "\\bullet"); +defineSymbol(math, main, bin, "\u2021", "\\ddagger"); +defineSymbol(math, main, bin, "\u2240", 
"\\wr"); +defineSymbol(math, main, bin, "\u2a3f", "\\amalg"); + +// Arrow Symbols +defineSymbol(math, main, rel, "\u27f5", "\\longleftarrow"); +defineSymbol(math, main, rel, "\u21d0", "\\Leftarrow"); +defineSymbol(math, main, rel, "\u27f8", "\\Longleftarrow"); +defineSymbol(math, main, rel, "\u27f6", "\\longrightarrow"); +defineSymbol(math, main, rel, "\u21d2", "\\Rightarrow"); +defineSymbol(math, main, rel, "\u27f9", "\\Longrightarrow"); +defineSymbol(math, main, rel, "\u2194", "\\leftrightarrow"); +defineSymbol(math, main, rel, "\u27f7", "\\longleftrightarrow"); +defineSymbol(math, main, rel, "\u21d4", "\\Leftrightarrow"); +defineSymbol(math, main, rel, "\u27fa", "\\Longleftrightarrow"); +defineSymbol(math, main, rel, "\u21a6", "\\mapsto"); +defineSymbol(math, main, rel, "\u27fc", "\\longmapsto"); +defineSymbol(math, main, rel, "\u2197", "\\nearrow"); +defineSymbol(math, main, rel, "\u21a9", "\\hookleftarrow"); +defineSymbol(math, main, rel, "\u21aa", "\\hookrightarrow"); +defineSymbol(math, main, rel, "\u2198", "\\searrow"); +defineSymbol(math, main, rel, "\u21bc", "\\leftharpoonup"); +defineSymbol(math, main, rel, "\u21c0", "\\rightharpoonup"); +defineSymbol(math, main, rel, "\u2199", "\\swarrow"); +defineSymbol(math, main, rel, "\u21bd", "\\leftharpoondown"); +defineSymbol(math, main, rel, "\u21c1", "\\rightharpoondown"); +defineSymbol(math, main, rel, "\u2196", "\\nwarrow"); +defineSymbol(math, main, rel, "\u21cc", "\\rightleftharpoons"); + +// AMS Negated Binary Relations +defineSymbol(math, ams, rel, "\u226e", "\\nless"); +defineSymbol(math, ams, rel, "\ue010", "\\nleqslant"); +defineSymbol(math, ams, rel, "\ue011", "\\nleqq"); +defineSymbol(math, ams, rel, "\u2a87", "\\lneq"); +defineSymbol(math, ams, rel, "\u2268", "\\lneqq"); +defineSymbol(math, ams, rel, "\ue00c", "\\lvertneqq"); +defineSymbol(math, ams, rel, "\u22e6", "\\lnsim"); +defineSymbol(math, ams, rel, "\u2a89", "\\lnapprox"); +defineSymbol(math, ams, rel, "\u2280", "\\nprec"); 
+defineSymbol(math, ams, rel, "\u22e0", "\\npreceq"); +defineSymbol(math, ams, rel, "\u22e8", "\\precnsim"); +defineSymbol(math, ams, rel, "\u2ab9", "\\precnapprox"); +defineSymbol(math, ams, rel, "\u2241", "\\nsim"); +defineSymbol(math, ams, rel, "\ue006", "\\nshortmid"); +defineSymbol(math, ams, rel, "\u2224", "\\nmid"); +defineSymbol(math, ams, rel, "\u22ac", "\\nvdash"); +defineSymbol(math, ams, rel, "\u22ad", "\\nvDash"); +defineSymbol(math, ams, rel, "\u22ea", "\\ntriangleleft"); +defineSymbol(math, ams, rel, "\u22ec", "\\ntrianglelefteq"); +defineSymbol(math, ams, rel, "\u228a", "\\subsetneq"); +defineSymbol(math, ams, rel, "\ue01a", "\\varsubsetneq"); +defineSymbol(math, ams, rel, "\u2acb", "\\subsetneqq"); +defineSymbol(math, ams, rel, "\ue017", "\\varsubsetneqq"); +defineSymbol(math, ams, rel, "\u226f", "\\ngtr"); +defineSymbol(math, ams, rel, "\ue00f", "\\ngeqslant"); +defineSymbol(math, ams, rel, "\ue00e", "\\ngeqq"); +defineSymbol(math, ams, rel, "\u2a88", "\\gneq"); +defineSymbol(math, ams, rel, "\u2269", "\\gneqq"); +defineSymbol(math, ams, rel, "\ue00d", "\\gvertneqq"); +defineSymbol(math, ams, rel, "\u22e7", "\\gnsim"); +defineSymbol(math, ams, rel, "\u2a8a", "\\gnapprox"); +defineSymbol(math, ams, rel, "\u2281", "\\nsucc"); +defineSymbol(math, ams, rel, "\u22e1", "\\nsucceq"); +defineSymbol(math, ams, rel, "\u22e9", "\\succnsim"); +defineSymbol(math, ams, rel, "\u2aba", "\\succnapprox"); +defineSymbol(math, ams, rel, "\u2246", "\\ncong"); +defineSymbol(math, ams, rel, "\ue007", "\\nshortparallel"); +defineSymbol(math, ams, rel, "\u2226", "\\nparallel"); +defineSymbol(math, ams, rel, "\u22af", "\\nVDash"); +defineSymbol(math, ams, rel, "\u22eb", "\\ntriangleright"); +defineSymbol(math, ams, rel, "\u22ed", "\\ntrianglerighteq"); +defineSymbol(math, ams, rel, "\ue018", "\\nsupseteqq"); +defineSymbol(math, ams, rel, "\u228b", "\\supsetneq"); +defineSymbol(math, ams, rel, "\ue01b", "\\varsupsetneq"); +defineSymbol(math, ams, rel, "\u2acc", 
"\\supsetneqq"); +defineSymbol(math, ams, rel, "\ue019", "\\varsupsetneqq"); +defineSymbol(math, ams, rel, "\u22ae", "\\nVdash"); +defineSymbol(math, ams, rel, "\u2ab5", "\\precneqq"); +defineSymbol(math, ams, rel, "\u2ab6", "\\succneqq"); +defineSymbol(math, ams, rel, "\ue016", "\\nsubseteqq"); +defineSymbol(math, ams, bin, "\u22b4", "\\unlhd"); +defineSymbol(math, ams, bin, "\u22b5", "\\unrhd"); + +// AMS Negated Arrows +defineSymbol(math, ams, rel, "\u219a", "\\nleftarrow"); +defineSymbol(math, ams, rel, "\u219b", "\\nrightarrow"); +defineSymbol(math, ams, rel, "\u21cd", "\\nLeftarrow"); +defineSymbol(math, ams, rel, "\u21cf", "\\nRightarrow"); +defineSymbol(math, ams, rel, "\u21ae", "\\nleftrightarrow"); +defineSymbol(math, ams, rel, "\u21ce", "\\nLeftrightarrow"); + +// AMS Misc +defineSymbol(math, ams, rel, "\u25b3", "\\vartriangle"); +defineSymbol(math, ams, textord, "\u210f", "\\hslash"); +defineSymbol(math, ams, textord, "\u25bd", "\\triangledown"); +defineSymbol(math, ams, textord, "\u25ca", "\\lozenge"); +defineSymbol(math, ams, textord, "\u24c8", "\\circledS"); +defineSymbol(math, ams, textord, "\u00ae", "\\circledR"); +defineSymbol(math, ams, textord, "\u2221", "\\measuredangle"); +defineSymbol(math, ams, textord, "\u2204", "\\nexists"); +defineSymbol(math, ams, textord, "\u2127", "\\mho"); +defineSymbol(math, ams, textord, "\u2132", "\\Finv"); +defineSymbol(math, ams, textord, "\u2141", "\\Game"); +defineSymbol(math, ams, textord, "\u006b", "\\Bbbk"); +defineSymbol(math, ams, textord, "\u2035", "\\backprime"); +defineSymbol(math, ams, textord, "\u25b2", "\\blacktriangle"); +defineSymbol(math, ams, textord, "\u25bc", "\\blacktriangledown"); +defineSymbol(math, ams, textord, "\u25a0", "\\blacksquare"); +defineSymbol(math, ams, textord, "\u29eb", "\\blacklozenge"); +defineSymbol(math, ams, textord, "\u2605", "\\bigstar"); +defineSymbol(math, ams, textord, "\u2222", "\\sphericalangle"); +defineSymbol(math, ams, textord, "\u2201", "\\complement"); 
+defineSymbol(math, ams, textord, "\u00f0", "\\eth"); +defineSymbol(math, ams, textord, "\u2571", "\\diagup"); +defineSymbol(math, ams, textord, "\u2572", "\\diagdown"); +defineSymbol(math, ams, textord, "\u25a1", "\\square"); +defineSymbol(math, ams, textord, "\u25a1", "\\Box"); +defineSymbol(math, ams, textord, "\u25ca", "\\Diamond"); +defineSymbol(math, ams, textord, "\u00a5", "\\yen"); +defineSymbol(math, ams, textord, "\u2713", "\\checkmark"); + +// AMS Hebrew +defineSymbol(math, ams, textord, "\u2136", "\\beth"); +defineSymbol(math, ams, textord, "\u2138", "\\daleth"); +defineSymbol(math, ams, textord, "\u2137", "\\gimel"); + +// AMS Greek +defineSymbol(math, ams, textord, "\u03dd", "\\digamma"); +defineSymbol(math, ams, textord, "\u03f0", "\\varkappa"); + +// AMS Delimiters +defineSymbol(math, ams, open, "\u250c", "\\ulcorner"); +defineSymbol(math, ams, close, "\u2510", "\\urcorner"); +defineSymbol(math, ams, open, "\u2514", "\\llcorner"); +defineSymbol(math, ams, close, "\u2518", "\\lrcorner"); + +// AMS Binary Relations +defineSymbol(math, ams, rel, "\u2266", "\\leqq"); +defineSymbol(math, ams, rel, "\u2a7d", "\\leqslant"); +defineSymbol(math, ams, rel, "\u2a95", "\\eqslantless"); +defineSymbol(math, ams, rel, "\u2272", "\\lesssim"); +defineSymbol(math, ams, rel, "\u2a85", "\\lessapprox"); +defineSymbol(math, ams, rel, "\u224a", "\\approxeq"); +defineSymbol(math, ams, bin, "\u22d6", "\\lessdot"); +defineSymbol(math, ams, rel, "\u22d8", "\\lll"); +defineSymbol(math, ams, rel, "\u2276", "\\lessgtr"); +defineSymbol(math, ams, rel, "\u22da", "\\lesseqgtr"); +defineSymbol(math, ams, rel, "\u2a8b", "\\lesseqqgtr"); +defineSymbol(math, ams, rel, "\u2251", "\\doteqdot"); +defineSymbol(math, ams, rel, "\u2253", "\\risingdotseq"); +defineSymbol(math, ams, rel, "\u2252", "\\fallingdotseq"); +defineSymbol(math, ams, rel, "\u223d", "\\backsim"); +defineSymbol(math, ams, rel, "\u22cd", "\\backsimeq"); +defineSymbol(math, ams, rel, "\u2ac5", "\\subseteqq"); 
+defineSymbol(math, ams, rel, "\u22d0", "\\Subset"); +defineSymbol(math, ams, rel, "\u228f", "\\sqsubset"); +defineSymbol(math, ams, rel, "\u227c", "\\preccurlyeq"); +defineSymbol(math, ams, rel, "\u22de", "\\curlyeqprec"); +defineSymbol(math, ams, rel, "\u227e", "\\precsim"); +defineSymbol(math, ams, rel, "\u2ab7", "\\precapprox"); +defineSymbol(math, ams, rel, "\u22b2", "\\vartriangleleft"); +defineSymbol(math, ams, rel, "\u22b4", "\\trianglelefteq"); +defineSymbol(math, ams, rel, "\u22a8", "\\vDash"); +defineSymbol(math, ams, rel, "\u22aa", "\\Vvdash"); +defineSymbol(math, ams, rel, "\u2323", "\\smallsmile"); +defineSymbol(math, ams, rel, "\u2322", "\\smallfrown"); +defineSymbol(math, ams, rel, "\u224f", "\\bumpeq"); +defineSymbol(math, ams, rel, "\u224e", "\\Bumpeq"); +defineSymbol(math, ams, rel, "\u2267", "\\geqq"); +defineSymbol(math, ams, rel, "\u2a7e", "\\geqslant"); +defineSymbol(math, ams, rel, "\u2a96", "\\eqslantgtr"); +defineSymbol(math, ams, rel, "\u2273", "\\gtrsim"); +defineSymbol(math, ams, rel, "\u2a86", "\\gtrapprox"); +defineSymbol(math, ams, bin, "\u22d7", "\\gtrdot"); +defineSymbol(math, ams, rel, "\u22d9", "\\ggg"); +defineSymbol(math, ams, rel, "\u2277", "\\gtrless"); +defineSymbol(math, ams, rel, "\u22db", "\\gtreqless"); +defineSymbol(math, ams, rel, "\u2a8c", "\\gtreqqless"); +defineSymbol(math, ams, rel, "\u2256", "\\eqcirc"); +defineSymbol(math, ams, rel, "\u2257", "\\circeq"); +defineSymbol(math, ams, rel, "\u225c", "\\triangleq"); +defineSymbol(math, ams, rel, "\u223c", "\\thicksim"); +defineSymbol(math, ams, rel, "\u2248", "\\thickapprox"); +defineSymbol(math, ams, rel, "\u2ac6", "\\supseteqq"); +defineSymbol(math, ams, rel, "\u22d1", "\\Supset"); +defineSymbol(math, ams, rel, "\u2290", "\\sqsupset"); +defineSymbol(math, ams, rel, "\u227d", "\\succcurlyeq"); +defineSymbol(math, ams, rel, "\u22df", "\\curlyeqsucc"); +defineSymbol(math, ams, rel, "\u227f", "\\succsim"); +defineSymbol(math, ams, rel, "\u2ab8", "\\succapprox"); 
+defineSymbol(math, ams, rel, "\u22b3", "\\vartriangleright"); +defineSymbol(math, ams, rel, "\u22b5", "\\trianglerighteq"); +defineSymbol(math, ams, rel, "\u22a9", "\\Vdash"); +defineSymbol(math, ams, rel, "\u2223", "\\shortmid"); +defineSymbol(math, ams, rel, "\u2225", "\\shortparallel"); +defineSymbol(math, ams, rel, "\u226c", "\\between"); +defineSymbol(math, ams, rel, "\u22d4", "\\pitchfork"); +defineSymbol(math, ams, rel, "\u221d", "\\varpropto"); +defineSymbol(math, ams, rel, "\u25c0", "\\blacktriangleleft"); +defineSymbol(math, ams, rel, "\u2234", "\\therefore"); +defineSymbol(math, ams, rel, "\u220d", "\\backepsilon"); +defineSymbol(math, ams, rel, "\u25b6", "\\blacktriangleright"); +defineSymbol(math, ams, rel, "\u2235", "\\because"); +defineSymbol(math, ams, rel, "\u22d8", "\\llless"); +defineSymbol(math, ams, rel, "\u22d9", "\\gggtr"); +defineSymbol(math, ams, bin, "\u22b2", "\\lhd"); +defineSymbol(math, ams, bin, "\u22b3", "\\rhd"); +defineSymbol(math, ams, rel, "\u2242", "\\eqsim"); +defineSymbol(math, main, rel, "\u22c8", "\\Join"); +defineSymbol(math, ams, rel, "\u2251", "\\Doteq"); + +// AMS Binary Operators +defineSymbol(math, ams, bin, "\u2214", "\\dotplus"); +defineSymbol(math, ams, bin, "\u2216", "\\smallsetminus"); +defineSymbol(math, ams, bin, "\u22d2", "\\Cap"); +defineSymbol(math, ams, bin, "\u22d3", "\\Cup"); +defineSymbol(math, ams, bin, "\u2a5e", "\\doublebarwedge"); +defineSymbol(math, ams, bin, "\u229f", "\\boxminus"); +defineSymbol(math, ams, bin, "\u229e", "\\boxplus"); +defineSymbol(math, ams, bin, "\u22c7", "\\divideontimes"); +defineSymbol(math, ams, bin, "\u22c9", "\\ltimes"); +defineSymbol(math, ams, bin, "\u22ca", "\\rtimes"); +defineSymbol(math, ams, bin, "\u22cb", "\\leftthreetimes"); +defineSymbol(math, ams, bin, "\u22cc", "\\rightthreetimes"); +defineSymbol(math, ams, bin, "\u22cf", "\\curlywedge"); +defineSymbol(math, ams, bin, "\u22ce", "\\curlyvee"); +defineSymbol(math, ams, bin, "\u229d", "\\circleddash"); 
+defineSymbol(math, ams, bin, "\u229b", "\\circledast"); +defineSymbol(math, ams, bin, "\u22c5", "\\centerdot"); +defineSymbol(math, ams, bin, "\u22ba", "\\intercal"); +defineSymbol(math, ams, bin, "\u22d2", "\\doublecap"); +defineSymbol(math, ams, bin, "\u22d3", "\\doublecup"); +defineSymbol(math, ams, bin, "\u22a0", "\\boxtimes"); + +// AMS Arrows +defineSymbol(math, ams, rel, "\u21e2", "\\dashrightarrow"); +defineSymbol(math, ams, rel, "\u21e0", "\\dashleftarrow"); +defineSymbol(math, ams, rel, "\u21c7", "\\leftleftarrows"); +defineSymbol(math, ams, rel, "\u21c6", "\\leftrightarrows"); +defineSymbol(math, ams, rel, "\u21da", "\\Lleftarrow"); +defineSymbol(math, ams, rel, "\u219e", "\\twoheadleftarrow"); +defineSymbol(math, ams, rel, "\u21a2", "\\leftarrowtail"); +defineSymbol(math, ams, rel, "\u21ab", "\\looparrowleft"); +defineSymbol(math, ams, rel, "\u21cb", "\\leftrightharpoons"); +defineSymbol(math, ams, rel, "\u21b6", "\\curvearrowleft"); +defineSymbol(math, ams, rel, "\u21ba", "\\circlearrowleft"); +defineSymbol(math, ams, rel, "\u21b0", "\\Lsh"); +defineSymbol(math, ams, rel, "\u21c8", "\\upuparrows"); +defineSymbol(math, ams, rel, "\u21bf", "\\upharpoonleft"); +defineSymbol(math, ams, rel, "\u21c3", "\\downharpoonleft"); +defineSymbol(math, ams, rel, "\u22b8", "\\multimap"); +defineSymbol(math, ams, rel, "\u21ad", "\\leftrightsquigarrow"); +defineSymbol(math, ams, rel, "\u21c9", "\\rightrightarrows"); +defineSymbol(math, ams, rel, "\u21c4", "\\rightleftarrows"); +defineSymbol(math, ams, rel, "\u21a0", "\\twoheadrightarrow"); +defineSymbol(math, ams, rel, "\u21a3", "\\rightarrowtail"); +defineSymbol(math, ams, rel, "\u21ac", "\\looparrowright"); +defineSymbol(math, ams, rel, "\u21b7", "\\curvearrowright"); +defineSymbol(math, ams, rel, "\u21bb", "\\circlearrowright"); +defineSymbol(math, ams, rel, "\u21b1", "\\Rsh"); +defineSymbol(math, ams, rel, "\u21ca", "\\downdownarrows"); +defineSymbol(math, ams, rel, "\u21be", "\\upharpoonright"); 
+defineSymbol(math, ams, rel, "\u21c2", "\\downharpoonright"); +defineSymbol(math, ams, rel, "\u21dd", "\\rightsquigarrow"); +defineSymbol(math, ams, rel, "\u21dd", "\\leadsto"); +defineSymbol(math, ams, rel, "\u21db", "\\Rrightarrow"); +defineSymbol(math, ams, rel, "\u21be", "\\restriction"); + +defineSymbol(math, main, textord, "\u2018", "`"); +defineSymbol(math, main, textord, "$", "\\$"); +defineSymbol(math, main, textord, "%", "\\%"); +defineSymbol(math, main, textord, "_", "\\_"); +defineSymbol(math, main, textord, "\u2220", "\\angle"); +defineSymbol(math, main, textord, "\u221e", "\\infty"); +defineSymbol(math, main, textord, "\u2032", "\\prime"); +defineSymbol(math, main, textord, "\u25b3", "\\triangle"); +defineSymbol(math, main, textord, "\u0393", "\\Gamma"); +defineSymbol(math, main, textord, "\u0394", "\\Delta"); +defineSymbol(math, main, textord, "\u0398", "\\Theta"); +defineSymbol(math, main, textord, "\u039b", "\\Lambda"); +defineSymbol(math, main, textord, "\u039e", "\\Xi"); +defineSymbol(math, main, textord, "\u03a0", "\\Pi"); +defineSymbol(math, main, textord, "\u03a3", "\\Sigma"); +defineSymbol(math, main, textord, "\u03a5", "\\Upsilon"); +defineSymbol(math, main, textord, "\u03a6", "\\Phi"); +defineSymbol(math, main, textord, "\u03a8", "\\Psi"); +defineSymbol(math, main, textord, "\u03a9", "\\Omega"); +defineSymbol(math, main, textord, "\u00ac", "\\neg"); +defineSymbol(math, main, textord, "\u00ac", "\\lnot"); +defineSymbol(math, main, textord, "\u22a4", "\\top"); +defineSymbol(math, main, textord, "\u22a5", "\\bot"); +defineSymbol(math, main, textord, "\u2205", "\\emptyset"); +defineSymbol(math, ams, textord, "\u2205", "\\varnothing"); +defineSymbol(math, main, mathord, "\u03b1", "\\alpha"); +defineSymbol(math, main, mathord, "\u03b2", "\\beta"); +defineSymbol(math, main, mathord, "\u03b3", "\\gamma"); +defineSymbol(math, main, mathord, "\u03b4", "\\delta"); +defineSymbol(math, main, mathord, "\u03f5", "\\epsilon"); +defineSymbol(math, main, 
mathord, "\u03b6", "\\zeta"); +defineSymbol(math, main, mathord, "\u03b7", "\\eta"); +defineSymbol(math, main, mathord, "\u03b8", "\\theta"); +defineSymbol(math, main, mathord, "\u03b9", "\\iota"); +defineSymbol(math, main, mathord, "\u03ba", "\\kappa"); +defineSymbol(math, main, mathord, "\u03bb", "\\lambda"); +defineSymbol(math, main, mathord, "\u03bc", "\\mu"); +defineSymbol(math, main, mathord, "\u03bd", "\\nu"); +defineSymbol(math, main, mathord, "\u03be", "\\xi"); +defineSymbol(math, main, mathord, "o", "\\omicron"); +defineSymbol(math, main, mathord, "\u03c0", "\\pi"); +defineSymbol(math, main, mathord, "\u03c1", "\\rho"); +defineSymbol(math, main, mathord, "\u03c3", "\\sigma"); +defineSymbol(math, main, mathord, "\u03c4", "\\tau"); +defineSymbol(math, main, mathord, "\u03c5", "\\upsilon"); +defineSymbol(math, main, mathord, "\u03d5", "\\phi"); +defineSymbol(math, main, mathord, "\u03c7", "\\chi"); +defineSymbol(math, main, mathord, "\u03c8", "\\psi"); +defineSymbol(math, main, mathord, "\u03c9", "\\omega"); +defineSymbol(math, main, mathord, "\u03b5", "\\varepsilon"); +defineSymbol(math, main, mathord, "\u03d1", "\\vartheta"); +defineSymbol(math, main, mathord, "\u03d6", "\\varpi"); +defineSymbol(math, main, mathord, "\u03f1", "\\varrho"); +defineSymbol(math, main, mathord, "\u03c2", "\\varsigma"); +defineSymbol(math, main, mathord, "\u03c6", "\\varphi"); +defineSymbol(math, main, bin, "\u2217", "*"); +defineSymbol(math, main, bin, "+", "+"); +defineSymbol(math, main, bin, "\u2212", "-"); +defineSymbol(math, main, bin, "\u22c5", "\\cdot"); +defineSymbol(math, main, bin, "\u2218", "\\circ"); +defineSymbol(math, main, bin, "\u00f7", "\\div"); +defineSymbol(math, main, bin, "\u00b1", "\\pm"); +defineSymbol(math, main, bin, "\u00d7", "\\times"); +defineSymbol(math, main, bin, "\u2229", "\\cap"); +defineSymbol(math, main, bin, "\u222a", "\\cup"); +defineSymbol(math, main, bin, "\u2216", "\\setminus"); +defineSymbol(math, main, bin, "\u2227", "\\land"); 
+defineSymbol(math, main, bin, "\u2228", "\\lor"); +defineSymbol(math, main, bin, "\u2227", "\\wedge"); +defineSymbol(math, main, bin, "\u2228", "\\vee"); +defineSymbol(math, main, textord, "\u221a", "\\surd"); +defineSymbol(math, main, open, "(", "("); +defineSymbol(math, main, open, "[", "["); +defineSymbol(math, main, open, "\u27e8", "\\langle"); +defineSymbol(math, main, open, "\u2223", "\\lvert"); +defineSymbol(math, main, open, "\u2225", "\\lVert"); +defineSymbol(math, main, close, ")", ")"); +defineSymbol(math, main, close, "]", "]"); +defineSymbol(math, main, close, "?", "?"); +defineSymbol(math, main, close, "!", "!"); +defineSymbol(math, main, close, "\u27e9", "\\rangle"); +defineSymbol(math, main, close, "\u2223", "\\rvert"); +defineSymbol(math, main, close, "\u2225", "\\rVert"); +defineSymbol(math, main, rel, "=", "="); +defineSymbol(math, main, rel, "<", "<"); +defineSymbol(math, main, rel, ">", ">"); +defineSymbol(math, main, rel, ":", ":"); +defineSymbol(math, main, rel, "\u2248", "\\approx"); +defineSymbol(math, main, rel, "\u2245", "\\cong"); +defineSymbol(math, main, rel, "\u2265", "\\ge"); +defineSymbol(math, main, rel, "\u2265", "\\geq"); +defineSymbol(math, main, rel, "\u2190", "\\gets"); +defineSymbol(math, main, rel, ">", "\\gt"); +defineSymbol(math, main, rel, "\u2208", "\\in"); +defineSymbol(math, main, rel, "\u2209", "\\notin"); +defineSymbol(math, main, rel, "\u2282", "\\subset"); +defineSymbol(math, main, rel, "\u2283", "\\supset"); +defineSymbol(math, main, rel, "\u2286", "\\subseteq"); +defineSymbol(math, main, rel, "\u2287", "\\supseteq"); +defineSymbol(math, ams, rel, "\u2288", "\\nsubseteq"); +defineSymbol(math, ams, rel, "\u2289", "\\nsupseteq"); +defineSymbol(math, main, rel, "\u22a8", "\\models"); +defineSymbol(math, main, rel, "\u2190", "\\leftarrow"); +defineSymbol(math, main, rel, "\u2264", "\\le"); +defineSymbol(math, main, rel, "\u2264", "\\leq"); +defineSymbol(math, main, rel, "<", "\\lt"); +defineSymbol(math, main, rel, 
"\u2260", "\\ne"); +defineSymbol(math, main, rel, "\u2260", "\\neq"); +defineSymbol(math, main, rel, "\u2192", "\\rightarrow"); +defineSymbol(math, main, rel, "\u2192", "\\to"); +defineSymbol(math, ams, rel, "\u2271", "\\ngeq"); +defineSymbol(math, ams, rel, "\u2270", "\\nleq"); +defineSymbol(math, main, spacing, null, "\\!"); +defineSymbol(math, main, spacing, "\u00a0", "\\ "); +defineSymbol(math, main, spacing, "\u00a0", "~"); +defineSymbol(math, main, spacing, null, "\\,"); +defineSymbol(math, main, spacing, null, "\\:"); +defineSymbol(math, main, spacing, null, "\\;"); +defineSymbol(math, main, spacing, null, "\\enspace"); +defineSymbol(math, main, spacing, null, "\\qquad"); +defineSymbol(math, main, spacing, null, "\\quad"); +defineSymbol(math, main, spacing, "\u00a0", "\\space"); +defineSymbol(math, main, punct, ",", ","); +defineSymbol(math, main, punct, ";", ";"); +defineSymbol(math, main, punct, ":", "\\colon"); +defineSymbol(math, ams, bin, "\u22bc", "\\barwedge"); +defineSymbol(math, ams, bin, "\u22bb", "\\veebar"); +defineSymbol(math, main, bin, "\u2299", "\\odot"); +defineSymbol(math, main, bin, "\u2295", "\\oplus"); +defineSymbol(math, main, bin, "\u2297", "\\otimes"); +defineSymbol(math, main, textord, "\u2202", "\\partial"); +defineSymbol(math, main, bin, "\u2298", "\\oslash"); +defineSymbol(math, ams, bin, "\u229a", "\\circledcirc"); +defineSymbol(math, ams, bin, "\u22a1", "\\boxdot"); +defineSymbol(math, main, bin, "\u25b3", "\\bigtriangleup"); +defineSymbol(math, main, bin, "\u25bd", "\\bigtriangledown"); +defineSymbol(math, main, bin, "\u2020", "\\dagger"); +defineSymbol(math, main, bin, "\u22c4", "\\diamond"); +defineSymbol(math, main, bin, "\u22c6", "\\star"); +defineSymbol(math, main, bin, "\u25c3", "\\triangleleft"); +defineSymbol(math, main, bin, "\u25b9", "\\triangleright"); +defineSymbol(math, main, open, "{", "\\{"); +defineSymbol(math, main, close, "}", "\\}"); +defineSymbol(math, main, open, "{", "\\lbrace"); +defineSymbol(math, main, 
close, "}", "\\rbrace"); +defineSymbol(math, main, open, "[", "\\lbrack"); +defineSymbol(math, main, close, "]", "\\rbrack"); +defineSymbol(math, main, open, "\u230a", "\\lfloor"); +defineSymbol(math, main, close, "\u230b", "\\rfloor"); +defineSymbol(math, main, open, "\u2308", "\\lceil"); +defineSymbol(math, main, close, "\u2309", "\\rceil"); +defineSymbol(math, main, textord, "\\", "\\backslash"); +defineSymbol(math, main, textord, "\u2223", "|"); +defineSymbol(math, main, textord, "\u2223", "\\vert"); +defineSymbol(math, main, textord, "\u2225", "\\|"); +defineSymbol(math, main, textord, "\u2225", "\\Vert"); +defineSymbol(math, main, rel, "\u2191", "\\uparrow"); +defineSymbol(math, main, rel, "\u21d1", "\\Uparrow"); +defineSymbol(math, main, rel, "\u2193", "\\downarrow"); +defineSymbol(math, main, rel, "\u21d3", "\\Downarrow"); +defineSymbol(math, main, rel, "\u2195", "\\updownarrow"); +defineSymbol(math, main, rel, "\u21d5", "\\Updownarrow"); +defineSymbol(math, math, op, "\u2210", "\\coprod"); +defineSymbol(math, math, op, "\u22c1", "\\bigvee"); +defineSymbol(math, math, op, "\u22c0", "\\bigwedge"); +defineSymbol(math, math, op, "\u2a04", "\\biguplus"); +defineSymbol(math, math, op, "\u22c2", "\\bigcap"); +defineSymbol(math, math, op, "\u22c3", "\\bigcup"); +defineSymbol(math, math, op, "\u222b", "\\int"); +defineSymbol(math, math, op, "\u222b", "\\intop"); +defineSymbol(math, math, op, "\u222c", "\\iint"); +defineSymbol(math, math, op, "\u222d", "\\iiint"); +defineSymbol(math, math, op, "\u220f", "\\prod"); +defineSymbol(math, math, op, "\u2211", "\\sum"); + +defineSymbol(math, math, op, "\u2a02", "\\bigotimes"); +defineSymbol(math, math, op, "\u2a01", "\\bigoplus"); +defineSymbol(math, math, op, "\u2a00", "\\bigodot"); +defineSymbol(math, math, op, "\u222e", "\\oint"); +defineSymbol(math, math, op, "\u2a06", "\\bigsqcup"); +defineSymbol(math, math, op, "\u222b", "\\smallint"); +defineSymbol(math, main, inner, "\u2026", "\\ldots"); +defineSymbol(math, main, 
inner, "\u22ef", "\\cdots"); +defineSymbol(math, main, inner, "\u22f1", "\\ddots"); +defineSymbol(math, main, textord, "\u22ee", "\\vdots"); +defineSymbol(math, main, accent, "\u00b4", "\\acute"); +defineSymbol(math, main, accent, "\u0060", "\\grave"); +defineSymbol(math, main, accent, "\u00a8", "\\ddot"); +defineSymbol(math, main, accent, "\u007e", "\\tilde"); +defineSymbol(math, main, accent, "\u00af", "\\bar"); +defineSymbol(math, main, accent, "\u02d8", "\\breve"); +defineSymbol(math, main, accent, "\u02c7", "\\check"); +defineSymbol(math, main, accent, "\u005e", "\\hat"); +defineSymbol(math, main, accent, "\u20d7", "\\vec"); +defineSymbol(math, main, accent, "\u02d9", "\\dot"); +defineSymbol(math, main, mathord, "\u0131", "\\imath"); +defineSymbol(math, main, mathord, "\u0237", "\\jmath"); + + +defineSymbol(text, main, spacing, "\u00a0", "\\ "); +defineSymbol(text, main, spacing, "\u00a0", " "); +defineSymbol(text, main, spacing, "\u00a0", "~"); + +// There are lots of symbols which are the same, so we add them in afterwards. 
+var i; +var ch; + +// All of these are textords in math mode +var mathTextSymbols = "0123456789/@.\""; +for (i = 0; i < mathTextSymbols.length; i++) { + ch = mathTextSymbols.charAt(i); + defineSymbol(math, main, textord, ch, ch); +} + +// All of these are textords in text mode +var textSymbols = "0123456789`!@*()-=+[]'\";:?/.,"; +for (i = 0; i < textSymbols.length; i++) { + ch = textSymbols.charAt(i); + defineSymbol(text, main, textord, ch, ch); +} + +// All of these are textords in text mode, and mathords in math mode +var letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; +for (i = 0; i < letters.length; i++) { + ch = letters.charAt(i); + defineSymbol(math, main, mathord, ch, ch); + defineSymbol(text, main, textord, ch, ch); +} diff --git a/fastcdm/tokenize_latex/third_party/katex/src/utils.js b/fastcdm/tokenize_latex/third_party/katex/src/utils.js new file mode 100644 index 0000000000000000000000000000000000000000..f9e57cc65642e18e47b27e329d7ce6913a5f7a10 --- /dev/null +++ b/fastcdm/tokenize_latex/third_party/katex/src/utils.js @@ -0,0 +1,106 @@ +/** + * This file contains a list of utility functions which are useful in other + * files. + */ + +/** + * Provide an `indexOf` function which works in IE8, but defers to native if + * possible. + */ +var nativeIndexOf = Array.prototype.indexOf; +var indexOf = function(list, elem) { + if (list == null) { + return -1; + } + if (nativeIndexOf && list.indexOf === nativeIndexOf) { + return list.indexOf(elem); + } + var i = 0; + var l = list.length; + for (; i < l; i++) { + if (list[i] === elem) { + return i; + } + } + return -1; +}; + +/** + * Return whether an element is contained in a list + */ +var contains = function(list, elem) { + return indexOf(list, elem) !== -1; +}; + +/** + * Provide a default value if a setting is undefined + */ +var deflt = function(setting, defaultIfUndefined) { + return setting === undefined ? 
/**
 * HTML entity to emit for each character that must not appear verbatim in
 * generated markup. Every value has to be the *entity*, not the character
 * itself; otherwise escaping silently becomes a no-op.
 */
var ESCAPE_LOOKUP = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "\"": "&quot;",
    "'": "&#x27;",
};

// Must list exactly the keys of ESCAPE_LOOKUP.
var ESCAPE_REGEX = /[&><"']/g;

/**
 * Replacement callback for `String.prototype.replace`.
 *
 * @param {string} match A single character matched by ESCAPE_REGEX.
 * @return {string} The corresponding HTML entity.
 */
function escaper(match) {
    return ESCAPE_LOOKUP[match];
}

/**
 * Escapes text to prevent scripting attacks.
 *
 * The value is coerced to a string first, so numbers, null and undefined
 * are accepted.
 *
 * @param {*} text Text value to escape.
 * @return {string} An escaped string.
 */
function escape(text) {
    return ("" + text).replace(ESCAPE_REGEX, escaper);
}
/**
 * Empty a DOM node by replacing its text content with the empty string.
 *
 * @param {Node} node The node to clear.
 */
function clearNode(node) {
    var emptyText = "";
    setTextContent(node, emptyText);
}
"name": "spicyj", + "email": "ben@benalpert.com" + } + ], + "dist": { + "shasum": "f561e7709ff9a105b85cc62c6b8ee7c15bf24f31", + "tarball": "https://registry.npmjs.org/match-at/-/match-at-0.1.0.tgz" + }, + "directories": {}, + "_resolved": "https://registry.npmjs.org/match-at/-/match-at-0.1.0.tgz" +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..909cd22a57d47c82dbee39f6da47b8128667fc3c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,60 @@ +[build-system] +requires = ["setuptools>=66,<71", "wheel", "packaging>=22"] +build-backend = "setuptools.build_meta" + +[project] +name = "fastcdm" +dynamic = ["version"] +description = "Fast CDM utilities for LaTeX tokenization, rendering, and matching" +readme = "README.md" +requires-python = ">=3.8" +authors = [{ name = "KevinQiu" }] +license = { file = "LICENSE" } +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", +] +dependencies = [ + "numpy>=1.20", + "opencv-python>=4.5", + "selenium>=4", + "webdriver-manager>=4", + "scikit-image", + "gradio", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7", + "black>=23", + "ruff>=0.1", + "mypy>=1.5", + "twine", +] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.dynamic] +version = { attr = "fastcdm.__version__" } + +[tool.setuptools.packages.find] +namespaces = true +include = ["fastcdm*"] + +[tool.setuptools.package-data] +fastcdm = [ + "render/templates/*.html", + "render/templates/lib/*.js", + "render/templates/lib/*.css", + "render/templates/lib/katex_fonts/*", + "tokenize_latex/*.js", + "tokenize_latex/third_party/**/*.js", + "tokenize_latex/third_party/**/*.css", + "tokenize_latex/third_party/**/*.html", + "tokenize_latex/third_party/**/*.ttf", + "tokenize_latex/third_party/**/*.woff", + "tokenize_latex/third_party/**/*.woff2", +] 
# Opening delimiters the preview Markdown panes are configured to render
# ("$" also covers "$$"). Input that already starts with one of these must
# not be wrapped a second time.
_LATEX_OPENERS = ("$", "\\(", "\\[")


def _wrap_latex(s: str) -> str:
    """Return *s* in a form the Markdown preview can render as math.

    Text that already begins (after leading whitespace) with a supported
    math delimiter is returned unchanged; anything else is wrapped in
    ``$$ ... $$`` so it is shown as a display formula.

    Args:
        s: Raw LaTeX typed by the user. ``None`` is treated as ``""``.

    Returns:
        The original text if it is already delimited, otherwise the text
        wrapped in ``$$``.
    """
    text = s or ""
    if text.lstrip().startswith(_LATEX_OPENERS):
        return text
    return f"$$ {text} $$"


def preview_latex_gt(gt: str) -> str:
    """Build the Markdown preview for the ground-truth formula."""
    return _wrap_latex(gt)


def preview_latex_pred(pred: str) -> str:
    """Build the Markdown preview for the predicted formula."""
    return _wrap_latex(pred)
def resolve_target_path(dest: str) -> Path:
    """Turn a user-supplied destination into the final chromedriver file path.

    ``dest`` is treated as a directory when it is an existing directory or
    when it ends with a path separator (the directory may not exist yet).
    In that case the platform's driver file name is appended. Any other
    value is taken as the full path of the driver file itself.

    Args:
        dest: Destination directory or full file path.

    Returns:
        The path the chromedriver binary should be placed at.
    """
    target = Path(dest)
    # os.altsep is "/" on Windows and None on POSIX; checking it as well
    # makes "some/dir/" count as a directory on Windows too.
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    if target.is_dir() or dest.endswith(separators):
        is_windows = sys.platform.startswith("win")
        binary_name = "chromedriver.exe" if is_windows else "chromedriver"
        return target / binary_name
    return target
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the driver smoke test.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. Leaving it as ``None`` keeps the original
            behaviour and lets the function be called programmatically.

    Returns:
        Namespace with ``driver`` (required path to the ChromeDriver
        executable) and ``url`` (page to load, with a default).
    """
    parser = argparse.ArgumentParser(
        prog="test_driver",
        description="测试指定 ChromeDriver 是否可用,并检查页面访问是否正常。",
    )
    parser.add_argument(
        "--driver",
        type=str,
        required=True,
        help="ChromeDriver 可执行文件路径。",
    )
    parser.add_argument(
        "--url",
        type=str,
        default="https://www.baidu.com",
        help="用于测试的目标 URL,默认 https://www.baidu.com。",
    )
    return parser.parse_args(argv)
def test_preprocess():
    """Smoke-test ``preprocess`` on a nested ``bmatrix`` expression.

    The call must complete and return a pair; the first element is printed
    for manual inspection.
    """
    # NOTE(review): no expected value is asserted yet, so this only proves
    # that preprocess() does not raise. Consider pinning the expected output.
    latex_string = r"\begin{bmatrix}(\mathbf{I}-\mathbf{A}^{\mathsf{DD}})&-\mathbf{A}^{\mathsf{DP }}&-\mathbf{A}^{\mathsf{DN}}\\ 0&\mathbf{I}&0\\ -\mathbf{A}^{\mathsf{ND}}&-\mathbf{A}^{\mathsf{NP}}&(\mathbf{I}-\mathbf{A}^{ \mathsf{NN}})\end{bmatrix}^{-1}=\begin{bmatrix}\mathbf{B}^{\mathsf{DD}}& \mathbf{B}^{\mathsf{DP}}&\mathbf{B}^{\mathsf{DN}}\\ \mathbf{B}^{\mathsf{PD}}&\mathbf{B}^{\mathsf{PP}}&\mathbf{B}^{\mathsf{PN}}\\ \mathbf{B}^{\mathsf{ND}}&\mathbf{B}^{\mathsf{NP}}&\mathbf{B}^{\mathsf{NN}} \end{bmatrix}"
    processed, _ = preprocess(latex_string)
    print(processed)


# Run only when executed as a script. An unconditional call here would run
# the test at import time, i.e. during pytest collection, and then again as
# a collected test.
if __name__ == "__main__":
    test_preprocess()