File size: 8,968 Bytes
ac2f8e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 |
from sympy.external import import_module
from sympy.utilities.decorator import doctest_depends_on
from re import compile as rcompile
from sympy.parsing.latex.lark import LarkLaTeXParser, TransformToSymPyExpr, parse_latex_lark # noqa
from .errors import LaTeXParsingError # noqa
IGNORE_L = r"\s*[{]*\s*"
IGNORE_R = r"\s*[}]*\s*"
NO_LEFT = r"(?<!\\left)"
BEGIN_AMS_MAT = r"\\begin{matrix}"
END_AMS_MAT = r"\\end{matrix}"
BEGIN_ARR = r"\\begin{array}{.*?}"
END_ARR = r"\\end{array}"
# begin_delim_regex: end_delim_regex
MATRIX_DELIMS = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\)",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\)",
fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\]",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\]",
fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\|",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\|",
r"\\begin{pmatrix}": r"\\end{pmatrix}",
r"\\begin{bmatrix}": r"\\end{bmatrix}",
r"\\begin{vmatrix}": r"\\end{vmatrix}",
fr"\\left\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\)",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\)",
fr"\\left\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\]",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\]",
fr"\\left\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\|",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\|"
}
MATRIX_DELIMS_INV = {v: k for k, v in MATRIX_DELIMS.items()}
# begin_delim_regex: ideal_begin_delim_representative
BEGIN_DELIM_REPR = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": "\\left(\\begin{matrix}",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": "(\\begin{matrix}",
fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": "\\left[\\begin{matrix}",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": "[\\begin{matrix}",
fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": "\\left|\\begin{matrix}",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": "|\\begin{matrix}",
r"\\begin{pmatrix}": "\\begin{pmatrix}",
r"\\begin{bmatrix}": "\\begin{bmatrix}",
r"\\begin{vmatrix}": "\\begin{vmatrix}",
fr"\\left\({IGNORE_L}{BEGIN_ARR}": "\\left(\\begin{array}{COLUMN_SPECIFIERS}",
fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": "(\\begin{array}{COLUMN_SPECIFIERS}",
fr"\\left\[{IGNORE_L}{BEGIN_ARR}": "\\left[\\begin{array}{COLUMN_SPECIFIERS}",
fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": "[\\begin{array}{COLUMN_SPECIFIERS}",
fr"\\left\|{IGNORE_L}{BEGIN_ARR}": "\\left|\\begin{array}{COLUMN_SPECIFIERS}",
fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": "|\\begin{array}{COLUMN_SPECIFIERS}"
}
# end_delim_regex: ideal_end_delim_representative
END_DELIM_REPR = {fr"{END_AMS_MAT}{IGNORE_R}\\right\)": "\\end{matrix}\\right)",
fr"{END_AMS_MAT}{IGNORE_R}\)": "\\end{matrix})",
fr"{END_AMS_MAT}{IGNORE_R}\\right\]": "\\end{matrix}\\right]",
fr"{END_AMS_MAT}{IGNORE_R}\]": "\\end{matrix}]",
fr"{END_AMS_MAT}{IGNORE_R}\\right\|": "\\end{matrix}\\right|",
fr"{END_AMS_MAT}{IGNORE_R}\|": "\\end{matrix}|",
r"\\end{pmatrix}": "\\end{pmatrix}",
r"\\end{bmatrix}": "\\end{bmatrix}",
r"\\end{vmatrix}": "\\end{vmatrix}",
fr"{END_ARR}{IGNORE_R}\\right\)": "\\end{array}\\right)",
fr"{END_ARR}{IGNORE_R}\)": "\\end{array})",
fr"{END_ARR}{IGNORE_R}\\right\]": "\\end{array}\\right]",
fr"{END_ARR}{IGNORE_R}\]": "\\end{array}]",
fr"{END_ARR}{IGNORE_R}\\right\|": "\\end{array}\\right|",
fr"{END_ARR}{IGNORE_R}\|": "\\end{array}|"
}
def check_matrix_delimiters(latex_str):
"""Report mismatched, excess, or missing matrix delimiters."""
spans = []
for begin_delim in MATRIX_DELIMS:
end_delim = MATRIX_DELIMS[begin_delim]
p = rcompile(begin_delim)
q = rcompile(end_delim)
spans.extend([(*m.span(), m.group(),
begin_delim) for m in p.finditer(latex_str)])
spans.extend([(*m.span(), m.group(),
end_delim) for m in q.finditer(latex_str)])
spans.sort(key=(lambda x: x[0]))
if len(spans) % 2 == 1:
# Odd number of delimiters; therefore something
# is wrong. We do not complain yet; let's see if
# we can pinpoint the actual error.
spans.append((None, None, None, None))
spans = [(*x, *y) for (x, y) in zip(spans[::2], spans[1::2])]
for x in spans:
# x is supposed to be an 8-tuple of the following form:
#
# (begin_delim_span_start, begin_delim_span_end,
# begin_delim_match, begin_delim_regex,
# end_delim_span_start, end_delim_span_end,
# end_delim_match, end_delim_regex)
sellipsis = "..."
s = x[0] - 10
if s < 0:
s = 0
sellipsis = ""
eellipsis = "..."
e = x[1] + 10
if e > len(latex_str):
e = len(latex_str)
eellipsis = ""
if x[3] in END_DELIM_REPR:
err = (f"Extra '{x[2]}' at index {x[0]} or "
"missing corresponding "
f"'{BEGIN_DELIM_REPR[MATRIX_DELIMS_INV[x[3]]]}' "
f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
f"{eellipsis}")
raise LaTeXParsingError(err)
if x[7] is None:
err = (f"Extra '{x[2]}' at index {x[0]} or "
"missing corresponding "
f"'{END_DELIM_REPR[MATRIX_DELIMS[x[3]]]}' "
f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
f"{eellipsis}")
raise LaTeXParsingError(err)
correct_end_regex = MATRIX_DELIMS[x[3]]
sellipsis = "..." if x[0] > 0 else ""
eellipsis = "..." if x[5] < len(latex_str) else ""
if x[7] != correct_end_regex:
err = ("Expected "
f"'{END_DELIM_REPR[correct_end_regex]}' "
f"to close the '{x[2]}' at index {x[0]} but "
f"found '{x[6]}' at index {x[4]} of LaTeX "
f"string instead: {sellipsis}{latex_str[x[0]:x[5]]}"
f"{eellipsis}")
raise LaTeXParsingError(err)
__doctest_requires__ = {('parse_latex',): ['antlr4', 'lark']}
@doctest_depends_on(modules=('antlr4', 'lark'))
def parse_latex(s, strict=False, backend="antlr"):
r"""Converts the input LaTeX string ``s`` to a SymPy ``Expr``.
Parameters
==========
s : str
The LaTeX string to parse. In Python source containing LaTeX,
*raw strings* (denoted with ``r"``, like this one) are preferred,
as LaTeX makes liberal use of the ``\`` character, which would
trigger escaping in normal Python strings.
backend : str, optional
Currently, there are two backends supported: ANTLR, and Lark.
The default setting is to use the ANTLR backend, which can be
changed to Lark if preferred.
Use ``backend="antlr"`` for the ANTLR-based parser, and
``backend="lark"`` for the Lark-based parser.
The ``backend`` option is case-sensitive, and must be in
all lowercase.
strict : bool, optional
This option is only available with the ANTLR backend.
If True, raise an exception if the string cannot be parsed as
valid LaTeX. If False, try to recover gracefully from common
mistakes.
Examples
========
>>> from sympy.parsing.latex import parse_latex
>>> expr = parse_latex(r"\frac {1 + \sqrt {\a}} {\b}")
>>> expr
(sqrt(a) + 1)/b
>>> expr.evalf(4, subs=dict(a=5, b=2))
1.618
>>> func = parse_latex(r"\int_1^\alpha \dfrac{\mathrm{d}t}{t}", backend="lark")
>>> func.evalf(subs={"alpha": 2})
0.693147180559945
"""
check_matrix_delimiters(s)
if backend == "antlr":
_latex = import_module(
'sympy.parsing.latex._parse_latex_antlr',
import_kwargs={'fromlist': ['X']})
if _latex is not None:
return _latex.parse_latex(s, strict)
elif backend == "lark":
return parse_latex_lark(s)
else:
raise NotImplementedError(f"Using the '{backend}' backend in the LaTeX" \
" parser is not supported.")
|