File size: 8,968 Bytes
ac2f8e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
from sympy.external import import_module
from sympy.utilities.decorator import doctest_depends_on
from re import compile as rcompile

from sympy.parsing.latex.lark import LarkLaTeXParser, TransformToSymPyExpr, parse_latex_lark # noqa

from .errors import LaTeXParsingError  # noqa


# Regex fragments shared by every bracketed-environment pattern below.
# IGNORE_L / IGNORE_R: optional whitespace and any run of opening / closing
# braces that may sit between a bracket and the environment it wraps.
IGNORE_L = r"\s*[{]*\s*"
IGNORE_R = r"\s*[}]*\s*"
# Negative lookbehind: the bracket that follows must not be preceded by
# ``\left`` (those cases have their own ``\left``-prefixed patterns).
NO_LEFT = r"(?<!\\left)"
BEGIN_AMS_MAT = r"\\begin{matrix}"
END_AMS_MAT = r"\\end{matrix}"
# The column-specifier group of ``array`` is matched non-greedily.
BEGIN_ARR = r"\\begin{array}{.*?}"
END_ARR = r"\\end{array}"

# One row per supported delimiter style:
#
#   (begin_delim_regex, end_delim_regex,
#    ideal_begin_delim_representative, ideal_end_delim_representative)
#
# The four lookup tables further down are all derived from these rows, so
# their keys always agree and share this row order.
_DELIM_TABLE = (
    # Bare ``matrix`` environment wrapped in explicit brackets.
    (fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}",
     fr"{END_AMS_MAT}{IGNORE_R}\\right\)",
     "\\left(\\begin{matrix}",
     "\\end{matrix}\\right)"),
    (fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}",
     fr"{END_AMS_MAT}{IGNORE_R}\)",
     "(\\begin{matrix}",
     "\\end{matrix})"),
    (fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}",
     fr"{END_AMS_MAT}{IGNORE_R}\\right\]",
     "\\left[\\begin{matrix}",
     "\\end{matrix}\\right]"),
    (fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}",
     fr"{END_AMS_MAT}{IGNORE_R}\]",
     "[\\begin{matrix}",
     "\\end{matrix}]"),
    (fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}",
     fr"{END_AMS_MAT}{IGNORE_R}\\right\|",
     "\\left|\\begin{matrix}",
     "\\end{matrix}\\right|"),
    (fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}",
     fr"{END_AMS_MAT}{IGNORE_R}\|",
     "|\\begin{matrix}",
     "\\end{matrix}|"),
    # Environments that carry their own delimiters.
    (r"\\begin{pmatrix}",
     r"\\end{pmatrix}",
     "\\begin{pmatrix}",
     "\\end{pmatrix}"),
    (r"\\begin{bmatrix}",
     r"\\end{bmatrix}",
     "\\begin{bmatrix}",
     "\\end{bmatrix}"),
    (r"\\begin{vmatrix}",
     r"\\end{vmatrix}",
     "\\begin{vmatrix}",
     "\\end{vmatrix}"),
    # ``array`` environment wrapped in explicit brackets.  The
    # representative strings use COLUMN_SPECIFIERS as a placeholder for the
    # column-specifier group.
    (fr"\\left\({IGNORE_L}{BEGIN_ARR}",
     fr"{END_ARR}{IGNORE_R}\\right\)",
     "\\left(\\begin{array}{COLUMN_SPECIFIERS}",
     "\\end{array}\\right)"),
    (fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}",
     fr"{END_ARR}{IGNORE_R}\)",
     "(\\begin{array}{COLUMN_SPECIFIERS}",
     "\\end{array})"),
    (fr"\\left\[{IGNORE_L}{BEGIN_ARR}",
     fr"{END_ARR}{IGNORE_R}\\right\]",
     "\\left[\\begin{array}{COLUMN_SPECIFIERS}",
     "\\end{array}\\right]"),
    (fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}",
     fr"{END_ARR}{IGNORE_R}\]",
     "[\\begin{array}{COLUMN_SPECIFIERS}",
     "\\end{array}]"),
    (fr"\\left\|{IGNORE_L}{BEGIN_ARR}",
     fr"{END_ARR}{IGNORE_R}\\right\|",
     "\\left|\\begin{array}{COLUMN_SPECIFIERS}",
     "\\end{array}\\right|"),
    (fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}",
     fr"{END_ARR}{IGNORE_R}\|",
     "|\\begin{array}{COLUMN_SPECIFIERS}",
     "\\end{array}|"),
)

# begin_delim_regex -> end_delim_regex
MATRIX_DELIMS = {begin: end for begin, end, _, _ in _DELIM_TABLE}

# end_delim_regex -> begin_delim_regex
MATRIX_DELIMS_INV = {end: begin for begin, end in MATRIX_DELIMS.items()}

# begin_delim_regex -> ideal_begin_delim_representative
BEGIN_DELIM_REPR = {begin: shown for begin, _, shown, _ in _DELIM_TABLE}

# end_delim_regex -> ideal_end_delim_representative
END_DELIM_REPR = {end: shown for _, end, _, shown in _DELIM_TABLE}


def check_matrix_delimiters(latex_str):
    """Verify that the matrix delimiters in ``latex_str`` are properly paired.

    Every begin and end delimiter described by ``MATRIX_DELIMS`` is located,
    the matches are ordered by starting index and then paired up two at a
    time in order of appearance.  Each pair must consist of a begin
    delimiter followed by its own end delimiter.

    Parameters
    ==========

    latex_str : str
        The LaTeX source to inspect.

    Raises
    ======

    LaTeXParsingError
        If a pair starts with an end delimiter, if the final begin
        delimiter has no partner at all, or if a begin delimiter is
        followed by anything other than its matching end delimiter.
    """
    found = []
    for opening_regex, closing_regex in MATRIX_DELIMS.items():
        for regex in (opening_regex, closing_regex):
            # Each entry: (span_start, span_end, matched_text, regex).
            found += [(*hit.span(), hit.group(), regex)
                      for hit in rcompile(regex).finditer(latex_str)]

    found.sort(key=lambda entry: entry[0])
    if len(found) % 2:
        # An odd number of delimiters is already an error, but pad with a
        # dummy entry so the unpaired delimiter can still be examined and
        # reported precisely below.
        found.append((None, None, None, None))

    text_len = len(latex_str)
    for first, second in zip(found[0::2], found[1::2]):
        first_start, first_stop, first_text, first_regex = first
        second_start, second_stop, second_text, second_regex = second

        # Excerpt of up to 10 characters either side of the first
        # delimiter; an ellipsis marks a side that was cut short.
        lo = first_start - 10
        hi = first_stop + 10
        head = "..." if lo >= 0 else ""
        tail = "..." if hi <= text_len else ""
        window = latex_str[max(lo, 0):min(hi, text_len)]

        if first_regex in END_DELIM_REPR:
            # The pair opens with an end delimiter.
            partner = BEGIN_DELIM_REPR[MATRIX_DELIMS_INV[first_regex]]
        elif second_regex is None:
            # A begin delimiter paired with the padding entry.
            partner = END_DELIM_REPR[MATRIX_DELIMS[first_regex]]
        else:
            partner = None

        if partner is not None:
            raise LaTeXParsingError(
                f"Extra '{first_text}' at index {first_start} or "
                "missing corresponding "
                f"'{partner}' "
                f"in LaTeX string: {head}{window}"
                f"{tail}")

        expected_regex = MATRIX_DELIMS[first_regex]
        if second_regex == expected_regex:
            continue

        # Mismatch: quote the whole stretch from the begin delimiter to the
        # end of whatever was found in place of its closing delimiter.
        head = "" if first_start <= 0 else "..."
        tail = "" if second_stop >= text_len else "..."
        raise LaTeXParsingError(
            "Expected "
            f"'{END_DELIM_REPR[expected_regex]}' "
            f"to close the '{first_text}' at index {first_start} but "
            f"found '{second_text}' at index {second_start} of LaTeX "
            f"string instead: {head}{latex_str[first_start:second_stop]}"
            f"{tail}")

# Maps the ``parse_latex`` doctests to the external modules they need.
# NOTE(review): presumably read by SymPy's doctest runner to skip those
# doctests when ``antlr4`` or ``lark`` is not installed -- confirm.
__doctest_requires__ = {('parse_latex',): ['antlr4', 'lark']}


@doctest_depends_on(modules=('antlr4', 'lark'))
def parse_latex(s, strict=False, backend="antlr"):
    r"""Converts the input LaTeX string ``s`` to a SymPy ``Expr``.

    Parameters
    ==========

    s : str
        The LaTeX string to parse. In Python source containing LaTeX,
        *raw strings* (denoted with ``r"``, like this one) are preferred,
        as LaTeX makes liberal use of the ``\`` character, which would
        trigger escaping in normal Python strings.
    strict : bool, optional
        This option is only available with the ANTLR backend.

        If True, raise an exception if the string cannot be parsed as
        valid LaTeX. If False, try to recover gracefully from common
        mistakes.
    backend : str, optional
        Currently, there are two backends supported: ANTLR, and Lark.
        The default setting is to use the ANTLR backend, which can be
        changed to Lark if preferred.

        Use ``backend="antlr"`` for the ANTLR-based parser, and
        ``backend="lark"`` for the Lark-based parser.

        The ``backend`` option is case-sensitive, and must be in
        all lowercase.

    Returns
    =======

    The expression produced by the selected backend for ``s``.

    Raises
    ======

    LaTeXParsingError
        If the matrix delimiters in ``s`` are mismatched, in excess, or
        missing (checked before any backend is invoked).
    ImportError
        If ``backend="antlr"`` is requested but the ANTLR parser module
        cannot be imported.
    NotImplementedError
        If ``backend`` is neither ``"antlr"`` nor ``"lark"``.

    Examples
    ========

    >>> from sympy.parsing.latex import parse_latex
    >>> expr = parse_latex(r"\frac {1 + \sqrt {\a}} {\b}")
    >>> expr
    (sqrt(a) + 1)/b
    >>> expr.evalf(4, subs=dict(a=5, b=2))
    1.618
    >>> func = parse_latex(r"\int_1^\alpha \dfrac{\mathrm{d}t}{t}", backend="lark")
    >>> func.evalf(subs={"alpha": 2})
    0.693147180559945
    """

    # Reject malformed matrix delimiters up front, whichever backend is used.
    check_matrix_delimiters(s)

    if backend == "antlr":
        _latex = import_module(
            'sympy.parsing.latex._parse_latex_antlr',
            import_kwargs={'fromlist': ['X']})

        if _latex is None:
            # Previously this case fell through and silently returned None,
            # leaving the caller without an expression or an explanation.
            raise ImportError(
                "The ANTLR backend of the LaTeX parser "
                "('sympy.parsing.latex._parse_latex_antlr') could not be "
                "imported.")
        return _latex.parse_latex(s, strict)

    if backend == "lark":
        # ``strict`` is not forwarded: it only applies to the ANTLR backend.
        return parse_latex_lark(s)

    raise NotImplementedError(f"Using the '{backend}' backend in the LaTeX"
                              " parser is not supported.")