| | |
| | import html.entities |
| | import re |
| | import typing |
| |
|
| | from . import __diag__ |
| | from .core import * |
| | from .util import _bslash, _flatten, _escape_regex_range_chars |
| |
|
| |
|
| | |
| | |
| | |
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """Build an expression matching ``expr`` repeated with ``delim`` between items.

    Parameters:

    - ``expr`` - expression for a single list item; a string is converted
      with ``ParserElement._literalStringClass``
    - ``delim`` - delimiter between items (default= ``","``)
    - ``combine`` - if ``True``, the whole match is wrapped in
      :class:`Combine` and the delimiters are kept; if ``False``, the
      delimiters are wrapped in :class:`Suppress` (default= ``False``)
    - ``min`` - minimum number of items; must be at least 1 if given
    - ``max`` - maximum number of items; must not be less than ``min``
    - ``allow_trailing_delim`` - if ``True``, an optional delimiter is
      accepted after the last item (default= ``False``)

    Raises ``ValueError`` if ``min`` is less than 1 or ``max`` is less
    than ``min``.

    Example::

        delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    if isinstance(expr, str_type):
        expr = ParserElement._literalStringClass(expr)

    # Compose the display name before delim is (possibly) wrapped in Suppress.
    expr_text = str(expr.copy().streamline())
    delim_text = str(delim)
    trailing_text = " [{}]".format(delim_text) if allow_trailing_delim else ""
    list_name = "{expr} [{delim} {expr}]...{end}".format(
        expr=expr_text, delim=delim_text, end=trailing_text
    )

    if not combine:
        delim = Suppress(delim)

    # The first item is matched on its own, so the repetition bounds for
    # the "(delim + expr)" tail are each one less than the item bounds.
    tail_min = min
    tail_max = max
    if tail_min is not None:
        if tail_min < 1:
            raise ValueError("min must be greater than 0")
        tail_min -= 1
    if tail_max is not None:
        if tail_min is not None and tail_max <= tail_min:
            raise ValueError("max must be greater than, or equal to min")
        tail_max -= 1

    list_expr = expr + (delim + expr)[tail_min, tail_max]
    if allow_trailing_delim:
        list_expr += Opt(delim)

    result = Combine(list_expr) if combine else list_expr
    return result.set_name(list_name)
| |
|
| |
|
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Build an expression for a length-prefixed array of ``expr``.

    The pattern matched has the form::

        integer expr expr expr...

    where the leading integer gives the number of ``expr`` items that
    follow. The count token is removed from the results, leaving only
    the array items (results names set by the count expression are kept).

    Parameters:

    - ``expr`` - expression for a single array item
    - ``int_expr`` - optional expression that produces the integer count;
      defaults to ``Word(nums)`` converted with ``int``
    - ``intExpr`` - pre-PEP8 spelling of ``int_expr``; takes precedence
      when both are given

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # the leading count may use any expression that yields an int
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']
    """
    count_expr = intExpr or int_expr
    items = Forward()

    def _bind_item_count(s, l, t):
        # Re-point the Forward at exactly `count` copies of expr.
        nonlocal items
        count = t[0]
        if count:
            items <<= expr * count
        else:
            items <<= Empty()
        # Drop the count token itself but keep any named results.
        del t[:]

    if count_expr is None:
        count_expr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # Work on a copy so the caller's expression is not modified.
        count_expr = count_expr.copy()
    count_expr.set_name("arrayLen")
    count_expr.add_parse_action(_bind_item_count, call_during_try=True)
    return (count_expr + items).set_name("(len) " + str(expr) + "...")
| |
|
| |
|
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Return an expression that matches a literal repeat of what ``expr`` matched.

    A parse action is added to ``expr`` that rewrites the returned
    :class:`Forward` each time ``expr`` matches. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    matches ``"1:1"`` but not ``"1:2"``. Because the repeat is matched as
    a literal, it also matches the leading ``"1:1"`` of ``"1:10"``; use
    :class:`match_previous_expr` if that is not wanted. Do *not* use with
    packrat parsing enabled.
    """
    repeater = Forward()

    def _remember_tokens(s, l, t):
        if not t:
            repeater << Empty()
            return
        if len(t) == 1:
            repeater << t[0]
            return
        # Several (possibly nested) tokens: match each one in sequence.
        repeater << And(Literal(tok) for tok in _flatten(t.as_list()))

    expr.add_parse_action(_remember_tokens, callDuringTry=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater
| |
|
| |
|
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Return an expression that re-parses with ``expr`` and requires equal tokens.

    The returned :class:`Forward` wraps a copy of ``expr``. Each time the
    original ``expr`` matches, its flattened tokens are recorded and a
    parse action is set on the returned expression that raises
    :class:`ParseException` unless the new match yields the same
    flattened tokens. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    matches ``"1:1"`` but not ``"1:2"``. Because the comparison is done on
    parsed tokens, this does *not* match the leading ``"1:1"`` of
    ``"1:10"``: ``"1"`` is compared with ``"10"``. Do *not* use with
    packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # Tokens from the most recent match of the original expression.
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                raise ParseException(
                    s, l, "Expected {}, found {}".format(matchTokens, theseTokens)
                )

        # set_parse_action (rather than add_) so only the checker for the
        # latest match is attached to the repeater.
        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
| |
|
| |
|
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Build an expression matching any one of a set of literal strings.

    The alternatives are reordered so that a string is never hidden by
    an earlier string that is a prefix of it, and duplicates are removed.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless (default= ``False``)
    - ``use_regex`` - if ``True``, try to build a single :class:`Regex`;
      if that raises ``re.error`` a warning is issued and a
      :class:`MatchFirst` is built instead (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8
      compatibility, but will be removed in a future release

    Returns :class:`NoMatch` if no symbols are given. Raises ``TypeError``
    if ``strs`` is neither a string nor an iterable.

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    keyword_mode = asKeyword or as_keyword
    regex_enabled = useRegex and use_regex

    # A string in the `caseless` slot usually means the caller passed the
    # choices as separate positional arguments.
    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:

        def _same(a, b):
            return a.upper() == b.upper()

        def _shadows(a, b):
            return b.upper().startswith(a.upper())

        element_class = CaselessKeyword if keyword_mode else CaselessLiteral
    else:

        def _same(a, b):
            return a == b

        def _shadows(a, b):
            return b.startswith(a)

        element_class = Keyword if keyword_mode else Literal

    if isinstance(strs, str_type):
        symbols: List[str] = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # Reordering is only needed when some symbol is longer than one
    # character: drop duplicates and move longer strings ahead of any
    # earlier string they start with.
    if any(len(sym) > 1 for sym in symbols):
        idx = 0
        while idx < len(symbols) - 1:
            current = symbols[idx]
            for pos, candidate in enumerate(symbols[idx + 1 :], start=idx + 1):
                if _same(candidate, current):
                    del symbols[pos]
                    break
                if _shadows(current, candidate):
                    del symbols[pos]
                    symbols.insert(idx, candidate)
                    break
            else:
                # Nothing changed for this position; move on.
                idx += 1

    if regex_enabled:
        flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # Only single characters: a character class is enough.
                pattern = "[{}]".format(
                    "".join(map(_escape_regex_range_chars, symbols))
                )
            else:
                pattern = "|".join(map(re.escape, symbols))

            if keyword_mode:
                # Require word boundaries around the alternatives.
                pattern = r"\b(?:{})\b".format(pattern)

            regex_expr = Regex(pattern, flags=flags).set_name(" | ".join(symbols))

            if caseless:
                # Report the symbol as the caller spelled it, not as it
                # appeared in the input.
                canonical = {sym.lower(): sym for sym in symbols}
                regex_expr.add_parse_action(lambda s, l, t: canonical[t[0].lower()])

            return regex_expr

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # Regex disabled or failed: fall back to explicit alternatives.
    return MatchFirst(element_class(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )
| |
|
| |
|
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Build a :class:`Dict` expression from a key pattern and a value pattern.

    Each ``key + value`` pair is wrapped in a :class:`Group`, one or more
    such groups are matched, and the result is wrapped in :class:`Dict`.
    The key pattern can include delimiting markers or punctuation, as
    long as they are suppressed, leaving only the significant key text.
    The value pattern can include named results.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    entry = Group(key + value)
    entries = OneOrMore(entry)
    return Dict(entries)
| |
|
| |
|
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Wrap ``expr`` so that it returns the original, untokenized input text.

    Useful to restore the parsed fields of an HTML start tag into the raw
    tag text itself, or to revert separate tokens with intervening
    whitespace back to the original matching input text.

    Parameters:

    - ``expr`` - the expression whose matched source text is wanted
    - ``as_string`` - if ``True`` (the default) the parse action returns
      the matched text as a plain string; if ``False`` the tokens are
      replaced in place by a single token holding the matched text, so
      results names defined inside ``expr`` are preserved
    - ``asString`` - pre-PEP8 spelling of ``as_string``, retained for
      compatibility; a string is returned only if both are true

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + SkipTo(closer) + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    want_string = asString and as_string

    # Zero-width markers whose parse action returns the current location.
    start_marker = Empty().set_parse_action(lambda s, loc, t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")

    def _text_only(s, l, t):
        return s[t._original_start : t._original_end]

    def _text_keeping_names(s, l, t):
        # Remove the two marker names, then replace the token list with
        # the single slice of source text.
        begin = t.pop("_original_start")
        finish = t.pop("_original_end")
        t[:] = [s[begin:finish]]

    wrapped.set_parse_action(_text_only if want_string else _text_keeping_names)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped
| |
|
| |
|
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo a level of grouping: wraps ``expr`` in a
    :class:`TokenConverter` whose parse action returns the first matched
    token in place of the token list.
    """
    converter = TokenConverter(expr)
    return converter.add_parse_action(lambda t: t[0])
| |
|
| |
|
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the Located class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    The result is a :class:`Group` carrying these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # Zero-width expression whose parse action returns the parse location.
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)

    start_part = locator("locn_start")
    value_part = expr("value")
    # The end marker must not skip whitespace, or it would report the
    # location after any trailing blanks.
    end_part = locator.copy().leaveWhitespace()("locn_end")
    return Group(start_part + value_part + end_part)
| |
|
| |
|
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Build an expression for nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` by default).

    Parameters:
    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If no ``content`` expression is given, ``opener`` and ``closer`` must
    both be strings, and the nested expression captures all
    whitespace-delimited content between delimiters as separate values.

    Use ``ignore_expr`` for expressions that may contain the delimiters
    but should not be treated as nesting, such as quoted strings or
    comments. Combine several with :class:`Or` or :class:`MatchFirst`.
    Pass ``None`` if nothing is to be ignored.

    Raises ``ValueError`` if ``opener`` equals ``closer``, or if
    ``content`` is omitted and the delimiters are not both strings.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(delimited_list(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)
    """
    # Reconcile the PEP8 and pre-PEP8 spellings of the ignore expression.
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        both_strings = isinstance(opener, str_type) and isinstance(closer, str_type)
        if not both_strings:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )

        white = ParserElement.DEFAULT_WHITE_CHARS
        strip_token = lambda t: t[0].strip()
        single_char_delims = len(opener) == 1 and len(closer) == 1

        if single_char_delims and ignoreExpr is not None:
            content = Combine(
                OneOrMore(
                    ~ignoreExpr + CharsNotIn(opener + closer + white, exact=1)
                )
            ).set_parse_action(strip_token)
        elif single_char_delims:
            content = empty.copy() + CharsNotIn(
                opener + closer + white
            ).set_parse_action(strip_token)
        elif ignoreExpr is not None:
            # Multi-character delimiters cannot go in a character set, so
            # exclude them with negative lookaheads instead.
            content = Combine(
                OneOrMore(
                    ~ignoreExpr
                    + ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(white, exact=1)
                )
            ).set_parse_action(strip_token)
        else:
            content = Combine(
                OneOrMore(
                    ~Literal(opener) + ~Literal(closer) + CharsNotIn(white, exact=1)
                )
            ).set_parse_action(strip_token)

    nested = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | nested | content
    else:
        item = nested | content
    nested <<= Group(Suppress(opener) + ZeroOrMore(item) + Suppress(closer))
    nested.set_name("nested %s%s expression" % (opener, closer))
    return nested
| |
|
| |
|
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` may be a string (converted to a :class:`Keyword`, caseless
    unless ``xml`` is true) or an expression whose ``name`` is used for
    the results names. Returns an ``(open_tag, close_tag)`` tuple.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    # "ns:tag" -> "NsTag", used as the suffix of the start/end results names.
    name_suffix = "".join(resname.replace(":", " ").title().split())

    attr_name = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attributes always have a double-quoted value.
        attr_value = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        attr_pair = attr_name + Suppress("=") + attr_value
    else:
        # HTML: values may be quoted or bare, or absent altogether, and
        # attribute names are lower-cased.
        attr_value = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        attr_pair = attr_name.set_parse_action(lambda t: t[0].lower()) + Opt(
            Suppress("=") + attr_value
        )

    # "empty" is True when the tag ends with "/", otherwise False.
    empty_flag = Opt("/", default=[False])("empty").set_parse_action(
        lambda s, l, t: t[0] == "/"
    )
    open_tag = (
        suppress_LT
        + tagStr("tag")
        + Dict(ZeroOrMore(Group(attr_pair)))
        + empty_flag
        + suppress_GT
    )
    close_tag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    open_tag.set_name("<%s>" % resname)

    def _store_start_copy(t):
        # Also expose a copy of the tag's tokens under "start<Name>".
        t["start" + name_suffix] = t.copy()

    open_tag.add_parse_action(_store_start_copy)
    close_tag = close_tag("end" + name_suffix).set_name("</%s>" % resname)

    open_tag.tag = resname
    close_tag.tag = resname
    open_tag.tag_body = SkipTo(close_tag())
    return open_tag, close_tag
| |
|
| |
|
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for an HTML tag name.

    Matches tags in either upper or lower case, attributes with
    namespaces and with quoted or unquoted values. Returns the pair
    ``(open_tag, close_tag)``.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    tag_pair = _makeTags(tag_str, xml=False)
    return tag_pair
| |
|
| |
|
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Construct opening and closing tag expressions for an XML tag name.

    Matches tags only in the given upper/lower case. Returns the pair
    ``(open_tag, close_tag)``.

    Example: similar to :class:`make_html_tags`
    """
    tag_pair = _makeTags(tag_str, xml=True)
    return tag_pair
| |
|
| |
|
# Generic open/close tag expressions, built with make_html_tags from a
# Word pattern instead of a fixed tag name.
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# Entity name (trailing ";" stripped) -> replacement text, taken from the
# standard library's HTML5 entity table.
_htmlEntityMap = {
    name.rstrip(";"): char for name, char in html.entities.html5.items()
}
# Matches "&<name>;" for any known entity name; the name is captured in
# the "entity" group.
common_html_entity = Regex(
    "&(?P<entity>{});".format("|".join(_htmlEntityMap))
).set_name("common HTML entity")
| |
|
| |
|
def replace_html_entity(t):
    """Parse action that maps a matched HTML entity to its replacement text.

    Looks up ``t.entity`` in the module's entity map and returns the
    mapped value, or ``None`` if the name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
| |
|
| |
|
class OpAssoc(Enum):
    """Operator associativity constants used in ``infix_notation`` operator specs."""

    # Left-associative operator
    LEFT = 1
    # Right-associative operator
    RIGHT = 2
| |
|
| |
|
# Operator argument accepted in an infix_notation operator spec: a single
# expression or string, or a 2-tuple of expressions/strings (the form
# infix_notation requires when the operator takes three operands).
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# One entry of infix_notation's op_list:
# (operator, number of operands, associativity[, parse action]).
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
| |
|
| |
|
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as Suppress(lpar). If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as Suppress(rpar). If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Raises ``ValueError`` if an operator spec has an arity outside 1-3,
    an associativity other than ``OpAssoc.LEFT``/``OpAssoc.RIGHT``, or an
    arity of 3 without a 2-element tuple/list of operator expressions.

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # Lookahead used as a cheap guard in front of each precedence level:
    # it calls try_parse on its expression and contributes no tokens.
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # If either parenthesis is kept in the results, group the
    # parenthesized sub-expression together with its parentheses.
    # (Both lpar and rpar must be checked; the previous code tested rpar
    # twice, so a non-suppressed lpar alone was never grouped.)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    for operDef in op_list:
        # Pad with None so the optional parse action can be omitted.
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = "{}{} term".format(opExpr1, opExpr2)
        else:
            term_name = "{} term".format(opExpr)

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: Forward = Forward().set_name(term_name)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    # No operator expression: operands are simply adjacent.
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # Wrap the operator in Opt so the Group below also accepts
                # an operand with no operator; the lookahead still
                # requires the operator itself via opExpr.expr.
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        # Each level matches its own operator form or falls through to the
        # next-higher-precedence level built so far.
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
| |
|
| |
|
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use IndentedBlock class instead)
    Build an expression matching a run of statements that are grouped by
    their indentation column, in the manner of block statements in
    Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression for a single statement; it is
      matched one or more times inside the block
    - ``indentStack`` - list owned by the caller that holds the currently
      open indentation columns (every ``indentedBlock`` expression used in
      one grammar should be given the same ``indentStack``)
    - ``indent`` - when ``True`` (the default) the block has to start at a
      column deeper than the current top of ``indentStack``; pass ``False``
      for a block of left-most statements
    - ``backup_stacks`` - list that receives a copy of ``indentStack`` on
      every call of this helper; the most recent copy is written back into
      ``indentStack`` whenever the returned expression fails to match.
      The default list is a single object shared by all calls that do not
      pass their own.

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # Remember the stack as it is right now so a failed match can restore it.
    backup_stacks.append(indentStack[:])

    def require_peer_column(text, loc, toks):
        # Parse action: the statement must start exactly at the current level.
        if loc >= len(text):
            # End of input - there is no column left to compare.
            return
        column = col(loc, text)
        if column == indentStack[-1]:
            return
        if column > indentStack[-1]:
            raise ParseException(text, loc, "illegal nesting")
        raise ParseException(text, loc, "not a peer entry")

    def open_deeper_level(text, loc, toks):
        # Parse action: a new block must start deeper than the current level;
        # when it does, its column becomes the new top of the stack.
        column = col(loc, text)
        if column <= indentStack[-1]:
            raise ParseException(text, loc, "not a subentry")
        indentStack.append(column)

    def close_level(text, loc, toks):
        # Parse action: after a block, the next column must be one of the
        # levels still on the stack; drop the top level if we moved left of it.
        if loc >= len(text):
            return
        column = col(loc, text)
        if not indentStack or column not in indentStack:
            raise ParseException(text, loc, "not an unindent")
        if column < indentStack[-1]:
            indentStack.pop()

    def drop_snapshot():
        # Success callback: discard the newest snapshot, if any. The
        # ``and None`` keeps the return value None for a non-empty snapshot.
        if backup_stacks:
            return backup_stacks.pop(-1) and None
        return None

    def restore_snapshot(text, loc, expr, err):
        # Failure callback: put the newest snapshot back into the live stack
        # (in place, so the caller's list object is the one that changes).
        indentStack[:] = backup_stacks[-1]

    line_breaks = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    indent_marker = (
        Empty() + Empty().set_parse_action(open_deeper_level)
    ).set_name("INDENT")
    peer_marker = Empty().set_parse_action(require_peer_column).set_name("")
    unindent_marker = Empty().set_parse_action(close_level).set_name("UNINDENT")

    # One or more statements, each checked to sit at the current level and
    # each optionally followed by line breaks.
    statements = OneOrMore(
        peer_marker + Group(blockStatementExpr) + Opt(line_breaks)
    )

    if indent:
        block = Group(
            Opt(line_breaks) + indent_marker + statements + unindent_marker
        )
    else:
        block = Group(Opt(line_breaks) + statements + Opt(unindent_marker))

    block.add_parse_action(drop_snapshot)
    block.set_fail_action(restore_snapshot)

    # Let a backslash followed by a line end be skipped inside statements.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return block.set_name("indented block")
| |
|
| |
|
| | |
# Ready-made comment expressions. These are plain module-level bindings on
# purpose: the ``vars()`` scan further down collects every ParserElement bound
# in this namespace, so no helper/intermediate names are introduced here.

# The regex consumes "/*" and everything up to (but not including) the first
# "*/"; the closing "*/" is matched as a separate literal and Combine wraps
# the pair.
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

# Non-greedy, so the match stops at the first "-->"; ``[\s\S]`` allows the
# comment body to span several lines.
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Everything up to the next newline ("." does not match "\n" in this pattern);
# leave_whitespace() is applied so leading whitespace is not skipped first.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# "//" through the end of the line; a backslash directly before the newline
# is consumed together with it, which continues the comment on the next line.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# ``+`` binds tighter than ``|``: this is (block comment) | (``//`` comment).
# The block-comment regex is the same pattern used by c_style_comment.
cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

# Second name for the very same expression object as cpp_style_comment.
java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

# "#" followed by the rest of the line.
python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"
| |
|
| |
|
| | |
| | |
# Collect every value currently in this module's namespace that is a
# ParserElement instance. Only names bound before this statement are seen;
# assignments made further down are not part of the scan.
# Written as a comprehension so that the loop variable is not itself bound in
# the module namespace while ``vars()`` is being iterated.
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
| |
|
| |
|
| | |
# camelCase names bound to the same objects as their snake_case counterparts
# (no wrapping or copying - each alias *is* the original object).
# NOTE(review): presumably retained so code written against the older
# camelCase API keeps working - confirm before removing any of them.
delimitedList = delimited_list
countedArray = counted_array
matchPreviousLiteral = match_previous_literal
matchPreviousExpr = match_previous_expr
oneOf = one_of
dictOf = dict_of
originalTextFor = original_text_for
nestedExpr = nested_expr
makeHTMLTags = make_html_tags
makeXMLTags = make_xml_tags
anyOpenTag, anyCloseTag = any_open_tag, any_close_tag
commonHTMLEntity = common_html_entity
replaceHTMLEntity = replace_html_entity
opAssoc = OpAssoc
infixNotation = infix_notation
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
| |
|