| | |
| |
|
| | from .exceptions import ParseException |
| | from .util import col |
| |
|
| |
|
class OnlyOnce:
    """
    Wrapper for parse actions, to ensure they are only called once.

    The wrapped action is run on the first call. Every later call raises
    :class:`ParseException` until :meth:`reset` is invoked.
    """

    def __init__(self, method_call):
        # Imported at call time rather than at module level
        # (presumably to avoid a circular import with ``.core`` - confirm).
        from .core import _trim_arity

        # NOTE(review): _trim_arity presumably adapts ``method_call`` so it
        # can always be invoked as ``fn(s, l, t)`` - see ``.core``.
        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s, l, t):
        """
        Run the wrapped parse action with ``(s, l, t)`` and return its result.

        Raises :class:`ParseException` if the action has already run and
        :meth:`reset` has not been called since.
        """
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

        results = self.callable(s, l, t)
        # Only mark as called once the action has returned; if the action
        # raises, the flag stays False and the action may be attempted again.
        self.called = True
        return results

    def reset(self):
        """
        Allow the associated parse action to be called once more.
        """
        self.called = False
| |
|
| |
|
def match_only_at_col(n):
    """
    Helper method for defining parse actions that require matching at
    a specific column in the input text.

    Returns a parse action ``verify_col(strg, locn, toks)`` that raises
    :class:`ParseException` when ``col(locn, strg)`` is not equal to ``n``,
    and otherwise returns ``None``.
    """

    def verify_col(strg, locn, toks):
        # ``toks`` is accepted only to satisfy the parse-action signature;
        # the check depends solely on the match location.
        if col(locn, strg) != n:
            raise ParseException(strg, locn, "matched token not at column {}".format(n))

    return verify_col
| |
|
| |
|
def replace_with(repl_str):
    """
    Helper method for common parse actions that simply return
    a literal value.  Especially useful when used with
    :class:`transform_string<ParserElement.transform_string>`.

    The returned parse action ignores its ``(s, l, t)`` arguments and
    always returns a new one-element list containing ``repl_str``.

    Example::

        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        term = na | num

        term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda s, l, t: [repl_str]
| |
|
| |
|
def remove_quotes(s, l, t):
    """
    Helper parse action for removing quotation marks from parsed
    quoted strings.

    Returns the first token with its first and last characters dropped.
    The characters are removed unconditionally; no check is made that
    they are actually quotation marks.

    Example::

        # by default, quotation marks are included in parsed results
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use remove_quotes to strip quotation marks from parsed results
        quoted_string.set_parse_action(remove_quotes)
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    return t[0][1:-1]
| |
|
| |
|
def with_attribute(*args, **attr_dict):
    """
    Helper to create a validating parse action to be used with start
    tags created with :class:`make_xml_tags` or
    :class:`make_html_tags`. Use ``with_attribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``with_attribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    Attribute names with a namespace prefix cannot be written as plain
    keyword arguments; use the ``**`` dict form or the name-value tuple
    form for them. If any positional name-value tuples are given, keyword
    arguments are ignored.

    Each name is looked up as given among the keys of the parsed tokens;
    no upper/lower case conversion is performed by this helper.

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To verify that the attribute exists, but without specifying a value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    The returned parse action raises :class:`ParseException` when a
    required attribute is missing from the tokens or has a different value,
    and otherwise returns ``None``.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = make_html_tags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Positional pairs take precedence over keyword arguments.  Unpacking
    # each entry here fails early if one of them is not a (name, value) pair.
    attrs = [(k, v) for k, v in (args if args else attr_dict.items())]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            # ANY_VALUE is a sentinel, so test it by identity; this never
            # invokes a custom comparison on the required value.
            if attrValue is not with_attribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(
                    s,
                    l,
                    "attribute {!r} has value {!r}, must be {!r}".format(
                        attrName, tokens[attrName], attrValue
                    ),
                )

    return pa


# Unique sentinel meaning "the attribute must be present, with any value".
with_attribute.ANY_VALUE = object()
| |
|
| |
|
def with_class(classname, namespace=""):
    """
    Simplified version of :class:`with_attribute` when
    matching on a div class - made difficult because ``class`` is
    a reserved word in Python.

    The attribute checked is ``"class"``, or ``"<namespace>:class"`` when a
    non-empty ``namespace`` is given. The result is the parse action
    returned by :class:`with_attribute` for that single attribute.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = make_html_tags("div")
        div_grid = div().set_parse_action(with_class("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        div_any_type = div().set_parse_action(with_class(with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    classattr = "{}:class".format(namespace) if namespace else "class"
    # The name is passed through a dict because ``class`` (and any
    # ``ns:class`` form) cannot be written as a literal keyword argument.
    return with_attribute(**{classattr: classname})
| |
|
| |
|
| | |
# camelCase aliases bound to the same function objects as the snake_case
# helpers above (presumably kept for backward compatibility - confirm).
replaceWith = replace_with
removeQuotes = remove_quotes
withAttribute = with_attribute
withClass = with_class
matchOnlyAtCol = match_only_at_col
| |
|