Buckets:
| # common.py | |
| from .core import * | |
| from .helpers import DelimitedList, any_open_tag, any_close_tag | |
| from datetime import datetime | |
| import sys | |
| PY_310_OR_LATER = sys.version_info >= (3, 10) | |
| # some other useful expressions - using lower-case class name since we are really using this as a namespace | |
| class pyparsing_common: | |
| """Here are some common low-level expressions that may be useful in | |
| jump-starting parser development: | |
| - numeric forms (:class:`integers<integer>`, :class:`reals<real>`, | |
| :class:`scientific notation<sci_real>`) | |
| - common :class:`programming identifiers<identifier>` | |
| - network addresses (:class:`MAC<mac_address>`, | |
| :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`) | |
| - ISO8601 :class:`dates<iso8601_date>` and | |
| :class:`datetime<iso8601_datetime>` | |
| - :class:`UUID<uuid>` | |
| - :class:`comma-separated list<comma_separated_list>` | |
| - :class:`url` | |
| Parse actions: | |
| - :class:`convert_to_integer` | |
| - :class:`convert_to_float` | |
| - :class:`convert_to_date` | |
| - :class:`convert_to_datetime` | |
| - :class:`strip_html_tags` | |
| - :class:`upcase_tokens` | |
| - :class:`downcase_tokens` | |
| Examples: | |
| .. testcode:: | |
| pyparsing_common.number.run_tests(''' | |
| # any int or real number, returned as the appropriate type | |
| 100 | |
| -100 | |
| +100 | |
| 3.14159 | |
| 6.02e23 | |
| 1e-12 | |
| ''') | |
| .. testoutput:: | |
| :options: +NORMALIZE_WHITESPACE | |
| # any int or real number, returned as the appropriate type | |
| 100 | |
| [100] | |
| -100 | |
| [-100] | |
| +100 | |
| [100] | |
| 3.14159 | |
| [3.14159] | |
| 6.02e23 | |
| [6.02e+23] | |
| 1e-12 | |
| [1e-12] | |
| .. testcode:: | |
| pyparsing_common.fnumber.run_tests(''' | |
| # any int or real number, returned as float | |
| 100 | |
| -100 | |
| +100 | |
| 3.14159 | |
| 6.02e23 | |
| 1e-12 | |
| ''') | |
| .. testoutput:: | |
| :options: +NORMALIZE_WHITESPACE | |
| # any int or real number, returned as float | |
| 100 | |
| [100.0] | |
| -100 | |
| [-100.0] | |
| +100 | |
| [100.0] | |
| 3.14159 | |
| [3.14159] | |
| 6.02e23 | |
| [6.02e+23] | |
| 1e-12 | |
| [1e-12] | |
| .. testcode:: | |
| pyparsing_common.hex_integer.run_tests(''' | |
| # hex numbers | |
| 100 | |
| FF | |
| ''') | |
| .. testoutput:: | |
| :options: +NORMALIZE_WHITESPACE | |
| # hex numbers | |
| 100 | |
| [256] | |
| FF | |
| [255] | |
| .. testcode:: | |
| pyparsing_common.fraction.run_tests(''' | |
| # fractions | |
| 1/2 | |
| -3/4 | |
| ''') | |
| .. testoutput:: | |
| :options: +NORMALIZE_WHITESPACE | |
| # fractions | |
| 1/2 | |
| [0.5] | |
| -3/4 | |
| [-0.75] | |
| .. testcode:: | |
| pyparsing_common.mixed_integer.run_tests(''' | |
| # mixed fractions | |
| 1 | |
| 1/2 | |
| -3/4 | |
| 1-3/4 | |
| ''') | |
| .. testoutput:: | |
| :options: +NORMALIZE_WHITESPACE | |
| # mixed fractions | |
| 1 | |
| [1] | |
| 1/2 | |
| [0.5] | |
| -3/4 | |
| [-0.75] | |
| 1-3/4 | |
| [1.75] | |
| .. testcode:: | |
| import uuid | |
| pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID)) | |
| pyparsing_common.uuid.run_tests(''' | |
| # uuid | |
| 12345678-1234-5678-1234-567812345678 | |
| ''') | |
| .. testoutput:: | |
| :options: +NORMALIZE_WHITESPACE | |
| # uuid | |
| 12345678-1234-5678-1234-567812345678 | |
| [UUID('12345678-1234-5678-1234-567812345678')] | |
| """ | |
def convert_to_integer(_, __, t):
    """
    Parse action that turns every parsed token into a Python ``int``.

    The first two positional arguments are accepted only so the function
    has the three-argument parse-action shape; they are ignored.
    """
    return list(map(int, t))
def convert_to_float(_, __, t):
    """
    Parse action that turns every parsed token into a Python ``float``.

    The first two positional arguments are accepted only so the function
    has the three-argument parse-action shape; they are ignored.
    """
    return list(map(float, t))
# ---- numeric expressions -------------------------------------------------
# Each expression below attaches a conversion parse action. When
# PY_310_OR_LATER is true the shared convert_to_integer / convert_to_float
# functions are used; otherwise an equivalent one-argument lambda is used.

# Unsigned run of decimal digits -> int.
integer = (
    Word(nums)
    .set_name("integer")
    .set_parse_action(
        convert_to_integer
        if PY_310_OR_LATER
        else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
    )
)
"""expression that parses an unsigned integer, converts to an int"""

# Run of hex digits (no "0x" prefix in the pattern) -> int via base 16.
hex_integer = (
    Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
)
"""expression that parses a hexadecimal integer, converts to an int"""

# Optional leading '+' or '-' followed by decimal digits -> int.
signed_integer = (
    Regex(r"[+-]?\d+")
    .set_name("signed integer")
    .set_parse_action(
        convert_to_integer
        if PY_310_OR_LATER
        else lambda t: [int(tt) for tt in t]  # type: ignore[misc]
    )
)
"""expression that parses an integer with optional leading sign, converts to an int"""

# numerator "/" denominator; both sides are converted to float here.
# NOTE(review): signed_integer() presumably yields a copy, so replacing the
# parse action does not affect signed_integer itself - confirm.
fraction = (
    signed_integer().set_parse_action(
        convert_to_float
        if PY_310_OR_LATER
        else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
    )
    + "/"
    + signed_integer().set_parse_action(
        convert_to_float
        if PY_310_OR_LATER
        else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
    )
).set_name("fraction")
"""fractional expression of an integer divided by an integer, converts to a float"""
# Divide the first token (numerator) by the last token (denominator).
fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

# Either a bare fraction, or an integer optionally followed by a fraction;
# a '-' between the integer and the fraction is matched but suppressed.
mixed_integer = (
    fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
).set_name("fraction or mixed integer-fraction")
"""mixed integer of the form 'integer - fraction', with optional leading integer, converts to a float"""
# sum() adds the integer part and the fraction part (when both are present).
mixed_integer.add_parse_action(sum)

# Signed number that must contain a decimal point ("1.", "1.5", ".5").
real = (
    Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
    .set_name("real number")
    .set_parse_action(
        convert_to_float
        if PY_310_OR_LATER
        else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
    )
)
"""expression that parses a floating point number, converts to a float"""

# Digits with a mandatory exponent, or a decimal-point form with an
# optional exponent.
sci_real = (
    Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
    .set_name("real number with scientific notation")
    .set_parse_action(
        convert_to_float
        if PY_310_OR_LATER
        else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
    )
)
"""expression that parses a floating point number with optional
scientific notation, converts to a float"""

# streamlining this expression makes the docs nicer-looking
# Alternatives are tried in the order written: sci_real, real, signed_integer.
number = (sci_real | real | signed_integer).set_name("number").streamline()
"""any numeric expression, converts to the corresponding Python type"""

# Single regex covering integers, reals and exponent forms; always -> float.
fnumber = (
    Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
    .set_name("fnumber")
    .set_parse_action(
        convert_to_float
        if PY_310_OR_LATER
        else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
    )
)
"""any int or real number, always converts to a float"""

# Like fnumber, plus case-insensitive "nan", "inf" and "infinity"
# (the (?i:...) group makes the whole pattern case-insensitive).
ieee_float = (
    Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
    .set_name("ieee_float")
    .set_parse_action(
        convert_to_float
        if PY_310_OR_LATER
        else lambda t: [float(tt) for tt in t]  # type: ignore[misc]
    )
)
"""any floating-point literal (int, real number, infinity, or NaN), converts to a float"""
# ---- identifiers and IP addresses ----------------------------------------

identifier = Word(identchars, identbodychars).set_name("identifier")
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

# Four dot-separated octets, each limited to 0-255 by the alternation.
ipv4_address = Regex(
    r"(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(?:\.(?:25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
).set_name("IPv4 address")
"IPv4 address (``0.0.0.0 - 255.255.255.255``)"

# One IPv6 group: 1 to 4 hex digits.
_ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
# Full form: exactly eight groups separated by ':'.
_full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
    "full IPv6 address"
)
# Short form: "::" with up to seven groups optionally on either side.
_short_ipv6_address = (
    Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    + "::"
    + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
).set_name("short IPv6 address")
# Reject a short form unless fewer than 8 of its tokens are hex groups.
_short_ipv6_address.add_condition(
    lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
)
# Mixed form: literal "::ffff:" prefix followed by an IPv4 address.
_mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
# The alternatives are tried in the order full, mixed, short; Combine wraps
# the match (presumably joining the pieces into one token - confirm).
ipv6_address = Combine(
    (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
        "IPv6 address"
    )
).set_name("IPv6 address")
"IPv6 address (long, short, or mixed form)"
# Six two-digit hex groups. The first separator is captured and the
# back-reference \1 forces the remaining four separators to be the same
# character (':', '.' or '-'), so mixed delimiters do not match.
mac_address = Regex(
    r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
).set_name("MAC address")
"MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
def convert_to_date(fmt: str = "%Y-%m-%d"):
    """
    Build a parse action that converts a parsed date string into a
    Python ``datetime.date``.

    Params -

    - fmt - format string handed to ``datetime.strptime`` (default= ``"%Y-%m-%d"``)

    The returned action raises ``ParseException`` (carrying the
    ``ValueError`` message) when the first token does not match ``fmt``.

    Example:

    .. testcode::

        date_expr = pyparsing_common.iso8601_date.copy()
        date_expr.set_parse_action(pyparsing_common.convert_to_date())
        print(date_expr.parse_string("1999-12-31"))

    prints:

    .. testoutput::

        [datetime.date(1999, 12, 31)]
    """

    def cvt_fn(instring, loc, toks):
        # Only the first token is converted.
        try:
            parsed = datetime.strptime(toks[0], fmt)
        except ValueError as ve:
            raise ParseException(instring, loc, str(ve))
        return parsed.date()

    return cvt_fn
def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
    """Build a parse action that converts a parsed datetime string
    into a Python :class:`datetime.datetime`.

    Params -

    - fmt - format string handed to :class:`datetime.strptime` (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

    The returned action raises ``ParseException`` (carrying the
    ``ValueError`` message) when the first token does not match ``fmt``.

    Example:

    .. testcode::

        dt_expr = pyparsing_common.iso8601_datetime.copy()
        dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
        print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

    prints:

    .. testoutput::

        [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
    """

    def cvt_fn(instring, loc, toks):
        # Only the first token is converted.
        try:
            parsed = datetime.strptime(toks[0], fmt)
        except ValueError as ve:
            raise ParseException(instring, loc, str(ve))
        return parsed

    return cvt_fn
# Named groups: year, month, day. Month and day are optional in the
# pattern, so "yyyy" and "yyyy-mm" also match.
iso8601_date = Regex(
    r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
).set_name("ISO8601 date")
"ISO8601 date (``yyyy-mm-dd``)"

# Named groups: year, month, day, hour, minute, second, tz. Date, hour and
# minute are required; seconds (with optional fraction) and tz are optional.
# The date/time separator may be 'T' or a single space.
iso8601_datetime = Regex(
    r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
).set_name("ISO8601 datetime")
"ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
def as_datetime(s, l, t):
    """Parse action to convert parsed dates or datetimes to a Python
    :class:`datetime.datetime`.

    This parse action will use the year, month, day, etc. results
    names defined in the ISO8601 date expressions, but it can be
    used with any expression that provides one or more of these fields.

    Omitted fields will default to fields from Jan 1, 00:00:00.
    Fractional seconds are carried into the result's ``microsecond``
    field (rounded to the nearest microsecond). A ``tz`` field, if
    present, is not consulted; the returned datetime is naive.

    Params -

    - s - the original input string (used only for error reporting)
    - l - the match location in ``s`` (used only for error reporting)
    - t - parsed results exposing ``year``, ``month``, ``day``, ``hour``,
      ``minute`` and ``second`` attributes (empty/falsy when omitted)

    Invalid dates will raise a :class:`ParseException` with the
    error message indicating the invalid date fields.
    """
    # Strip leading zeros from the year; an all-zero or empty year becomes 0,
    # which datetime() rejects and is reported as a ParseException below.
    year = int(t.year.lstrip("0") or 0)
    month = int(t.month or 1)
    day = int(t.day or 1)
    hour = int(t.hour or 0)
    minute = int(t.minute or 0)
    second = float(t.second or 0)

    whole_seconds = int(second)
    # datetime() expects *micro*seconds. Round rather than truncate so binary
    # float error cannot turn ".999" into 998999, and clamp so a fraction that
    # rounds up to a whole second stays inside the valid 0..999999 range.
    microseconds = min(round((second - whole_seconds) * 1_000_000), 999_999)

    try:
        return datetime(
            year, month, day, hour, minute, whole_seconds, microseconds
        )
    except ValueError as ve:
        # Report against the input string `s` (not the results object) so the
        # exception can locate the failure in the original text.
        raise ParseException(s, l, f"Invalid date/time: {ve}").with_traceback(
            ve.__traceback__
        ) from None
# The validated variants attach as_datetime as an additional parse action and
# are only defined when PY_310_OR_LATER is true.
# NOTE(review): the original indentation was lost in this copy; the two
# *_validated assignments are taken to be the body of this `if` - confirm.
if PY_310_OR_LATER:
    iso8601_date_validated = iso8601_date().add_parse_action(as_datetime)
    "Validated ISO8601 date strings, raising :class:`ParseException` for invalid date values."
    iso8601_datetime_validated = iso8601_datetime().add_parse_action(as_datetime)
    "Validated ISO8601 date and time strings, raising :class:`ParseException` for invalid date/time values."

# Hex digits grouped 8-4-4-4-12, separated by '-'.
uuid = Regex(r"[0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name(
    "UUID"
)
"UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

# Matches any opening or closing HTML tag with its output suppressed;
# used by strip_html_tags.
_html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
def strip_html_tags(s: str, l: int, tokens: ParseResults):
    """Parse action that removes HTML tags from the first parsed token.

    The token text is run through ``_html_stripper.transform_string`` and
    the transformed string is returned. ``s`` and ``l`` are not used.

    Example:

    .. testcode::

        # strip HTML links from normal text
        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        td, td_end = make_html_tags("TD")
        table_text = td + SkipTo(td_end).set_parse_action(
            pyparsing_common.strip_html_tags)("body") + td_end
        print(table_text.parse_string(text).body)

    Prints:

    .. testoutput::

        More info at the pyparsing wiki page
    """
    stripper = pyparsing_common._html_stripper
    html_text = tokens[0]
    return stripper.transform_string(html_text)
# One unquoted list item: one or more words of printable characters (commas
# excluded), not starting at a comma or at end of line. Whitespace after a
# word (spaces/tabs) is kept only when it is NOT followed by end of line or
# a comma, i.e. only when more item text follows.
_commasepitem = (
    Combine(
        OneOrMore(
            ~Literal(",")
            + ~LineEnd()
            + Word(printables, exclude_chars=",")
            + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
        )
    )
    .streamline()
    .set_name("commaItem")
)
# Each item is a quoted string or an unquoted item; a missing item yields
# the default "".
comma_separated_list = DelimitedList(
    Opt(quoted_string.copy() | _commasepitem, default="")
).set_name("comma separated list")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
def upcase_tokens(s, l, t):
    """Parse action that returns a list of the tokens, each upper-cased.

    ``s`` and ``l`` are not used.
    """
    return [token.upper() for token in t]
def downcase_tokens(s, l, t):
    """Parse action that returns a list of the tokens, each lower-cased.

    ``s`` and ``l`` are not used.
    """
    return [token.lower() for token in t]
# fmt: off
# URL pattern assembled from adjacent raw-string fragments (implicit string
# concatenation). Named groups: url, scheme, auth, host, port, path, query,
# fragment.
# NOTE(review): no flags argument is passed to Regex here, and the host
# character classes list lower-case letters only - confirm whether
# upper-case host names are meant to match.
url = Regex(
    # https://mathiasbynens.be/demo/url-regex
    # https://gist.github.com/dperini/729294
    r"(?P<url>"
    # protocol identifier (optional)
    # short syntax // still required
    r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)"
    # user:pass BasicAuth (optional)
    r"(?:(?P<auth>\S+(?::\S*)?)@)?"
    r"(?P<host>"
    # IP address exclusion
    # private & local networks
    r"(?!(?:10|127)(?:\.\d{1,3}){3})"
    r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
    r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
    # IP address dotted notation octets
    # excludes loopback network 0.0.0.0
    # excludes reserved space >= 224.0.0.0
    # excludes network & broadcast addresses
    # (first & last IP address of each class)
    r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
    r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
    r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
    r"|"
    # host & domain names, may end with dot
    # can be replaced by a shortest alternative
    # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
    r"(?:"
    r"(?:"
    r"[a-z0-9\u00a1-\uffff]"
    r"[a-z0-9\u00a1-\uffff_-]{0,62}"
    r")?"
    r"[a-z0-9\u00a1-\uffff]\."
    r")+"
    # TLD identifier name, may end with dot
    r"(?:[a-z\u00a1-\uffff]{2,}\.?)"
    r")"
    # port number (optional)
    # (the pattern requires 2 to 5 digits, so a single-digit port does not match)
    r"(:(?P<port>\d{2,5}))?"
    # resource path (optional)
    r"(?P<path>\/[^?# ]*)?"
    # query string (optional)
    r"(\?(?P<query>[^#]*))?"
    # fragment (optional)
    r"(#(?P<fragment>\S*))?"
    r")"
).set_name("url")
"""
URL (http/https/ftp scheme)

.. versionchanged:: 3.1.0
   ``url`` named group added
"""
# fmt: on
# pre-PEP8 compatibility names
# Each camelCase alias wraps the corresponding snake_case parse action with
# replaced_by_pep8 and exposes the result as a staticmethod.
# NOTE(review): replaced_by_pep8 is defined elsewhere; presumably it returns
# a forwarding wrapper (possibly with a deprecation notice) - confirm.
# fmt: off
convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
# fmt: on
# Every attribute of pyparsing_common that is a ParserElement instance,
# in class-namespace order.
_builtin_exprs = list(
    filter(
        lambda attr: isinstance(attr, ParserElement),
        vars(pyparsing_common).values(),
    )
)
Xet Storage Details
- Size:
- 17.2 kB
- Xet hash:
- 2f82ba8bd48230da0396c0750c8b324803df52528668661d18c61cd45714fd5b
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.