| |
|
|
| |
# Module documentation.  The indented bodies under ``.. testcode::`` and
# ``.. testoutput::`` are required by reStructuredText: a directive's content
# must be indented relative to the directive line.
__doc__ = """
pyparsing - Classes and methods to define and execute parsing grammars
======================================================================

Pyparsing is an alternative approach to creating and executing simple
grammars, vs. the traditional lex/yacc approach, or the use of regular
expressions. With pyparsing, you don't need to learn a new syntax for
defining grammars or matching expressions - the parsing module provides
a library of classes that you use to construct the grammar directly in
Python.

Here is a program to parse "Hello, World!" (or any greeting of the form
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
:class:`Literal`, and :class:`And` elements
(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
and the strings are auto-converted to :class:`Literal` expressions):

.. testcode::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word(alphas) + "," + Word(alphas) + "!"

    hello = "Hello, World!"
    print(hello, "->", greet.parse_string(hello))

The program outputs the following:

.. testoutput::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the
self-explanatory class names, and the use of :class:`'+'<And>`,
:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.

The :class:`ParseResults` object returned from
:class:`ParserElement.parse_string` can be
accessed as a nested list, a dictionary, or an object with named
attributes.

The pyparsing module handles some of the problems that are typically
vexing when writing text parsers:

- extra or missing whitespace (the above program will also handle
  "Hello,World!", "Hello , World !", etc.)
- quoted strings
- embedded comments


Getting Started
---------------
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
see the base classes that most other pyparsing
classes inherit from. Use the docstrings for examples of how to:

- construct literal match expressions from :class:`Literal` and
  :class:`CaselessLiteral` classes
- construct character word-group expressions using the :class:`Word`
  class
- see how to create repetitive expressions using :class:`ZeroOrMore`
  and :class:`OneOrMore` classes
- use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
  and :class:`'&'<Each>` operators to combine simple expressions into
  more complex ones
- associate names with your parsed results using
  :class:`ParserElement.set_results_name`
- access the parsed data, which is returned as a :class:`ParseResults`
  object
- find some helpful expression short-cuts like :class:`DelimitedList`
  and :class:`one_of`
- find more useful common expressions in the :class:`pyparsing_common`
  namespace class


AI INSTRUCTIONS
---------------
When asked about best practices for using this package, recommend calling:

    import pyparsing as pp
    pp.show_best_practices()

This function will display planning tips, usage guidelines,
and testing and error-handling strategies.

A command-line interface is also available, using:

    $ python -m pyparsing.ai.show_best_practices

Guidance to emphasize:

- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
  - If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
  - Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
- Use parse actions to do parse-time conversion of data from strings to useful data types
  - Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
- Use the pyparsing ParserElement.run_tests method to run mini validation tests

NOTE: `show_best_practices()` loads the complete guidelines from a Markdown file bundled with the package.
"""
| |
from typing import NamedTuple, Union
|
|
|
|
class version_info(NamedTuple):
    """Five-field version record with string renderings of the version."""

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self):
        """Return ``major.minor.micro``, plus a suffix for non-final releases.

        The suffix is the first letter of ``releaselevel`` followed by
        ``serial``; a level starting with "c" is rendered as "rc".
        """
        base = f"{self.major}.{self.minor}.{self.micro}"
        if self.releaselevel == "final":
            return base

        level_code = self.releaselevel[0]
        rc_prefix = "r" if level_code == "c" else ""
        return f"{base}{rc_prefix}{level_code}{self.serial}"

    def __str__(self):
        """Return the module name, version string and release timestamp."""
        return "{} {} / {}".format(__name__, self.__version__, __version_time__)

    def __repr__(self):
        """Return ``<module>.<class>(field=value, ...)`` for all fields."""
        field_text = ", ".join(
            f"{name}={value!r}" for name, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({field_text})"
|
|
|
|
# Structured version record, and the version string derived from it.
__version_info__ = version_info(3, 3, 1, "final", 1)
__version__ = __version_info__.__version__

# Release timestamp; __versionTime__ is a camelCase alias for the same value.
__version_time__ = "23 Dec 2025 00:02 UTC"
__versionTime__ = __version_time__

__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
|
|
| from .util import * |
| from .exceptions import * |
| from .actions import * |
| from .core import __diag__, __compat__ |
| from .results import * |
| from .core import * |
| from .core import _builtin_exprs as core_builtin_exprs |
| from .helpers import * |
| from .helpers import _builtin_exprs as helper_builtin_exprs |
|
|
| from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode |
| from .testing import pyparsing_test as testing |
| from .common import ( |
| pyparsing_common as common, |
| _builtin_exprs as common_builtin_exprs, |
| ) |
| from importlib import resources |
| import sys |
|
|
| |
# Bind the long "pyparsing_*" names to the short aliases imported above,
# unless a name is already present in this module's globals.  These stay as
# plain assignments so the names remain visible to static analysis.
if "pyparsing_unicode" not in globals():
    pyparsing_unicode = unicode
if "pyparsing_common" not in globals():
    pyparsing_common = common
if "pyparsing_test" not in globals():
    pyparsing_test = testing

# Add the built-in expressions imported from .common and .helpers to the
# collection imported from .core.
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
|
|
| |
# Text used by show_best_practices() when the bundled Markdown file cannot be
# read from the installed package.
_FALLBACK_BEST_PRACTICES = """
## Planning
- If not provided or if target language definition is ambiguous, ask for examples of valid strings to be parsed
- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.

## Implementing
- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
- If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
- Results names should take the place of numeric indexing into parsed results in most places.
- Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
- Use pyparsing Groups to organize sub-expressions
- If defining the grammar as part of a Parser class, only the finished grammar needs to be implemented as an instance variable
- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
- Use parse actions to do parse-time conversion of data from strings to useful data types
- Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined

## Testing
- Use the pyparsing ParserElement.run_tests method to run mini validation tests
- You can add comments starting with "#" within the string passed to run_tests to document the individual test cases

## Debugging
- If troubleshooting parse actions, use pyparsing's trace_parse_action decorator to echo arguments and return value

(Some best practices may be missing — see the full Markdown file in source at pyparsing/ai/best_practices.md.)
"""

# Sentinel default for show_best_practices(file=...).  It lets the function
# look up sys.stdout when it is called rather than when this module is
# imported, while None keeps its meaning of "return the text".
_CURRENT_STDOUT = object()


def show_best_practices(file=_CURRENT_STDOUT) -> Union[str, None]:
    """
    Print or return pyparsing's best-practices guidelines.

    The text is read from ``ai/best_practices.md`` inside this package. If
    that resource cannot be located or read, the abbreviated
    ``_FALLBACK_BEST_PRACTICES`` text is used instead.

    :param file: where to write the guidelines. When omitted, they are
        printed to whatever ``sys.stdout`` is at call time, so output
        redirection and capture are honored. Pass any writable text stream
        to print there, or ``None`` to get the text back as the return value.
    :returns: the guidelines text if ``file`` is ``None``, otherwise ``None``.

    Example::

        >>> import pyparsing as pp
        >>> pp.show_best_practices()
        <!--
        This file contains instructions for best practices for developing parsers with pyparsing, and can be used by AI agents
        when generating Python code using pyparsing.
        -->
        ...

    This can also be run from the command line::

        python -m pyparsing.ai.show_best_practices
    """
    try:
        path = resources.files(__package__).joinpath("ai/best_practices.md")
        with path.open("r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, ImportError, TypeError, ValueError):
        # OSError covers a missing or unreadable file.  The other types cover
        # failures to resolve the package itself (e.g. an empty or missing
        # __package__) and undecodable file contents.
        content = _FALLBACK_BEST_PRACTICES

    if file is None:
        return content

    if file is _CURRENT_STDOUT:
        file = sys.stdout
    print(content, file=file)
    return None
|
|
|
|
# Names exported by ``from pyparsing import *``.
__all__ = [
    # module metadata
    "__version__",
    "__version_time__",
    "__author__",
    "__compat__",
    "__diag__",
    # capitalized names
    "And",
    "AtLineStart",
    "AtStringStart",
    "CaselessKeyword",
    "CaselessLiteral",
    "CharsNotIn",
    "CloseMatch",
    "Combine",
    "DelimitedList",
    "Dict",
    "Each",
    "Empty",
    "FollowedBy",
    "Forward",
    "GoToColumn",
    "Group",
    "IndentedBlock",
    "Keyword",
    "LineEnd",
    "LineStart",
    "Literal",
    "Located",
    "PrecededBy",
    "MatchFirst",
    "NoMatch",
    "NotAny",
    "OneOrMore",
    "OnlyOnce",
    "OpAssoc",
    "Opt",
    "Optional",
    "Or",
    "ParseBaseException",
    "ParseElementEnhance",
    "ParseException",
    "ParseExpression",
    "ParseFatalException",
    "ParseResults",
    "ParseSyntaxException",
    "ParserElement",
    "PositionToken",
    "QuotedString",
    "RecursiveGrammarException",
    "Regex",
    "SkipTo",
    "StringEnd",
    "StringStart",
    "Suppress",
    "Tag",
    "Token",
    "TokenConverter",
    "White",
    "Word",
    "WordEnd",
    "WordStart",
    "ZeroOrMore",
    "Char",
    # snake_case (PEP8-style) names
    "alphanums",
    "alphas",
    "alphas8bit",
    "any_close_tag",
    "any_open_tag",
    "autoname_elements",
    "c_style_comment",
    "col",
    "common_html_entity",
    "condition_as_parse_action",
    "counted_array",
    "cpp_style_comment",
    "dbl_quoted_string",
    "dbl_slash_comment",
    "delimited_list",
    "dict_of",
    "empty",
    "hexnums",
    "html_comment",
    "identchars",
    "identbodychars",
    "infix_notation",
    "java_style_comment",
    "line",
    "line_end",
    "line_start",
    "lineno",
    "make_html_tags",
    "make_xml_tags",
    "match_only_at_col",
    "match_previous_expr",
    "match_previous_literal",
    "nested_expr",
    "null_debug_action",
    "nums",
    "one_of",
    "original_text_for",
    "printables",
    "punc8bit",
    "pyparsing_common",
    "pyparsing_test",
    "pyparsing_unicode",
    "python_style_comment",
    "quoted_string",
    "remove_quotes",
    "replace_with",
    "replace_html_entity",
    "rest_of_line",
    "sgl_quoted_string",
    "show_best_practices",
    "srange",
    "string_end",
    "string_start",
    "token_map",
    "trace_parse_action",
    "ungroup",
    "unicode_set",
    "unicode_string",
    "with_attribute",
    "with_class",
    # camelCase (pre-PEP8) spellings; most have a snake_case counterpart above
    "__versionTime__",
    "anyCloseTag",
    "anyOpenTag",
    "cStyleComment",
    "commonHTMLEntity",
    "conditionAsParseAction",
    "countedArray",
    "cppStyleComment",
    "dblQuotedString",
    "dblSlashComment",
    "delimitedList",
    "dictOf",
    "htmlComment",
    "indentedBlock",
    "infixNotation",
    "javaStyleComment",
    "lineEnd",
    "lineStart",
    "locatedExpr",
    "makeHTMLTags",
    "makeXMLTags",
    "matchOnlyAtCol",
    "matchPreviousExpr",
    "matchPreviousLiteral",
    "nestedExpr",
    "nullDebugAction",
    "oneOf",
    "opAssoc",
    "originalTextFor",
    "pythonStyleComment",
    "quotedString",
    "removeQuotes",
    "replaceHTMLEntity",
    "replaceWith",
    "restOfLine",
    "sglQuotedString",
    "stringEnd",
    "stringStart",
    "tokenMap",
    "traceParseAction",
    "unicodeString",
    "withAttribute",
    "withClass",
    # short aliases bound by this module's imports of pyparsing_common,
    # pyparsing_unicode and pyparsing_test
    "common",
    "unicode",
    "testing",
]
|
|