| | |
| | |
| |
|
| | |
| | |
| | |
| | from typing import Optional, Iterator, Tuple, List |
| |
|
| | from parso.python.tokenize import tokenize |
| | from parso.utils import parse_version_string |
| | from parso.python.token import PythonTokenTypes |
| |
|
| |
|
| | class NFAArc: |
| | def __init__(self, next_: 'NFAState', nonterminal_or_string: Optional[str]): |
| | self.next: NFAState = next_ |
| | self.nonterminal_or_string: Optional[str] = nonterminal_or_string |
| |
|
| | def __repr__(self): |
| | return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string) |
| |
|
| |
|
| | class NFAState: |
| | def __init__(self, from_rule: str): |
| | self.from_rule: str = from_rule |
| | self.arcs: List[NFAArc] = [] |
| |
|
| | def add_arc(self, next_, nonterminal_or_string=None): |
| | assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str) |
| | assert isinstance(next_, NFAState) |
| | self.arcs.append(NFAArc(next_, nonterminal_or_string)) |
| |
|
| | def __repr__(self): |
| | return '<%s: from %s>' % (self.__class__.__name__, self.from_rule) |
| |
|
| |
|
| | class GrammarParser: |
| | """ |
| | The parser for Python grammar files. |
| | """ |
| | def __init__(self, bnf_grammar: str): |
| | self._bnf_grammar = bnf_grammar |
| | self.generator = tokenize( |
| | bnf_grammar, |
| | version_info=parse_version_string('3.9') |
| | ) |
| | self._gettoken() |
| |
|
| | def parse(self) -> Iterator[Tuple[NFAState, NFAState]]: |
| | |
| | while self.type != PythonTokenTypes.ENDMARKER: |
| | while self.type == PythonTokenTypes.NEWLINE: |
| | self._gettoken() |
| |
|
| | |
| | self._current_rule_name = self._expect(PythonTokenTypes.NAME) |
| | self._expect(PythonTokenTypes.OP, ':') |
| |
|
| | a, z = self._parse_rhs() |
| | self._expect(PythonTokenTypes.NEWLINE) |
| |
|
| | yield a, z |
| |
|
| | def _parse_rhs(self): |
| | |
| | a, z = self._parse_items() |
| | if self.value != "|": |
| | return a, z |
| | else: |
| | aa = NFAState(self._current_rule_name) |
| | zz = NFAState(self._current_rule_name) |
| | while True: |
| | |
| | |
| | aa.add_arc(a) |
| | z.add_arc(zz) |
| | if self.value != "|": |
| | break |
| |
|
| | self._gettoken() |
| | a, z = self._parse_items() |
| | return aa, zz |
| |
|
| | def _parse_items(self): |
| | |
| | a, b = self._parse_item() |
| | while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \ |
| | or self.value in ('(', '['): |
| | c, d = self._parse_item() |
| | |
| | b.add_arc(c) |
| | b = d |
| | return a, b |
| |
|
| | def _parse_item(self): |
| | |
| | if self.value == "[": |
| | self._gettoken() |
| | a, z = self._parse_rhs() |
| | self._expect(PythonTokenTypes.OP, ']') |
| | |
| | |
| | a.add_arc(z) |
| | return a, z |
| | else: |
| | a, z = self._parse_atom() |
| | value = self.value |
| | if value not in ("+", "*"): |
| | return a, z |
| | self._gettoken() |
| | |
| | z.add_arc(a) |
| | if value == "+": |
| | return a, z |
| | else: |
| | |
| | |
| | return a, a |
| |
|
| | def _parse_atom(self): |
| | |
| | if self.value == "(": |
| | self._gettoken() |
| | a, z = self._parse_rhs() |
| | self._expect(PythonTokenTypes.OP, ')') |
| | return a, z |
| | elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING): |
| | a = NFAState(self._current_rule_name) |
| | z = NFAState(self._current_rule_name) |
| | |
| | a.add_arc(z, self.value) |
| | self._gettoken() |
| | return a, z |
| | else: |
| | self._raise_error("expected (...) or NAME or STRING, got %s/%s", |
| | self.type, self.value) |
| |
|
| | def _expect(self, type_, value=None): |
| | if self.type != type_: |
| | self._raise_error("expected %s, got %s [%s]", |
| | type_, self.type, self.value) |
| | if value is not None and self.value != value: |
| | self._raise_error("expected %s, got %s", value, self.value) |
| | value = self.value |
| | self._gettoken() |
| | return value |
| |
|
| | def _gettoken(self): |
| | tup = next(self.generator) |
| | self.type, self.value, self.begin, prefix = tup |
| |
|
| | def _raise_error(self, msg, *args): |
| | if args: |
| | try: |
| | msg = msg % args |
| | except: |
| | msg = " ".join([msg] + list(map(str, args))) |
| | line = self._bnf_grammar.splitlines()[self.begin[0] - 1] |
| | raise SyntaxError(msg, ('<grammar>', self.begin[0], |
| | self.begin[1], line)) |
| |
|