from __future__ import annotations

import contextlib
import re
from dataclasses import dataclass
from typing import Generator, Mapping, NoReturn

from .specifiers import Specifier


@dataclass
class Token:
    name: str
    text: str
    position: int


class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: tuple[int, int],
    ) -> None:
        self.span = span
        self.message = message
        self.source = source

        super().__init__()

    def __str__(self) -> str:
        marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
        return f"{self.message}\n {self.source}\n {marker}"


DEFAULT_RULES: dict[str, re.Pattern[str]] = {
    "LEFT_PARENTHESIS": re.compile(r"\("),
    "RIGHT_PARENTHESIS": re.compile(r"\)"),
    "LEFT_BRACKET": re.compile(r"\["),
    "RIGHT_BRACKET": re.compile(r"\]"),
    "SEMICOLON": re.compile(r";"),
    "COMMA": re.compile(r","),
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    "OP": re.compile(r"(===|==|~=|!=|<=|>=|<|>)"),
    "BOOLOP": re.compile(r"\b(or|and)\b"),
    "IN": re.compile(r"\bin\b"),
    "NOT": re.compile(r"\bnot\b"),
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extras?
                |dependency_groups
            )\b
        """,
        re.VERBOSE,
    ),
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": re.compile(r"\@"),
    "URL": re.compile(r"[^ \t]+"),
    "IDENTIFIER": re.compile(r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b"),
    "VERSION_PREFIX_TRAIL": re.compile(r"\.\*"),
    "VERSION_LOCAL_LABEL_TRAIL": re.compile(r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*"),
    "WS": re.compile(r"[ \t]+"),
    "END": re.compile(r"$"),
}
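
# Illustrative (a sketch): each rule is applied with .match() at an explicit
# offset, which is how Tokenizer.check() scans the source:
#
#     DEFAULT_RULES["OP"].match("foo >= 1.0", 4)  # matches ">="
#     DEFAULT_RULES["OP"].match("foo >= 1.0", 0)  # None: no operator at offset 0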


class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next
    token matches.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: Mapping[str, re.Pattern[str]],
    ) -> None:
        self.source = source
        self.rules = rules
        self.next_token: Token | None = None
        self.position = 0

    def consume(self, name: str) -> None:
        """Move past the named token, if it is at the current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        assert self.next_token is None, (
            f"Cannot check for {name!r}, already have {self.next_token!r}"
        )
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True
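
    # Illustrative (a sketch of the check/read contract):
    #
    #     t = Tokenizer("[extra]", rules=DEFAULT_RULES)
    #     t.check("LEFT_BRACKET")           # True; token loaded, must read next
    #     t.read()                          # Token("LEFT_BRACKET", "[", 0)
    #     t.check("IDENTIFIER", peek=True)  # True; token NOT loaded
    #     t.check("IDENTIFIER")             # non-peek check loads it for read()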

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        The token is read and returned.
        """
        if not self.check(name):
            self.raise_syntax_error(f"Expected {expected}")
        return self.read()
|
| | def read(self) -> Token: |
| | """Consume the next token and return it.""" |
| | token = self.next_token |
| | assert token is not None |
| |
|
| | self.position += len(token.text) |
| | self.next_token = None |
| |
|
| | return token |

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: int | None = None,
        span_end: int | None = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position."""
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )
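
    # Illustrative (a sketch): with both span arguments omitted, the error
    # points at the current position:
    #
    #     t = Tokenizer("name ??", rules=DEFAULT_RULES)
    #     t.expect("IDENTIFIER", expected="package name")
    #     t.consume("WS")
    #     t.expect("SPECIFIER", expected="version specifier")
    #     # -> ParserSyntaxError with span=(5, 5), caret under the first "?"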

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Generator[None, None, None]:
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()
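

# Illustrative use of enclosing_tokens (a sketch, assuming DEFAULT_RULES):
#
#     t = Tokenizer("black[dev]", rules=DEFAULT_RULES)
#     t.expect("IDENTIFIER", expected="package name")
#     with t.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET", around="extras"):
#         t.expect("IDENTIFIER", expected="extra name")
#     t.expect("END", expected="end of input")
#
# If the closing bracket were missing, the context manager would raise a
# ParserSyntaxError spanning back to the opening bracket.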