Buckets:
| from __future__ import annotations | |
| import contextlib | |
| import re | |
| from dataclasses import dataclass | |
| from typing import Generator, Mapping, NoReturn | |
| from .specifiers import Specifier | |
@dataclass
class Token:
    """A single lexical token.

    Attributes:
        name: the rule name that matched (a key of the tokenizer's rules).
        text: the exact source text the rule matched.
        position: offset of the match within the source string.
    """

    # NOTE: the @dataclass decorator is required — Tokenizer.check() constructs
    # tokens positionally via Token(name, match[0], self.position), which needs
    # the generated __init__.
    name: str
    text: str
    position: int
class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: tuple[int, int],
    ) -> None:
        super().__init__()
        # Human-readable description, the offending text, and the
        # (start, end) offsets of the problem within it.
        self.message = message
        self.source = source
        self.span = span

    def __str__(self) -> str:
        # Render a caret diagram: spaces up to the span start, tildes
        # across the span, and a caret at the span end.
        start, end = self.span
        marker = "".join((" " * start, "~" * (end - start), "^"))
        return f"{self.message}\n {self.source}\n {marker}"
# Default lexer rules for PEP 508 dependency specifiers and markers.
# Each entry maps a token name to a compiled regex tried at the current
# position by Tokenizer.check(). Order in this dict does not matter; the
# parser decides which rule to attempt.
DEFAULT_RULES: dict[str, re.Pattern[str]] = {
    "LEFT_PARENTHESIS": re.compile(r"\("),
    "RIGHT_PARENTHESIS": re.compile(r"\)"),
    "LEFT_BRACKET": re.compile(r"\["),
    "RIGHT_BRACKET": re.compile(r"\]"),
    "SEMICOLON": re.compile(r";"),
    "COMMA": re.compile(r","),
    # Single- or double-quoted string; no escape handling, no embedded
    # quotes of the same kind.
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    # Comparison operators used in markers (longest alternatives first so
    # e.g. "===" is not split into "==" + "=").
    "OP": re.compile(r"(===|==|~=|!=|<=|>=|<|>)"),
    "BOOLOP": re.compile(r"\b(or|and)\b"),
    "IN": re.compile(r"\bin\b"),
    "NOT": re.compile(r"\bnot\b"),
    # Known marker variable names; [._] accepts both the current and the
    # deprecated dotted spellings (e.g. os.name / os_name).
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extras?
                |dependency_groups
            )\b
        """,
        re.VERBOSE,
    ),
    # Version specifier clause (operator + version), built from the regex
    # fragments that Specifier itself uses so the two stay in sync.
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": re.compile(r"\@"),
    # A direct-reference URL: any run of non-whitespace characters.
    "URL": re.compile(r"[^ \t]+"),
    "IDENTIFIER": re.compile(r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b"),
    # Trailing ".*" of a prefix match (e.g. "==1.0.*").
    "VERSION_PREFIX_TRAIL": re.compile(r"\.\*"),
    # Trailing local version label (e.g. "+cpu" or "+local.build_1").
    "VERSION_LOCAL_LABEL_TRAIL": re.compile(r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*"),
    "WS": re.compile(r"[ \t]+"),
    # Matches only at end of input.
    "END": re.compile(r"$"),
}
class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches a named rule. Holds at most one token of lookahead (``next_token``),
    populated by :meth:`check` and cleared by :meth:`read`.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: Mapping[str, re.Pattern[str]],
    ) -> None:
        self.source = source
        self.rules = rules
        # One-token lookahead buffer: set by check(), consumed by read().
        self.next_token: Token | None = None
        # Current offset into ``source``.
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        # A pending unread token indicates a parser bug, not bad input.
        assert self.next_token is None, (
            f"Cannot check for {name!r}, already have {self.next_token!r}"
        )
        assert name in self.rules, f"Unknown token name: {name!r}"
        expression = self.rules[name]

        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        The token is *not* read.
        """
        if not self.check(name):
            raise self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it."""
        token = self.next_token
        assert token is not None

        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: int | None = None,
        span_end: int | None = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position.

        The span defaults to a zero-width span at the current position when
        ``span_start``/``span_end`` are not provided.
        """
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    # FIX: the decorator was missing. This method is written as a generator
    # (yield in the middle, validation afterwards) and is meant to be used as
    # ``with tokenizer.enclosing_tokens(...):`` — without
    # @contextlib.contextmanager that usage fails, and ``contextlib`` was
    # imported but unused.
    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Generator[None, None, None]:
        """Context manager that optionally consumes a matching token pair.

        If ``open_token`` is present it is consumed before the body runs and a
        matching ``close_token`` is required (and consumed) afterwards; a
        missing closer raises a syntax error spanning back to the opener. If
        ``open_token`` is absent, the body runs with no closing requirement.
        """
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()
Xet Storage Details
- Size:
- 5.42 kB
- Xet hash:
- 0febebf73cfc5b352a6d03c222e2c6e520263fd2d27a6e6d393017afaf402f23
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.