Spaces:
Paused
Paused
File size: 6,382 Bytes
cb1a5c9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | r"""Evaluate match expressions, as used by `-k` and `-m`.
The grammar is:
expression: expr? EOF
expr: and_expr ('or' and_expr)*
and_expr: not_expr ('and' not_expr)*
not_expr: 'not' not_expr | '(' expr ')' | ident
ident: (\w|:|\+|-|\.|\[|\]|\\|/)+
The semantics are:
- Empty expression evaluates to False.
- ident evaluates to True of False according to a provided matcher function.
- or/and/not evaluate according to the usual boolean semantics.
"""
import ast
import dataclasses
import enum
import re
import types
from typing import Callable
from typing import Iterator
from typing import Mapping
from typing import NoReturn
from typing import Optional
from typing import Sequence
__all__ = [
"Expression",
"ParseError",
]
class TokenType(enum.Enum):
LPAREN = "left parenthesis"
RPAREN = "right parenthesis"
OR = "or"
AND = "and"
NOT = "not"
IDENT = "identifier"
EOF = "end of input"
@dataclasses.dataclass(frozen=True)
class Token:
__slots__ = ("type", "value", "pos")
type: TokenType
value: str
pos: int
class ParseError(Exception):
"""The expression contains invalid syntax.
:param column: The column in the line where the error occurred (1-based).
:param message: A description of the error.
"""
def __init__(self, column: int, message: str) -> None:
self.column = column
self.message = message
def __str__(self) -> str:
return f"at column {self.column}: {self.message}"
class Scanner:
__slots__ = ("tokens", "current")
def __init__(self, input: str) -> None:
self.tokens = self.lex(input)
self.current = next(self.tokens)
def lex(self, input: str) -> Iterator[Token]:
pos = 0
while pos < len(input):
if input[pos] in (" ", "\t"):
pos += 1
elif input[pos] == "(":
yield Token(TokenType.LPAREN, "(", pos)
pos += 1
elif input[pos] == ")":
yield Token(TokenType.RPAREN, ")", pos)
pos += 1
else:
match = re.match(r"(:?\w|:|\+|-|\.|\[|\]|\\|/)+", input[pos:])
if match:
value = match.group(0)
if value == "or":
yield Token(TokenType.OR, value, pos)
elif value == "and":
yield Token(TokenType.AND, value, pos)
elif value == "not":
yield Token(TokenType.NOT, value, pos)
else:
yield Token(TokenType.IDENT, value, pos)
pos += len(value)
else:
raise ParseError(
pos + 1,
f'unexpected character "{input[pos]}"',
)
yield Token(TokenType.EOF, "", pos)
def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]:
if self.current.type is type:
token = self.current
if token.type is not TokenType.EOF:
self.current = next(self.tokens)
return token
if reject:
self.reject((type,))
return None
def reject(self, expected: Sequence[TokenType]) -> NoReturn:
raise ParseError(
self.current.pos + 1,
"expected {}; got {}".format(
" OR ".join(type.value for type in expected),
self.current.type.value,
),
)
# True, False and None are legal match expression identifiers,
# but illegal as Python identifiers. To fix this, this prefix
# is added to identifiers in the conversion to Python AST.
IDENT_PREFIX = "$"
def expression(s: Scanner) -> ast.Expression:
if s.accept(TokenType.EOF):
ret: ast.expr = ast.Constant(False)
else:
ret = expr(s)
s.accept(TokenType.EOF, reject=True)
return ast.fix_missing_locations(ast.Expression(ret))
def expr(s: Scanner) -> ast.expr:
ret = and_expr(s)
while s.accept(TokenType.OR):
rhs = and_expr(s)
ret = ast.BoolOp(ast.Or(), [ret, rhs])
return ret
def and_expr(s: Scanner) -> ast.expr:
ret = not_expr(s)
while s.accept(TokenType.AND):
rhs = not_expr(s)
ret = ast.BoolOp(ast.And(), [ret, rhs])
return ret
def not_expr(s: Scanner) -> ast.expr:
if s.accept(TokenType.NOT):
return ast.UnaryOp(ast.Not(), not_expr(s))
if s.accept(TokenType.LPAREN):
ret = expr(s)
s.accept(TokenType.RPAREN, reject=True)
return ret
ident = s.accept(TokenType.IDENT)
if ident:
return ast.Name(IDENT_PREFIX + ident.value, ast.Load())
s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))
class MatcherAdapter(Mapping[str, bool]):
"""Adapts a matcher function to a locals mapping as required by eval()."""
def __init__(self, matcher: Callable[[str], bool]) -> None:
self.matcher = matcher
def __getitem__(self, key: str) -> bool:
return self.matcher(key[len(IDENT_PREFIX) :])
def __iter__(self) -> Iterator[str]:
raise NotImplementedError()
def __len__(self) -> int:
raise NotImplementedError()
class Expression:
"""A compiled match expression as used by -k and -m.
The expression can be evaluated against different matchers.
"""
__slots__ = ("code",)
def __init__(self, code: types.CodeType) -> None:
self.code = code
@classmethod
def compile(self, input: str) -> "Expression":
"""Compile a match expression.
:param input: The input expression - one line.
"""
astexpr = expression(Scanner(input))
code: types.CodeType = compile(
astexpr,
filename="<pytest match expression>",
mode="eval",
)
return Expression(code)
def evaluate(self, matcher: Callable[[str], bool]) -> bool:
"""Evaluate the match expression.
:param matcher:
Given an identifier, should return whether it matches or not.
Should be prepared to handle arbitrary strings as input.
:returns: Whether the expression matches or not.
"""
ret: bool = eval(self.code, {"__builtins__": {}}, MatcherAdapter(matcher))
return ret
|