| """ | |
| pygments.scanner | |
| ~~~~~~~~~~~~~~~~ | |
| This library implements a regex-based scanner. Some languages | |
| like Pascal are easy to parse but have some keywords that | |
| depend on the context. Because of this it's impossible to lex | |
| them just by using a regular expression lexer like the | |
| `RegexLexer`. | |
| Have a look at the `DelphiLexer` to get an idea of how to use | |
| this scanner. | |
| :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. | |
| :license: BSD, see LICENSE for details. | |
| """ | |
| import re | |
class EndOfText(RuntimeError):
    """
    Signals that a match function was called although the end
    of the text had already been reached.
    """
class Scanner:
    """
    Simple regex based scanner.

    All method patterns are regular expression strings (not
    compiled expressions!). Each pattern is compiled once with the
    scanner's default flags and cached on the instance.
    """

    # Pattern matching any single character. The inline ``(?s)`` flag makes
    # the dot match newlines too, independent of the scanner's own flags.
    _ANY_CHAR = '(?s).'

    def __init__(self, text, flags=0):
        """
        :param text: The text which should be scanned
        :param flags: default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        # Start offset of the most recent successful scan.
        self.start_pos = 0
        # Current offset into ``data``; matching is anchored here.
        self.pos = 0
        self.flags = flags
        # Value ``match`` had before the most recent call to ``scan``.
        self.last = None
        # Text matched by the most recent successful scan.
        self.match = None
        # Maps pattern string -> compiled pattern object.
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        return self.pos >= self.data_length

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object. (Doesn't touch pos). Use this for
        lookahead.

        :param pattern: regular expression string
        :return: the match object, or `None` if `pattern` does not
                 match at the current position
        :raises EndOfText: if the end of the text was already reached
        """
        if self.eos:
            raise EndOfText()
        try:
            compiled = self._re_cache[pattern]
        except KeyError:
            compiled = self._re_cache[pattern] = re.compile(pattern,
                                                            self.flags)
        return compiled.match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the end of the text was already reached
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        Note that ``last`` is overwritten with the current ``match``
        even when the pattern does not match; all other fields are
        left untouched in that case.

        :raises EndOfText: if the end of the text was already reached
        """
        # ``check`` performs the end-of-text test and the cached compile;
        # if it raises, no scanner state has been modified yet.
        m = self.check(pattern)
        self.last = self.match
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char.

        The character is consumed even if it is a newline and the
        scanner was created without ``re.DOTALL``.

        :raises EndOfText: if the end of the text was already reached
        """
        self.scan(self._ANY_CHAR)

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )