Spaces:
Runtime error
Runtime error
| #======================================================================= | |
| # | |
| # Python Lexical Analyser | |
| # | |
| # Traditional Regular Expression Syntax | |
| # | |
| #======================================================================= | |
| from __future__ import absolute_import | |
| from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char | |
| from .Errors import PlexError | |
class RegexpSyntaxError(PlexError):
    """Signals a syntax error found while parsing a traditional regexp string."""
def re(s):
    """
    Translate the traditional regular expression string |s| into its
    Plex representation.

    The whole string is handed to an REParser, whose parse_re() result
    is returned unchanged.
    """
    parser = REParser(s)
    return parser.parse_re()
class REParser(object):
    """
    Recursive-descent parser that converts a traditional regexp string
    into Plex regexp objects.

    Scanner state:
      s   -- the pattern text being parsed
      i   -- index of the current character within s
      c   -- the current character, or '' once the input is exhausted
      end -- 1 once the scanner has moved past the last character, else 0
    """

    def __init__(self, s):
        self.s = s
        # Start one position before the text so that the priming call to
        # next() lands on index 0.
        self.i = -1
        self.end = 0
        self.next()

    def parse_re(self):
        """Parse the complete pattern, rejecting any unconsumed input."""
        result = self.parse_alt()
        if self.end:
            return result
        # Something (e.g. an unmatched ')') stopped the parse early.
        self.error("Unexpected %s" % repr(self.c))
        return result

    def parse_alt(self):
        """Parse one or more sequences separated by '|'."""
        first = self.parse_seq()
        if self.c != '|':
            # No alternation present: hand back the lone sequence as is.
            return first
        branches = [first]
        while self.c == '|':
            self.next()
            branches.append(self.parse_seq())
        return Alt(*branches)

    def parse_seq(self):
        """Parse consecutive modified primitives up to '|', ')' or the end."""
        items = []
        while not (self.end or self.c in "|)"):
            items.append(self.parse_mod())
        return Seq(*items)

    def parse_mod(self):
        """Parse a primitive and wrap it for each trailing *, + or ?."""
        # '*' and '+' are looked up here; the only other modifier, '?',
        # falls through to Opt.
        wrappers = {'*': Rep, '+': Rep1}
        node = self.parse_prim()
        while not self.end and self.c in "*+?":
            node = wrappers.get(self.c, Opt)(node)
            self.next()
        return node

    def parse_prim(self):
        """Parse a single primitive: ., ^, $, (group), [charset] or a char."""
        ch = self.get()
        if ch == '.':
            return AnyBut("\n")
        if ch == '^':
            return Bol
        if ch == '$':
            return Eol
        if ch == '(':
            group = self.parse_alt()
            self.expect(')')
            return group
        if ch == '[':
            charset = self.parse_charset()
            self.expect(']')
            return charset
        if ch == '\\':
            # A backslash makes the following character literal.
            ch = self.get()
        return Char(ch)

    def parse_charset(self):
        """Parse the inside of a [...] charset; the brackets are excluded."""
        members = []
        negated = self.c == '^'
        if negated:
            self.next()
        if self.c == ']':
            # A ']' in first position is a literal member, not the closer.
            members.append(']')
            self.next()
        while not self.end and self.c != ']':
            low = self.get()
            if self.c == '-' and self.lookahead(1) != ']':
                # Range "low-high": add every character from low to high.
                self.next()
                high = self.get()
                members.extend(
                    chr(code) for code in range(ord(low), ord(high) + 1))
            else:
                members.append(low)
        charset = ''.join(members)
        return AnyBut(charset) if negated else Any(charset)

    def next(self):
        """Move to the following character, flagging the end of input."""
        self.i += 1
        if self.i < len(self.s):
            self.c = self.s[self.i]
            return
        self.c = ''
        self.end = 1

    def get(self):
        """Return the current character and advance; error if at the end."""
        if self.end:
            self.error("Premature end of string")
        current = self.c
        self.next()
        return current

    def lookahead(self, n):
        """Return the character n places past the current one, or ''."""
        pos = self.i + n
        return self.s[pos] if pos < len(self.s) else ''

    def expect(self, c):
        """
        Consume character |c| if it is the current character;
        otherwise report a syntax error.
        """
        if self.c != c:
            self.error("Missing %s" % repr(c))
        else:
            self.next()

    def error(self, mess):
        """Raise RegexpSyntaxError for the pattern at the current position."""
        details = (repr(self.s), self.i, mess)
        raise RegexpSyntaxError(
            "Syntax error in regexp %s at position %d: %s" % details)