| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | from __future__ import absolute_import |
| |
|
| | from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char |
| | from .Errors import PlexError |
| |
|
| |
|
class RegexpSyntaxError(PlexError):
    """Signals that a traditional-syntax regexp string could not be parsed."""
| |
|
| |
|
def re(s):
    """
    Build the Plex representation of |s|, a regular expression written
    in the traditional string syntax.

    The work is delegated to REParser; whatever its parse_re() returns
    is handed back to the caller unchanged.
    """
    parser = REParser(s)
    return parser.parse_re()
| |
|
| |
|
class REParser(object):
    """
    Recursive-descent parser turning a traditional regular expression
    string into Plex regexp objects.

    Grammar handled, loosest binding first:

        alt     ::= seq ('|' seq)*
        seq     ::= mod*
        mod     ::= prim ('*' | '+' | '?')*
        prim    ::= '.' | '^' | '$' | '(' alt ')' | '[' charset ']'
                    | backslash char | char

    Every syntax problem is reported through error(), which raises
    RegexpSyntaxError.
    """

    def __init__(self, s):
        self.s = s      # the complete pattern text
        self.i = -1     # index of the current char; next() moves it to 0
        self.end = 0    # becomes 1 once the scanner has run past the end of s
        self.next()     # load the first char into self.c

    def parse_re(self):
        """
        Parse the whole pattern and return the resulting regexp.

        Raises RegexpSyntaxError if input is left over after the
        top-level alternation (for example an unmatched ')').
        """
        result = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return result

    def parse_alt(self):
        """Parse a set of alternative regexps separated by '|'."""
        first = self.parse_seq()
        if self.c != '|':
            # A single branch is returned as is, not wrapped in Alt.
            return first
        branches = [first]
        while self.c == '|':
            self.next()
            branches.append(self.parse_seq())
        return Alt(*branches)

    def parse_seq(self):
        """Parse a sequence of regexps, stopping at '|', ')' or end of input."""
        items = []
        # The end test must come first: at end of input self.c is '' and
        # '' is "in" every string.
        while not self.end and self.c not in "|)":
            items.append(self.parse_mod())
        return Seq(*items)

    def parse_mod(self):
        """Parse a primitive regexp followed by any number of *, +, ? modifiers."""
        result = self.parse_prim()
        while not self.end and self.c in "*+?":
            if self.c == '*':
                result = Rep(result)
            elif self.c == '+':
                result = Rep1(result)
            else:
                result = Opt(result)
            self.next()
        return result

    def parse_prim(self):
        """
        Parse a primitive regexp: '.', '^', '$', a parenthesised group,
        a bracketed charset, or a single (possibly backslash-escaped) char.
        """
        c = self.get()
        if c == '.':
            return AnyBut("\n")
        if c == '^':
            return Bol
        if c == '$':
            return Eol
        if c == '(':
            group = self.parse_alt()
            self.expect(')')
            return group
        if c == '[':
            charset = self.parse_charset()
            self.expect(']')
            return charset
        if c == '\\':
            # A backslash makes the following char literal.
            c = self.get()
        return Char(c)

    def parse_charset(self):
        """
        Parse a charset. Does not include the surrounding [].

        A leading '^' inverts the set, a ']' in first position is taken
        literally, and 'a-z' denotes an inclusive range. A '-' directly
        before the closing ']' is a literal '-'.

        Raises RegexpSyntaxError for a range whose end precedes its start.
        """
        chars = []
        invert = False
        if self.c == '^':
            invert = True
            self.next()
        if self.c == ']':
            chars.append(']')
            self.next()
        while not self.end and self.c != ']':
            c1 = self.get()
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                c2 = self.get()
                chars.extend(self._char_range(c1, c2))
            else:
                chars.append(c1)
        members = ''.join(chars)
        if invert:
            return AnyBut(members)
        return Any(members)

    def _char_range(self, first, last):
        """
        Return the list of chars from |first| through |last| inclusive.

        A reversed range (e.g. 'z-a') is reported as a syntax error;
        without this check it would expand to nothing and silently drop
        the range from the charset.
        """
        lo, hi = ord(first), ord(last)
        if lo > hi:
            self.error("Bad character range %s-%s" % (first, last))
        return [chr(code) for code in range(lo, hi + 1)]

    def next(self):
        """Advance to the next char; at end of input set c to '' and end to 1."""
        self.i += 1
        if self.i < len(self.s):
            self.c = self.s[self.i]
        else:
            self.c = ''
            self.end = 1

    def get(self):
        """
        Return the current char and advance past it.

        Raises RegexpSyntaxError if the input is already exhausted.
        """
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """Look ahead n chars; returns '' when that position is past the end."""
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position and skip it.
        Raises an exception otherwise.
        """
        if self.c != c:
            self.error("Missing %s" % repr(c))
        self.next()

    def error(self, mess):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
| |
|
| |
|
| |
|
| |
|