|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import |
|
|
|
|
|
from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char |
|
|
from .Errors import PlexError |
|
|
|
|
|
|
|
|
class RegexpSyntaxError(PlexError):
    """Signals a syntax error found while parsing a traditional regexp string."""
|
|
|
|
|
|
|
|
def re(s):
    """
    Translate |s|, a regular expression written in the traditional string
    notation, into its Plex representation.

    The string is handed to an REParser and the result of parsing it in
    full is returned.
    """
    parser = REParser(s)
    return parser.parse_re()
|
|
|
|
|
|
|
|
class REParser(object):
    """
    Recursive-descent parser turning a traditional regular expression
    string into Plex regular expression objects.

    Grammar, outermost rule first:

        re    ::= alt <end of string>
        alt   ::= seq ('|' seq)*
        seq   ::= mod*                      (stops at '|' or ')')
        mod   ::= prim ('*' | '+' | '?')*
        prim  ::= '.' | '^' | '$' | '(' alt ')' | '[' charset ']'
                | BACKSLASH char | char

    Parser state:
        s    -- the string being parsed
        i    -- index in s of the current character
        c    -- the current character, or '' once the end is reached
        end  -- true once the parser has moved past the last character

    All syntax errors are reported through error(), which raises
    RegexpSyntaxError.
    """

    def __init__(self, s):
        """Set up the parser on string |s| and load its first character."""
        self.s = s
        # next() increments before reading, so start one before index 0.
        self.i = -1
        self.end = False
        self.next()

    def parse_re(self):
        """
        Parse the complete string and return the resulting regexp.

        Raises a syntax error if any input is left unconsumed (for
        example an unmatched ')').
        """
        result = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return result

    def parse_alt(self):
        """Parse a set of alternative regexps separated by '|'."""
        first = self.parse_seq()
        if self.c != '|':
            # Single alternative: return it as is, without an Alt wrapper.
            return first
        alternatives = [first]
        while self.c == '|':
            self.next()
            alternatives.append(self.parse_seq())
        return Alt(*alternatives)

    def parse_seq(self):
        """Parse a sequence of regexps, up to '|', ')' or end of string."""
        items = []
        # The end test must come first: at the end self.c is '', and
        # '' is contained in every string.
        while not self.end and self.c not in "|)":
            items.append(self.parse_mod())
        return Seq(*items)

    def parse_mod(self):
        """Parse a primitive regexp followed by *, +, ? modifiers."""
        result = self.parse_prim()
        while not self.end and self.c in "*+?":
            if self.c == '*':
                result = Rep(result)
            elif self.c == '+':
                result = Rep1(result)
            else:
                result = Opt(result)
            self.next()
        return result

    def parse_prim(self):
        """
        Parse a primitive regexp.

        '.' gives any character but newline, '^' gives Bol, '$' gives Eol,
        '(' ... ')' a parenthesised sub-expression and '[' ... ']' a
        character set. A backslash makes the following character literal;
        any other character stands for itself.
        """
        c = self.get()
        if c == '.':
            result = AnyBut("\n")
        elif c == '^':
            result = Bol
        elif c == '$':
            result = Eol
        elif c == '(':
            result = self.parse_alt()
            self.expect(')')
        elif c == '[':
            result = self.parse_charset()
            self.expect(']')
        else:
            if c == '\\':
                # Escaped character: take the next one literally.
                c = self.get()
            result = Char(c)
        return result

    def parse_charset(self):
        """
        Parse a charset. Does not include the surrounding [].

        A leading '^' inverts the set. A ']' in first position (after the
        optional '^') is a literal member. 'a-z' denotes an inclusive
        range; a '-' directly before the closing ']' is a literal '-'.
        Raises a syntax error for a range whose start is greater than its
        end, since such a set could never match anything.
        """
        chars = []
        invert = False
        if self.c == '^':
            invert = True
            self.next()
        if self.c == ']':
            chars.append(']')
            self.next()
        while not self.end and self.c != ']':
            c1 = self.get()
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                c2 = self.get()
                lo, hi = ord(c1), ord(c2)
                if lo > hi:
                    self.error(
                        "Bad character range %s" % repr(c1 + '-' + c2))
                chars.extend(chr(code) for code in range(lo, hi + 1))
            else:
                chars.append(c1)
        members = ''.join(chars)
        if invert:
            return AnyBut(members)
        return Any(members)

    def next(self):
        """Advance to the next char, setting c to '' and end at the end."""
        self.i += 1
        if self.i < len(self.s):
            self.c = self.s[self.i]
        else:
            self.c = ''
            self.end = True

    def get(self):
        """
        Return the current char and advance past it.

        Raises a syntax error if the end of the string was already reached.
        """
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """
        Look ahead n chars.

        Returns the character n positions after the current one, or ''
        if that position is beyond the end of the string.
        """
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position.
        Raises an exception otherwise.
        """
        if self.c != c:
            self.error("Missing %s" % repr(c))
        self.next()

    def error(self, mess):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
|
|
|
|
|
|
|
|
|
|
|
|