Spaces:
Runtime error
Runtime error
"""
    pygments.regexopt
    ~~~~~~~~~~~~~~~~~

    An algorithm that generates optimized regexes for matching long lists of
    literal strings.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
| import re | |
| from re import escape | |
| from os.path import commonprefix | |
| from itertools import groupby | |
| from operator import itemgetter | |
# Matches each of the five characters `[`, `^`, `\`, `-` and `]`, i.e. the
# ones that must be backslash-escaped when placed inside a regex character
# class.
CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
# Callable returning element 0 of its argument.  Not referenced in the
# visible part of this module; kept because other code may import it.
FIRST_ELEMENT = itemgetter(0)
def make_charset(letters):
    """Return a regex character class matching any one of *letters*.

    *letters* is an iterable of strings; they are concatenated and every
    character matched by ``CS_ESCAPE`` is prefixed with a backslash so it
    is taken literally inside the brackets.
    """
    escaped = CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters))
    return '[' + escaped + ']'
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    *strings* is a sorted list of literal strings; they are regex-escaped
    here.  *open_paren* is the group opener to wrap the result in (the
    callers in this module pass ``'('``, ``'(?:'`` or ``''``); a closing
    ``')'`` is appended only when *open_paren* is non-empty.

    Returns the pattern as a string, or ``''`` when *strings* is empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''

    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren

    if not first:
        # The input is sorted, so an empty string sits at the front: drop
        # it and make the remaining alternatives optional instead.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)

    if len(first) == 1:
        # Several one-character strings can be merged into one charset.
        oneletter = [s for s in strings if len(s) == 1]
        if len(oneletter) > 1:
            rest = [s for s in strings if len(s) != 1]
            if rest:
                # The longer strings are emitted before the charset.
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            return open_paren + make_charset(oneletter) + close_paren

    prefix = commonprefix(strings)
    if prefix:
        # Factor out the prefix shared by all strings and recurse on the
        # remainders (stripping a common prefix keeps them sorted).
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)

    # The common prefix of the reversed strings is the reversed common
    # suffix of the originals.
    suffix = commonprefix([s[::-1] for s in strings])[::-1]
    if suffix:
        # Stripping a suffix may change the ordering, so sort again.
        slen = len(suffix)
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix) + close_paren)

    # Last resort: split into consecutive runs of strings that do / do not
    # start with the first string's first character and join the
    # sub-patterns of those runs as plain alternatives.
    head = first[0]
    return (open_paren
            + '|'.join(regex_opt_inner(list(group), '')
                       for _, group in groupby(strings,
                                               lambda s: s[0] == head))
            + close_paren)
def regex_opt(strings, prefix='', suffix=''):
    """Return a regex pattern string that matches any string in *strings*.

    The strings to match must be literal strings, not regexes.  They will be
    regex-escaped.  The result is the pattern source (a ``str``), not a
    compiled pattern object; for non-empty input the alternatives are
    wrapped in a single capturing group.

    *prefix* and *suffix* are pre- and appended to the final regex.
    """
    # Duplicates do not change what is matched but would otherwise yield
    # degenerate fragments such as ``(?:)?`` or ``[aa]``, so drop them
    # before sorting.
    unique_sorted = sorted(set(strings))
    return prefix + regex_opt_inner(unique_sorted, '(') + suffix