Buckets:
MisterAI/LocalAI_Demo_backends / cpu-diffusers.upgrade-tmp /venv /lib /python3.10 /site-packages /lark /tools /nearley.py
| "Converts Nearley grammars to Lark" | |
| import os.path | |
| import sys | |
| import codecs | |
| import argparse | |
| from lark import Lark, Transformer, v_args | |
# Lark grammar for the Nearley syntax this converter reads.  A Nearley file is
# a sequence of rule definitions (``NAME -> expansions``) and ``@`` directives.
# The JS terminal matches code wrapped in ``{% ... %}``.  COMMENT (``#`` up to
# the end of the line) and whitespace are ignored.  A rule definition whose
# name is followed by a REGEXP is tagged ``macro``.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item (":" /[+*?]/)?
?item: rule|string|regexp|null
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
null: "null"
JS: /{%.*?%}/s
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/
STRING: _STRING "i"?
%import common.ESCAPED_STRING -> _STRING
%import common.WS
%ignore WS
%ignore COMMENT
"""
# Parser for Nearley source text, built once when this module is imported
# (Earley parser with the 'basic' lexer).
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='basic')
| def _get_rulename(name): | |
| name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name) | |
| return 'n_' + name.replace('$', '__DOLLAR__').lower() | |
@v_args(inline=True)
class NearleyToLark(Transformer):
    """Turn a parsed Nearley rule definition into Lark grammar text.

    Every callback below takes the children of its tree node as separate
    positional arguments, which is why the class is decorated with
    ``v_args(inline=True)``; without it lark would pass a single list of
    children and the callbacks would fail with ``TypeError``.

    Besides returning text, the transformer accumulates two side tables that
    the caller reads afterwards: ``extra_rules`` and ``alias_js_code``.
    """

    def __init__(self):
        super().__init__()
        # Counter used to number the generated ``alias_N`` names.
        self._count = 0
        # Generated helper rules: ``xrule_N`` -> rule body text.
        self.extra_rules = {}
        # Reverse index (rule body text -> ``xrule_N``) so that identical
        # bodies share a single helper rule.
        self.extra_rules_rev = {}
        # ``alias_N`` -> JavaScript source attached to an expansion.
        self.alias_js_code = {}

    def _new_function(self, code):
        """Store the JavaScript *code* under a fresh ``alias_N`` name and return that name."""
        name = 'alias_%d' % self._count
        self._count += 1

        self.alias_js_code[name] = code
        return name

    def _extra_rule(self, rule):
        """Return the ``xrule_N`` name for the rule body *rule*, creating it on first use."""
        if rule in self.extra_rules_rev:
            return self.extra_rules_rev[rule]

        name = 'xrule_%d' % len(self.extra_rules)
        # Internal invariant: names are numbered by table size, so a clash
        # would mean the two tables went out of sync.
        assert name not in self.extra_rules
        self.extra_rules[name] = rule
        self.extra_rules_rev[rule] = name
        return name

    def rule(self, name):
        """Reference to another rule: emit its converted name."""
        return _get_rulename(name)

    def ruledef(self, name, exps):
        """Emit ``!<converted name>: <expansions>`` for one rule definition."""
        return '!%s: %s' % (_get_rulename(name), exps)

    def expr(self, item, op):
        """Item followed by a ``+``/``*``/``?`` operator: emit a helper rule for ``(item)op``."""
        rule = '(%s)%s' % (item, op)
        return self._extra_rule(rule)

    def regexp(self, r):
        """Wrap the Nearley character class *r* in slashes."""
        return '/%s/' % r

    def null(self):
        """The ``null`` item contributes no text."""
        return ''

    def string(self, s):
        """String literal: emit the name of a helper rule whose body is the literal."""
        return self._extra_rule(s)

    def expansion(self, *x):
        """Join the items of one expansion, appending ``-> alias_N`` when JS code is attached.

        The last argument is always the ``js`` subtree; it has a child only
        when the expansion carries a ``{% ... %}`` block.
        """
        items, js = x[:-1], x[-1]
        if js.children:
            (js_code,) = js.children
            # Drop the surrounding "{%" and "%}" markers.
            js_code = js_code[2:-2]
            alias = '-> ' + self._new_function(js_code)
        else:
            alias = ''
        return ' '.join(items) + alias

    def expansions(self, *x):
        """Join alternative expansions, one per line, separated by ``|``."""
        return '\n |'.join(x)

    def start(self, *rules):
        """Join the non-empty converted rules, one per line."""
        return '\n'.join(filter(None, rules))
def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
    """Convert Nearley grammar text into a list of Lark rule definitions.

    Args:
        g: Nearley grammar source text.
        builtin_path: Directory searched for ``@builtin`` files.
        n2l: Transformer applied to every ``ruledef`` statement.
        js_code: List that receives the body of each top-level ``@{% ... %}``
            block (mutated in place).
        folder_path: Directory searched for ``@include`` files.
        includes: Set of file paths already loaded (mutated in place); a path
            found in it is not loaded again.

    Returns:
        The converted rule definitions of *g*, with those of every file it
        pulls in inserted at the position of the directive.

    Raises:
        AssertionError: For a directive other than ``builtin``/``include``.
        Exception: For a statement of an unknown kind.
    """
    rule_defs = []

    tree = nearley_grammar_parser.parse(g)
    for statement in tree.children:
        kind = statement.data
        if kind == 'directive':
            directive, arg = statement.children
            if directive not in ('builtin', 'include'):
                # Raised explicitly instead of via ``assert`` so the check is
                # not stripped (and the directive silently ignored) under
                # ``python -O``.  The exception type is unchanged.
                raise AssertionError(directive)
            folder = builtin_path if directive == 'builtin' else folder_path
            # arg[1:-1] drops the first and last character (the quotes of a
            # string argument).
            path = os.path.join(folder, arg[1:-1])
            if path not in includes:
                includes.add(path)
                with codecs.open(path, encoding='utf8') as f:
                    text = f.read()
                # Includes inside the loaded file resolve relative to that file.
                rule_defs += _nearley_to_lark(
                    text, builtin_path, n2l, js_code,
                    os.path.abspath(os.path.dirname(path)), includes)
        elif kind == 'js_code':
            (code,) = statement.children
            # Drop the surrounding "{%" and "%}" markers.
            js_code.append(code[2:-2])
        elif kind == 'macro':
            pass  # TODO Add support for macros!
        elif kind == 'ruledef':
            rule_defs.append(n2l.transform(statement))
        else:
            raise Exception("Unknown statement: %s" % statement)

    return rule_defs
def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
    """Return Python source code for a Lark parser converted from a Nearley grammar.

    Args:
        g: Nearley grammar source text.
        start: Name of the Nearley rule to use as the start rule.
        builtin_path: Directory searched for ``@builtin`` files.
        folder_path: Directory searched for ``@include`` files.
        es6: Translate the collected JavaScript with ``js2py.translate_js6``
            instead of ``js2py.translate_js``.

    Returns:
        The generated module as one string: the Lark grammar, the translated
        JavaScript, a ``TransformNearley`` class, a ``parser`` object and a
        ``parse(text)`` function.
    """
    # js2py is imported here rather than at module import time.
    import js2py

    emit_code = []

    def emit(x=None):
        # Append one line of output; a call without text emits an empty line.
        if x:
            emit_code.append(x)
        emit_code.append('\n')

    js_code = ['function id(x) {return x[0];}']
    n2l = NearleyToLark()
    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
    lark_g = '\n'.join(rule_defs)
    # Helper rules collected by the transformer follow the converted rules.
    lark_g += '\n' + '\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())

    emit('from lark import Lark, Transformer')
    emit()
    emit('grammar = ' + repr(lark_g))
    emit()

    for alias, code in n2l.alias_js_code.items():
        js_code.append('%s = (%s);' % (alias, code))

    js_source = '\n'.join(js_code)
    if es6:
        emit(js2py.translate_js6(js_source))
    else:
        emit(js2py.translate_js(js_source))
    emit('class TransformNearley(Transformer):')
    for alias in n2l.alias_js_code:
        emit(" %s = var.get('%s').to_python()" % (alias, alias))
    emit(" __default__ = lambda self, n, c, m: c if c else None")

    emit()
    # Rules in the converted grammar are named by _get_rulename (lower-cased,
    # "$" spelled out, "_"/"__" renamed), so the start rule must go through
    # the same mapping or a name such as "Main" would not match any rule.
    emit('parser = Lark(grammar, start="%s", maybe_placeholders=False)' % _get_rulename(start))
    emit('def parse(text):')
    emit(' return TransformNearley().transform(parser.parse(text))')

    return ''.join(emit_code)
def main(fn, start, nearley_lib, es6=False):
    """Read the Nearley grammar file *fn* and return the generated parser code.

    Builtins are looked up in the ``builtin`` directory under *nearley_lib*;
    includes are resolved relative to the directory containing *fn*.
    """
    with codecs.open(fn, encoding='utf8') as grammar_file:
        grammar = grammar_file.read()

    builtin_dir = os.path.join(nearley_lib, 'builtin')
    grammar_dir = os.path.abspath(os.path.dirname(fn))
    return create_code_for_nearley_grammar(grammar, start, builtin_dir, grammar_dir, es6=es6)
def get_arg_parser():
    """Build the command-line parser: three positional arguments plus ``--es6``."""
    arg_parser = argparse.ArgumentParser(
        description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.',
    )

    # Positional arguments, in the order they must appear on the command line.
    positionals = (
        ('nearley_grammar', 'Path to the file containing the nearley grammar'),
        ('start_rule', 'Rule within the nearley grammar to make the base rule'),
        ('nearley_lib', 'Path to root directory of nearley codebase (used for including builtins)'),
    )
    for arg_name, help_text in positionals:
        arg_parser.add_argument(arg_name, help=help_text)

    arg_parser.add_argument('--es6', action='store_true', help='Enable experimental ES6 support')
    return arg_parser
if __name__ == '__main__':
    parser = get_arg_parser()
    # Invoked with no arguments at all: print the full help to stderr and
    # exit with status 1.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    generated_code = main(
        fn=args.nearley_grammar,
        start=args.start_rule,
        nearley_lib=args.nearley_lib,
        es6=args.es6,
    )
    print(generated_code)
Xet Storage Details
- Size:
- 6.27 kB
- Xet hash:
- c44361a1b61694ab52014c6dfdadebb727f9507ae66d4967b3cc6114661c965c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.