| import json; | |
| import operator; | |
| import os; | |
| import sys; | |
| from graph import Graph | |
def read(fp, text = None, robust = False):
    """Yield ``(graph, None)`` for every line of *fp* that decodes cleanly.

    Each line is stripped of trailing whitespace, parsed as JSON, and handed
    to ``Graph.decode()``.  When *text* is given, a graph whose ``input`` is
    contained in *text* gets its ``id`` set to ``text[graph.input]``;
    otherwise ``graph.add_input(text)`` is called and every node anchor is
    re-located in the resulting ``graph.input``.

    Parameters:
        fp: iterable of strings, one serialized graph per item.
        text: optional object supporting ``in`` and ``[]`` lookups on the
            graph input; it is also passed to ``graph.add_input()``.
        robust: forwarded unchanged to ``Graph.decode()``.

    Yields:
        ``(graph, None)`` tuples, in input order.

    Any ``Exception`` raised while processing a line (malformed JSON, decode
    errors, anchors that cannot be re-located) is reported on stderr and the
    line is skipped; it never propagates to the caller.
    """
    # Typographic characters and the ASCII renderings tried when an anchored
    # string cannot be found verbatim.  An ordered tuple (rather than a set)
    # keeps the search order, and thereby the result, identical across runs.
    # Escapes are used so that the table survives any re-encoding of the file.
    # NOTE(review): the code points were reconstructed from a mis-decoded
    # copy of this table; confirm against the upstream source, in particular
    # which dash maps to "--".
    variants = (
        ("\u2018", "`"),        # left single quotation mark
        ("\u2018", "'"),
        ("\u2019", "'"),        # right single quotation mark
        ("`", "'"),
        ("\u201c", "\""),       # left double quotation mark
        ("\u201d", "\""),       # right double quotation mark
        ("\u2013", "--"),       # en dash
        ("\u2013", "---"),
        ("\u2014", "---"),      # em dash
        ("\u2026", "..."),      # horizontal ellipsis
        ("\u2026", ". . ."),
    )

    # State shared by the two helpers: the string currently being searched
    # and the offset from which the next search starts.
    source, cursor = None, 0

    def compute(form):
        """Find *form* in ``source`` at or after ``cursor``.

        Returns a fresh ``{"from": ..., "to": ...}`` dictionary and advances
        ``cursor`` past the match.  Raises ``Exception`` when neither *form*
        nor any of its ASCII variants can be found, or when the match would
        be empty.
        """
        nonlocal cursor
        length = None
        start = source.find(form, cursor)
        if start >= cursor:
            # find() returns -1 on failure, which is always below ``cursor``.
            cursor, length = start, len(form)
        else:
            # Fall back to the variants; keep the match closest to the
            # cursor, and the longest one when several start at that offset.
            best_start, best_length = len(source), 0
            for old, new in variants:
                candidate = form.replace(old, new)
                start = source.find(candidate, cursor)
                if start < cursor:
                    continue
                if (start < best_start
                        or (start == best_start
                            and len(candidate) > best_length)):
                    best_start, best_length = start, len(candidate)
            if best_start < len(source):
                cursor, length = best_start, best_length
        if not length:
            # Covers both "not found" (None) and a zero-length match (0).
            raise Exception("failed to anchor |{}| in |{}|{}| ({})"
                            "".format(form, source[:cursor],
                                      source[cursor:], cursor))
        match = {"from": cursor, "to": cursor + length}
        cursor += length
        return match

    def anchor(graph, old, new):
        """Re-express all node anchors of *graph*, given relative to *old*,
        as character ranges into *new*."""
        nonlocal source, cursor
        # Map every distinct (from, to) span to the substring it covers.
        strings = dict()
        for node in graph.nodes:
            for span in node.anchors or ():
                start, end = span["from"], span["to"]
                strings[(start, end)] = old[start:end]
        # Re-locate the substrings left to right, so that each search
        # resumes where the previous match ended.
        source, cursor = new, 0
        for key in sorted(strings):
            strings[key] = compute(strings[key])
        # Nodes that shared a span end up sharing the same new dictionary.
        for node in graph.nodes:
            for j, span in enumerate(node.anchors or ()):
                node.anchors[j] = strings[(span["from"], span["to"])]

    for j, line in enumerate(fp):
        try:
            graph = Graph.decode(json.loads(line.rstrip()), robust = robust)
            if text is not None:
                if graph.input in text:
                    graph.id = text[graph.input]
                else:
                    old = graph.input
                    graph.add_input(text)
                    anchor(graph, old, graph.input)
            yield graph, None
        except Exception as error:
            # Best effort by design: report the (0-based) line and move on.
            print("codec.mrp.read(): ignoring line {}: {}"
                  "".format(j, error), file = sys.stderr)