Spaces:
Runtime error
Runtime error
| import re; | |
| import sys; | |
| import codec.mrp; | |
| from graph import Edge, Graph; | |
| from smatch.amr import AMR; | |
# Placeholder token of the form ``__<n>__``: amr_lines() substitutes it for
# ``x<k>/<constant>`` tokens and amr2graph() later resolves it again.
STASH = re.compile(r'__[0-9]+__')

# Node names of the form ``x<token>`` with an optional ``_<m>_<n>...`` suffix,
# as decoded by find_anchors(): group 1 is the token number, group 2 the
# complete (possibly empty) suffix.  The inner group is non-capturing; the
# earlier spelling ``(:?`` was a typo that admitted a stray colon.
INDEX = re.compile(r'x([0-9]+)((?:_[0-9]+)*)')
def amr_lines(fp, camr, alignment):
    """Split an AMR file into one record per graph.

    Graphs are separated by empty lines.  Comment lines (starting with
    ``#``) are only inspected for ``# ::id`` and ``# ::snt``; all other
    lines are collected and joined with single spaces.

    Args:
        fp: iterable of text lines.
        camr: when true, each graph line is rewritten in two steps: a
            parenthesised annotation glued to a ``:role`` token is dropped,
            and every ``x<k>/<constant>`` token is replaced by a
            ``__<n>__`` placeholder whose definition is kept in the stash.
        alignment: stream handed to read_alignment(), or None.

    Yields:
        ``(id, snt, amr, stash, mapping)`` where ``stash`` is a list of
        ``(n, name, constant)`` tuples and ``mapping`` is the alignment
        whose id equals the graph id (None otherwise).  Both ``stash`` and
        ``mapping`` are private copies, so records stay valid after the
        generator has moved on.

    Raises:
        Exception: if one stashed name is defined with two different
            constants inside a single graph.
    """
    identifier, sentence, lines = None, None, []
    stash = dict()

    def _stash_(match):
        # swap an ``x<k>/<constant>`` token for its numbered placeholder
        prefix, constant, suffix = match.groups()
        fields = constant.split("/")
        name, value = fields[0], fields[1]
        if name in stash:
            if stash[name][2] != value:
                raise Exception("amr_lines(): "
                                "ambiguously defined constant in graph #{}, "
                                "‘{}’: ‘{}’ vs. ‘{}’; exit."
                                "".format(identifier, name,
                                          stash[name][2], value))
        else:
            stash[name] = (len(stash), name, value)
        return "{}__{}__{}".format(prefix, stash[name][0], suffix)

    mappings = read_alignment(alignment)

    def _flush_():
        # build the record for the graph collected so far
        key = mapping = None
        try:
            key, mapping = next(mappings)
        except Exception:
            # best effort: a graph without alignment is still emitted
            print("amr_lines(): missing alignment for graph #{}."
                  "".format(identifier), file=sys.stderr)
        if mapping is None or key != identifier:
            mapping = None
        else:
            # copy, because the alignment reader may recycle its dictionary
            mapping = dict(mapping)
        # snapshot the stash, which is cleared before the next graph
        return (identifier, sentence, " ".join(lines),
                list(stash.values()), mapping)

    for line in fp:
        line = line.strip()
        if not line:
            if lines:
                yield _flush_()
                # also forget the sentence, so that a graph lacking its own
                # ``# ::snt`` does not inherit the previous one
                identifier, sentence, lines = None, None, []
                stash.clear()
        elif line.startswith("#"):
            if line.startswith("# ::id"):
                identifier = line.split()[2]
            if line.startswith("# ::snt"):
                sentence = line[8:].strip()
        else:
            if camr:
                line = re.sub(r'((?:^|[ \t]):[^( ]+)\([^ \t]*\)([ \t]|$)',
                              "\\1\\2", line, count=0)
                line = re.sub(r'(^|[ \t])(x[0-9]+/[^ \t]+)([ \t]|$)',
                              _stash_, line, count=0)
            lines.append(line)
    # the last graph need not be followed by an empty line
    if lines:
        yield _flush_()
def read_alignment(stream):
    """Iterate over the per-graph alignments found in *stream*.

    Blocks are separated by empty lines.  Inside a block, a ``# ::id``
    comment names the graph and every line of the form
    ``<node> [<path> ...]<TAB><start>-<end>`` adds the inclusive range
    ``start..end`` to the span recorded for that node and path.  A leading
    colon on the first path element is dropped; lines whose first path
    element is ``wiki`` and lines without exactly one tab are ignored.

    Args:
        stream: iterable of text lines, or None.

    Yields:
        ``(id, alignment)`` pairs, where ``alignment`` maps a node name to
        a dictionary from path tuples (``()`` for the node itself) to sets
        of integers.  Each pair carries its own dictionary.  One pair is
        produced per empty line plus a final one at the end of the stream.
        When *stream* is None, ``(None, None)`` is yielded without end.
    """
    if stream is None:
        while True:
            yield None, None
    else:
        identifier = None
        alignment = dict()
        for line in stream:
            line = line.strip()
            if not line:
                yield identifier, alignment
                # start a fresh dictionary instead of clearing the one
                # that was just handed to the consumer
                identifier, alignment = None, dict()
            elif line.startswith("#"):
                if line.startswith("# ::id"):
                    identifier = line.split()[2]
            else:
                fields = line.split("\t")
                if len(fields) != 2:
                    continue
                start, end = fields[1].split("-")
                span = range(int(start), int(end) + 1)
                fields = fields[0].split()
                # a bare node name (no path) is legitimate: only look at
                # the first path element when there is one
                if len(fields) > 1:
                    if fields[1].startswith(":"):
                        fields[1] = fields[1][1:]
                    if fields[1] == "wiki":
                        continue
                bucket = alignment.setdefault(fields[0], dict())
                bucket.setdefault(tuple(fields[1:]), set()).update(span)
        yield identifier, alignment
def amr2graph(id, amr, text, stash, camr=False,
              full=False, reify=False, quiet=False, alignment=None):
    """Convert one parsed AMR into a Graph, plus an optional anchoring overlay.

    Args:
        id: graph identifier, passed on to Graph().
        amr: parsed AMR providing parallel ``nodes``, ``node_values``,
            ``attributes`` and ``relations`` sequences.
        text: input text; when non-empty it is attached via add_input().
        stash: iterable of ``(n, name, constant)`` tuples used to resolve
            ``__<n>__`` placeholder values (may be None or empty).
        camr: when true, the input is split on single spaces, every token
            is located in the input string, and nodes are anchored through
            find_anchors().
        full: when true, ``wiki`` attributes are kept.
        reify: when true, attribute values become nodes of their own that
            are connected by an edge; otherwise they become lower-cased
            node properties.
        quiet: passed on to add_input().
        alignment: mapping from node name to ``{path: span}`` as produced
            by read_alignment(), or None.

    Returns:
        ``(graph, overlay)``; ``overlay`` is None without an alignment.

    Raises:
        Exception: if a token cannot be located in the input, or if an
            aligned path names neither a property nor an outgoing edge.
        KeyError: if a placeholder or node name cannot be resolved.
    """
    graph = Graph(id, flavor=2, framework="amr")
    node2id = dict()
    anchoring = list()

    # alternate spellings to try when a token is not found verbatim; a
    # tuple rather than a set, so that ties are broken in listed order
    variants = (("‘", "`"), ("‘", "'"), ("’", "'"), ("`", "'"),
                ("“", "\""), ("”", "\""),
                ("–", "--"), ("–", "---"), ("—", "---"),
                ("…", "..."), ("…", ". . ."))
    cursor = 0

    def _anchor_(form):
        # find *form* at or after the cursor and advance the cursor past it
        nonlocal cursor
        length = None
        start = graph.input.find(form, cursor)
        if start >= cursor:
            cursor, length = start, len(form)
        else:
            # settle for the earliest match among the alternate spellings
            best, width = len(graph.input), 0
            for old, new in variants:
                candidate = form.replace(old, new)
                start = graph.input.find(candidate, cursor)
                if cursor <= start < best:
                    best, width = start, len(candidate)
            if best < len(graph.input):
                cursor, length = best, width
        if length:
            match = {"from": cursor, "to": cursor + length}
            cursor += length
            return match
        raise Exception("failed to anchor |{}| in |{}|{}| ({})"
                        "".format(form, graph.input[:cursor],
                                  graph.input[cursor:], cursor))

    if text:
        graph.add_input(text, quiet=quiet)
        if camr:
            for token in graph.input.split(" "):
                anchoring.append(_anchor_(token))

    constants = {index: value for index, _, value in (stash or ())}
    next_id = 0
    for name, label, attributes in zip(amr.nodes, amr.node_values,
                                       amr.attributes):
        current = next_id
        next_id += 1
        node2id[name] = current
        top = any(key == "TOP" for key, _ in attributes)
        anchors = find_anchors(name, anchoring) if camr else None
        node = graph.add_node(current, label=label, top=top, anchors=anchors)
        for key, val in attributes:
            if STASH.match(val) is not None:
                # strip the two leading and two trailing underscores
                val = constants[int(val[2:-2])]
            if key == "TOP" or (key == "wiki" and not full):
                continue
            if val.endswith("¦"):
                val = val[:-1]
            if reify:
                graph.add_node(next_id, label=val)
                graph.add_edge(current, next_id, key)
                next_id += 1
            else:
                #
                # _fix_me_
                # this assumes that properties are unique. (1-apr-20; oe)
                #
                node.set_property(key.lower(), str(val).lower())

    for src, relations in zip(amr.nodes, amr.relations):
        for label, tgt in relations:
            normal = None
            if label == "mod":
                normal = "domain"
            elif label.endswith("-of-of") \
                    or (label.endswith("-of")
                        and label not in {"consist-of", "subset-of"}
                        and not label.startswith("prep-")):
                # drop the trailing ``-of``
                normal = label[:-3]
            graph.add_edge(node2id[src], node2id[tgt], label, normal)

    overlay = None
    if alignment is not None:
        overlay = Graph(id, flavor=-1, framework="anchoring")
        # first pass: anchors on the nodes themselves (empty path)
        for name, bucket in alignment.items():
            for path, span in bucket.items():
                if len(path) == 0:
                    anchors = [{"#": token} for token in span]
                    overlay.add_node(node2id[name], anchors=anchors)
        # second pass: anchors on properties or outgoing edges
        for name, bucket in alignment.items():
            source = node2id[name]
            for path, span in bucket.items():
                if len(path) == 1:
                    key = path[0].lower()
                    target = overlay.find_node(source)
                    if target is None:
                        target = overlay.add_node(source)
                    reference = graph.find_node(source)
                    anchors = [{"#": token} for token in span]
                    if reference.properties is not None \
                            and key in reference.properties:
                        target.set_anchoring(key, anchors)
                    else:
                        edge = next((edge for edge in graph.edges
                                     if edge.lab.lower() == key
                                     and edge.src == source), None)
                        if edge is None:
                            # fail with a readable message; a StopIteration
                            # would surface as an opaque RuntimeError inside
                            # a calling generator
                            raise Exception("amr2graph(): "
                                            "no property or edge {} "
                                            "on node #{} ({}); exit."
                                            "".format(key, source, name))
                        overlay.edges.add(Edge(edge.id, None, None, None,
                                               anchors=anchors))
                elif len(path) > 1:
                    print("amr2graph(): "
                          "ignoring alignment path {} on node #{} ({})"
                          "".format(path, source, name), file=sys.stderr)
    return graph, overlay
def find_anchors(index, anchors):
    """Collect the anchors referenced by a node name such as ``x3`` or ``x3_1_2``.

    Every ``x<k>`` reference in *index* selects the k-th entry (counting
    from one) of *anchors*.  Without a suffix that entry is used as is;
    with a suffix ``_<m>_<n>...`` one single-position anchor is produced
    per number, at offset ``m - 1`` from the entry's ``from`` value.

    Args:
        index: node name to decode.
        anchors: list of ``{"from": ..., "to": ...}`` dictionaries.

    Returns:
        A list of anchor dictionaries, or None if nothing was found.
    """
    result = list()
    for match in INDEX.finditer(index):
        position = int(match.group(1)) - 1
        # skip references outside the list; this includes ``x0``, whose
        # index of -1 would otherwise wrap around to the last entry
        if not 0 <= position < len(anchors):
            continue
        anchor = anchors[position]
        suffix = match.group(2)
        if suffix:
            start = anchor["from"]
            # pull out the numbers directly, which tolerates any separator
            # the pattern lets through
            for field in re.findall(r'[0-9]+', suffix):
                offset = int(field)
                result.append({"from": start + offset - 1,
                               "to": start + offset})
        else:
            result.append(anchor)
    return result if result else None
def convert_amr_id(id):
    """Derive a numeric identifier string from a known AMR graph id.

    An id containing ``wsj_<d>.<s>`` becomes ``2`` followed by ``d`` padded
    to four digits and ``s`` padded to three; an id containing
    ``lpp_1943.<s>`` becomes ``1``, ``s`` padded to four digits, and ``0``.

    Raises:
        Exception: if the id matches neither scheme.
    """
    wsj = re.search(r'wsj_([0-9]+)\.([0-9]+)', id)
    if wsj is not None:
        document, sentence = (int(group) for group in wsj.groups())
        return "2%04d%03d" % (document, sentence)
    lpp = re.search(r'lpp_1943\.([0-9]+)', id)
    if lpp is None:
        raise Exception('Could not convert id: %s' % id)
    return "1%04d0" % (int(lpp.group(1)))
def read(fp, full=False, reify=False, camr=False,
         text=None, alignment=None,
         quiet=False, trace=0):
    """Read AMRs from *fp* and yield them as graphs.

    Args:
        fp: iterable of text lines, handed to amr_lines().
        full, reify, camr, quiet: passed on to amr2graph(); ``camr`` is
            also passed to amr_lines().
        text: input text passed to amr2graph(); when it is false, the
            ``# ::snt`` sentence of each graph is used instead.
        alignment: alignment stream handed to amr_lines(), or None.
        trace: when true, each graph id and AMR string is echoed to
            standard error before parsing.

    Yields:
        ``(graph, overlay)`` pairs as returned by amr2graph().

    Raises:
        Exception: if an AMR string cannot be parsed.
    """
    n = 0
    for id, snt, amr_line, stash, mapping in amr_lines(fp, camr, alignment):
        if trace:
            print("{}: {}".format(id, amr_line), file=sys.stderr)
        amr = AMR.parse_AMR_line(amr_line)
        if not amr:
            raise Exception("failed to parse #{} ‘{}’; exit."
                            "".format(id, amr_line))
        if id is None:
            # graphs without an id are numbered from zero
            # NOTE(review): assumes the counter only advances for such
            # graphs -- confirm against the original indentation
            id = n
            n += 1
        else:
            try:
                id = convert_amr_id(id)
            except Exception:
                # deliberate: an id in an unknown scheme is kept verbatim
                pass
        graph, overlay = amr2graph(id, amr, text or snt, stash,
                                   camr, full, reify, quiet, mapping)
        yield graph, overlay