# (hosting-page residue, not part of the module: "Spaces: Runtime error")
| from operator import itemgetter; | |
| import os.path; | |
| import re; | |
| import sys; | |
| from graph import Graph; | |
#
# display symbols for the PMB comparison operators; keys are the operator
# codes as they appear in clause files, values are used as edge or node labels.
#
# NOTE(review): the symbols for LEQ, NEQ, SXN, SXP, SY1, SY2, and TPR were
# decoded from the surviving bytes; those for APX, SXY, STI, STO, and TAB are
# reconstructed and should be confirmed against the upstream table.
#
conditions = {"APX": "≈", "EQU": "=", "LEQ": "≤", "LES": "<", "NEQ": "≠",
              "SXN": "«", "SXP": "»", "SXY": "≖", "SZN": "\\", "SZP": "/",
              "STI": "⊍", "STO": "⊍", "SY1": "∥", "SY2": "⚮",
              "TAB": "⋈", "TPR": "≺"}
#
# in parsing the clauses, patterns are ordered by specificity: a later pattern
# can also match lines meant for an earlier one (e.g. the role pattern accepts
# any operator name), so clauses must be tried in the order given here.  all
# clause patterns end in a '%' comment that may carry a '[from...to]' span.
#

# second header line: captures the part and document numbers from the path.
id_matcher = re.compile(r'^%%% bin/boxer --input (?:[^/]+/)?p([0-9]+)/d([0-9]+)/')

# 'BOX REF REFERENT': groups are box, referent, and the optional span.
referent_matcher = re.compile(r'^(b[0-9]+) REF ([enpstx][0-9]+) +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# 'BOX OPERATOR SOURCE TARGET' for the closed set of comparison operators;
# source and target are each a referent or a double-quoted constant.
condition_matcher = re.compile(r'^(b[0-9]+) (EQU|NEQ|APX|LE[SQ]|TPR|TAB|S[ZX][PN]|ST[IO]|SY[12]|SXY) ([enpstx][0-9]+|"[^"]+") ([enpstx][0-9]+|"[^"]+") +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# 'BOX ROLE SOURCE TARGET': source is a referent, target a referent or constant.
role_matcher = re.compile(r'^(b[0-9]+) ([^ ]+) ([enpstx][0-9]+) ([enpstx][0-9]+|"[^"]+") +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# 'BOX LEMMA "SENSE" REFERENT': the sense is quoted and contains no spaces.
concept_matcher = re.compile(r'^(b[0-9]+) ([^ ]+) ("[^ ]+") ([enpstx][0-9]+) +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# 'BOX RELATION BOX [BOX]': relation over boxes; the span is not captured.
discourse_matcher = re.compile(r'^(b[0-9]+) ([^ ]+) (b[0-9]+)(?: (b[0-9]+))? +%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$')

# a line that holds nothing but the trailing '%' comment.
empty_matcher = re.compile(r'^ *%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$')
def read(fp, text = None, full = False, reify = False, trace = 0, strict = 0):
    """Read PMB clause blocks from `fp` and yield one graph per block.

    Each block starts with three '%%%' header lines: the first is ignored,
    the second must carry the 'pNN/dNNNN' path from which the graph
    identifier is built, and the third supplies the input string.  The
    remaining lines are clauses.  A block ends at an empty line or at the
    end of input; empty lines with no block in progress are skipped.

    Parameters:
      fp      iterable of text lines.
      text    optional mapping from graph identifier to input string; when
              the identifier is present it overrides the third header line.
      full    with `reify`, attach every reified node to its box.
      reify   turn roles and conditions into nodes (types 2 and 3) joined by
              unlabelled edges, instead of labelled edges between arguments.
      trace   if true, echo each numbered line; above 1, also announce
              ternary discourse relations.
      strict  if true, raise on a duplicate concept label and, together
              with `reify`, on a referent declared in a second box.

    Yields:
      `(graph, None)` tuples, in input order.

    Raises:
      Exception  on a missing identifier in the second header line, on a
                 line that matches no clause pattern, or on a `strict`
                 violation.
    """

    def finish(graph, mapping, finis, scopes):
        """Apply the end-of-block fix-ups to a completed graph."""
        if reify:
            for box, referent, node in finis:
                #
                # in full reification mode, or when the corresponding box
                # cannot be easily inferred for a reified role (including when
                # the source node is a constant, or was never declared in any
                # box), add an explicit box membership edge.
                #
                # NOTE(review): the membership label is reconstructed as the
                # element-of sign; confirm against upstream.
                #
                constant = referent[0] == referent[-1] == "\""
                if full or constant or box not in scopes.get(referent, ()):
                    graph.add_edge(mapping[box].id, node.id, "∈")
        else:
            for referent in scopes:
                if len(scopes[referent]) > 1:
                    print("pmb.read(): [graph #{}] stray referent “{}” in boxes {}."
                          "".format(graph.id, referent, scopes[referent]),
                          file = sys.stderr)
        #
        # after the fact, mark all boxes that structurally are roots as top nodes.
        #
        for node in graph.nodes:
            if node.type == 0 and node.is_root(): node.is_top = True

    def make_anchor(start, end):
        """Return a fresh anchor for the span, or None if the clause has none."""
        if start is not None and end is not None:
            return {"from": int(start), "to": int(end)}
        return None

    def declare(scopes, box, referent, i):
        """Record the first box of `referent`; in strict reify mode, reject another."""
        if referent in scopes:
            if strict and box not in scopes[referent] and reify:
                raise Exception("pmb.read(): "
                                "[line {}] stray referent “{}” in box “{}” "
                                "(instead of “{}”); exit."
                                "".format(i, referent, box, scopes[referent]))
        else:
            scopes[referent] = {box}

    def ensure(graph, mapping, name, start, end):
        """Create the node for `name` if missing; quoted constants get label and anchor."""
        if name in mapping: return
        if name[0] == "\"" and name[-1] == "\"":
            anchor = make_anchor(start, end)
            mapping[name] = graph.add_node(label = name,
                                           anchors = [anchor] if anchor else None)
        else:
            mapping[name] = graph.add_node()

    def relate(graph, mapping, scopes, finis, box, source, target, label, kind):
        """Connect `source` to `target`, as a reified node or a labelled edge."""
        if reify:
            if box not in mapping: mapping[box] = graph.add_node(type = 0)
            node = graph.add_node(label = label, type = kind)
            # box membership of the new node is decided in finish().
            finis.append((box, source, node))
            graph.add_edge(mapping[source].id, node.id, None)
            graph.add_edge(node.id, mapping[target].id, None)
        else:
            scopes.setdefault(source, set()).add(box)
            graph.add_edge(mapping[source].id, mapping[target].id, label)

    graph = None; identifier = None
    mapping = dict(); scopes = dict(); finis = list()
    header = 3
    for i, line in enumerate(fp, start = 1):
        line = line.rstrip()
        if trace: print("{}: {}".format(i, line))
        #
        # to support newline-separated concatenations of clause files (a format
        # not used in the native PMB 3.0 release), an empty line closes the
        # current block; with no block in progress there is nothing to emit.
        #
        if not line:
            if graph is not None:
                finish(graph, mapping, finis, scopes)
                yield graph, None
            graph = None; identifier = None
            mapping = dict(); scopes = dict(); finis = list()
            header = 3
            continue
        #
        # each block of clauses is preceded by three comment lines, which we use
        # to extract the sentence identifier and underlying string.
        #
        if header:
            if header == 2:
                match = id_matcher.match(line)
                if match is None:
                    raise Exception("pmb.read(): "
                                    "[line {}] missing identifier in “{}”; exit."
                                    "".format(i, line))
                part, document = match.groups()
                identifier = "{:02d}{:04d}".format(int(part), int(document))
            elif header == 1:
                if text is not None and identifier in text:
                    sentence = text[identifier]
                else:
                    sentence = line[5:-1]
                graph = Graph(identifier, flavor = 2, framework = "drg")
                graph.add_input(sentence)
            header -= 1
            continue
        #
        # from here onwards, we are looking at genuine, contentful clauses.  from
        # inspecting some of the files, it appears they are organized according
        # to surface (reading) order, and we cannot assume that discourse
        # referents are 'introduced' (in some box) prior to their first
        # occurrence in e.g. a role or concept clause.
        #
        match = referent_matcher.match(line)
        if match is not None:
            box, referent, start, end = match.groups()
            declare(scopes, box, referent, i)
            if box not in mapping: mapping[box] = graph.add_node(type = 0)
            anchor = make_anchor(start, end)
            if referent not in mapping:
                mapping[referent] \
                    = graph.add_node(anchors = [anchor] if anchor else None)
            else:
                mapping[referent].add_anchor(anchor)
            graph.add_edge(mapping[box].id, mapping[referent].id, "∈")
            continue

        match = condition_matcher.match(line)
        if match is not None:
            box, condition, source, target, start, end = match.groups()
            condition = conditions[condition]
            ensure(graph, mapping, source, start, end)
            ensure(graph, mapping, target, start, end)
            relate(graph, mapping, scopes, finis,
                   box, source, target, condition, 3)
            continue

        match = role_matcher.match(line)
        if match is not None:
            box, role, source, target, start, end = match.groups()
            ensure(graph, mapping, source, start, end)
            ensure(graph, mapping, target, start, end)
            relate(graph, mapping, scopes, finis,
                   box, source, target, role, 2)
            continue

        match = concept_matcher.match(line)
        if match is not None:
            box, lemma, sense, referent, start, end = match.groups()
            declare(scopes, box, referent, i)
            anchor = make_anchor(start, end)
            if referent not in mapping:
                mapping[referent] = node \
                    = graph.add_node(anchors = [anchor] if anchor else None)
            else:
                node = mapping[referent]
                node.add_anchor(anchor)
            if strict and node.label is not None:
                raise Exception("pmb.read(): "
                                "[line {}] duplicate label “{}” on referent “{}” "
                                "(instead of “{}”); exit."
                                "".format(i, lemma, referent, node.label))
            node.label = lemma
            if sense[0] == sense[-1] == "\"": sense = sense[1:-1]
            node.set_property("sense", sense)
            continue

        match = discourse_matcher.match(line)
        if match is not None:
            top, relation, one, two = match.groups()
            if one not in mapping: mapping[one] = graph.add_node(type = 0)
            if two is not None:
                # with two box arguments the edge runs between them and the
                # leading box is not connected.
                if trace > 1: print("ternary discourse relation")
                if two not in mapping: mapping[two] = graph.add_node(type = 0)
                graph.add_edge(mapping[one].id, mapping[two].id, relation)
            else:
                if top not in mapping: mapping[top] = graph.add_node(type = 0)
                graph.add_edge(mapping[top].id, mapping[one].id, relation)
            continue

        if empty_matcher.search(line) is None:
            raise Exception("pmb.read(): [line {}] invalid clause “{}”."
                            "".format(i, line))
    #
    # finally, as we reach an end of file (without an empty line terminating
    # the preceding block of clauses, as is the standard format in PMB),
    # finalize the graph and return it.
    #
    if graph is not None:
        finish(graph, mapping, finis, scopes)
        yield graph, None