| from operator import itemgetter; |
| import os.path; |
| import re; |
| import sys; |
|
|
| from graph import Graph; |
|
|
# Maps the three-letter operator of a condition clause (the alternatives
# accepted by condition_matcher) to the label used for the resulting edge or
# reified node in read().
#
# NOTE(review): the non-ASCII values look like UTF-8 symbols that were decoded
# with the wrong codec; five keys (APX, SXY, STI, STO, TAB) now share the same
# value, so information appears to have been lost.  They are kept verbatim
# here -- TODO restore the intended symbols from a pristine copy of the file.
conditions = {
    "APX": "โ", "EQU": "=", "LEQ": "โค", "LES": "<", "NEQ": "โ ",
    "SXN": "ยซ", "SXP": "ยป", "SXY": "โ", "SZN": "\\", "SZP": "/",
    "STI": "โ", "STO": "โ", "SY1": "โฅ", "SY2": "โฎ",
    "TAB": "โ", "TPR": "โบ",
}
|
|
| |
| |
| |
# Line patterns for the clause format.  read() tries the clause patterns in
# the order referent, condition, role, concept, discourse, empty; the order
# matters because role_matcher accepts any operator token and would therefore
# also match condition clauses.
#
# Every clause pattern ends in a "%" comment that may carry a trailing
# "[from...to]" pair, captured where the clause can be anchored.

# Header line naming the part ("pNN") and document ("dNNNN") of the input.
id_matcher = re.compile(r'^%%% bin/boxer --input (?:[^/]+/)?p([0-9]+)/d([0-9]+)/')

# "<box> REF <referent>"
referent_matcher = re.compile(r'^(b[0-9]+) REF ([enpstx][0-9]+) +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# "<box> <operator> <source> <target>", operator being one of the keys of
# the conditions table; either operand may be a double-quoted constant.
condition_matcher = re.compile(r'^(b[0-9]+) (EQU|NEQ|APX|LE[SQ]|TPR|TAB|S[ZX][PN]|ST[IO]|SY[12]|SXY) ([enpstx][0-9]+|"[^"]+") ([enpstx][0-9]+|"[^"]+") +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# "<box> <role> <referent> <referent or quoted constant>"
role_matcher = re.compile(r'^(b[0-9]+) ([^ ]+) ([enpstx][0-9]+) ([enpstx][0-9]+|"[^"]+") +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# "<box> <lemma> <quoted sense> <referent>"
concept_matcher = re.compile(r'^(b[0-9]+) ([^ ]+) ("[^ ]+") ([enpstx][0-9]+) +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# "<box> <relation> <box> [<box>]"; offsets are matched but not captured.
discourse_matcher = re.compile(r'^(b[0-9]+) ([^ ]+) (b[0-9]+)(?: (b[0-9]+))? +%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$')

# A line holding nothing but the trailing comment.
empty_matcher = re.compile(r'^ *%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$')
|
|
def read(fp, text=None, full=False, reify=False, trace=0, strict=0):
    """Read clause-format input from *fp* and yield ``(graph, None)`` pairs.

    The input is consumed line by line.  Each block starts with three header
    lines: the first is skipped, the second must match ``id_matcher`` and
    supplies the graph identifier, the third supplies the input string.  All
    following lines are clauses; an empty line closes the current block.

    Parameters:
      fp -- iterable of text lines.
      text -- optional mapping from graph identifier to input string; when
        the identifier is found there, that string is used instead of the
        one cut out of the third header line.
      full -- with *reify*, attach every reified node to its box.
      reify -- turn condition and role clauses into nodes of their own
        (type 3 and type 2, respectively) rather than labelled edges.
      trace -- when true, echo every line; above 1, also report ternary
        discourse relations.
      strict -- when true, raise on duplicate concept labels and, together
        with *reify*, on referents that show up in a second box.

    Yields:
      ``(graph, None)`` for each completed block.

    Raises:
      Exception -- on a malformed identifier line, on an unrecognised
        clause, and on the *strict* violations described above.

    Empty lines that arrive while no graph is open (leading or repeated
    separators) are skipped; previously they made ``finish()`` fail on a
    ``None`` graph.
    """
    # NOTE(review): the non-ASCII literals below (the edge label and the
    # quotes inside the messages) look like mis-decoded UTF-8.  They are kept
    # verbatim -- TODO restore them from a pristine copy of the file.

    def is_quoted(token):
        """True when *token* starts and ends with a double quote."""
        return token[0] == "\"" and token[-1] == "\""

    def span(start, end):
        """Return a fresh anchor for the captured offsets, or None."""
        if start is None or end is None:
            return None
        return {"from": int(start), "to": int(end)}

    def span_list(start, end):
        """Wrap span() in a one-element list; None when there are no offsets."""
        anchor = span(start, end)
        return None if anchor is None else [anchor]

    def declare(scopes, referent, box, i):
        """Record the first box seen for *referent*; police later ones."""
        if referent not in scopes:
            scopes[referent] = {box}
        elif strict and reify and box not in scopes[referent]:
            raise Exception("pbm.read(): "
                            "[line {}] stray referent โ{}โ in box โ{}โ "
                            "(instead of โ{}โ); exit."
                            "".format(i, referent, box, scopes[referent]))

    def operand(graph, mapping, token, start, end):
        """Create the node for a clause operand the first time it is seen."""
        if token in mapping:
            return
        if is_quoted(token):
            # Quoted constants keep their text as label and take the anchor.
            mapping[token] = graph.add_node(label=token,
                                            anchors=span_list(start, end))
        else:
            mapping[token] = graph.add_node()

    def connect(graph, mapping, scopes, finis, box, source, target, label,
                kind):
        """Link *source* to *target*, as an edge or through a reified node."""
        if reify:
            if box not in mapping:
                mapping[box] = graph.add_node(type=0)
            node = graph.add_node(label=label, type=kind)
            # Box membership of the new node is decided later, in finish().
            finis.append((box, source, node))
            graph.add_edge(mapping[source].id, node.id, None)
            graph.add_edge(node.id, mapping[target].id, None)
        else:
            scopes.setdefault(source, set()).add(box)
            graph.add_edge(mapping[source].id, mapping[target].id, label)

    def finish(graph, mapping, finis, scopes):
        """Post-process a completed graph just before it is yielded."""
        if reify:
            for box, referent, node in finis:
                # Attach the reified node to its box when asked to (*full*),
                # when its source is a quoted constant, or when the box is
                # not one already recorded for the source referent.
                if full or is_quoted(referent) \
                   or box not in scopes[referent]:
                    graph.add_edge(mapping[box].id, node.id, "โ")
        else:
            for referent in scopes:
                if len(scopes[referent]) > 1:
                    print("pbm.read(): [graph #{}] stray referent โ{}โ in boxes {}."
                          "".format(graph.id, referent, scopes[referent]),
                          file=sys.stderr)
        # Type-0 nodes are the ones created for boxes; roots become tops.
        for node in graph.nodes:
            if node.type == 0 and node.is_root():
                node.is_top = True

    graph = identifier = sentence = None
    mapping, scopes, finis = {}, {}, []
    header = 3
    for i, line in enumerate(fp, 1):
        line = line.rstrip()
        if trace:
            print("{}: {}".format(i, line))

        # An empty line ends the current block and re-arms header parsing.
        if not line:
            if graph is not None:
                finish(graph, mapping, finis, scopes)
                yield graph, None
            graph = identifier = None
            mapping, scopes, finis = {}, {}, []
            header = 3
            continue

        # Three header lines: skipped, identifier, input string.
        if header:
            if header == 2:
                match = id_matcher.match(line)
                if match is None:
                    raise Exception("pbm.read(): "
                                    "[line {}] missing identifier in โ{}โ; exit."
                                    "".format(i, line))
                part, document = match.groups()
                identifier = "{:02d}{:04d}".format(int(part), int(document))
            elif header == 1:
                if text is not None and identifier in text:
                    sentence = text[identifier]
                else:
                    # Drops the first five and the last character of the line.
                    sentence = line[5:-1]
                graph = Graph(identifier, flavor=2, framework="drg")
                graph.add_input(sentence)
            header -= 1
            continue

        # Referent clause: the box introduces a discourse referent.
        match = referent_matcher.match(line)
        if match is not None:
            box, referent, start, end = match.groups()
            declare(scopes, referent, box, i)
            if box not in mapping:
                mapping[box] = graph.add_node(type=0)
            if referent not in mapping:
                mapping[referent] = graph.add_node(anchors=span_list(start, end))
            else:
                # Passes None when the clause has no offsets, as before;
                # presumably add_anchor() tolerates that -- confirm.
                mapping[referent].add_anchor(span(start, end))
            graph.add_edge(mapping[box].id, mapping[referent].id, "โ")
            continue

        # Condition clause: an operator from the conditions table.
        match = condition_matcher.match(line)
        if match is not None:
            box, condition, source, target, start, end = match.groups()
            condition = conditions[condition]
            operand(graph, mapping, source, start, end)
            operand(graph, mapping, target, start, end)
            connect(graph, mapping, scopes, finis,
                    box, source, target, condition, 3)
            continue

        # Role clause: tried after conditions because its pattern is looser.
        match = role_matcher.match(line)
        if match is not None:
            box, role, source, target, start, end = match.groups()
            if source not in mapping:
                mapping[source] = graph.add_node()
            operand(graph, mapping, target, start, end)
            connect(graph, mapping, scopes, finis,
                    box, source, target, role, 2)
            continue

        # Concept clause: labels the referent and records its sense.
        match = concept_matcher.match(line)
        if match is not None:
            box, lemma, sense, referent, start, end = match.groups()
            declare(scopes, referent, box, i)
            if referent not in mapping:
                node = graph.add_node(anchors=span_list(start, end))
                mapping[referent] = node
            else:
                node = mapping[referent]
                node.add_anchor(span(start, end))
            if strict and node.label is not None:
                raise Exception("pbm.read(): "
                                "[line {}] duplicate label โ{}โ on referent โ{}โ "
                                "(instead of โ{}โ); exit."
                                "".format(i, lemma, referent, node.label))
            node.label = lemma
            if is_quoted(sense):
                sense = sense[1:-1]
            node.set_property("sense", sense)
            continue

        # Discourse clause: a relation between boxes.
        match = discourse_matcher.match(line)
        if match is not None:
            top, relation, one, two = match.groups()
            if one not in mapping:
                mapping[one] = graph.add_node(type=0)
            if two is not None:
                # Three boxes: the edge joins the second and the third.
                if trace > 1:
                    print("ternary discourse relation")
                if two not in mapping:
                    mapping[two] = graph.add_node(type=0)
                graph.add_edge(mapping[one].id, mapping[two].id, relation)
            else:
                if top not in mapping:
                    mapping[top] = graph.add_node(type=0)
                graph.add_edge(mapping[top].id, mapping[one].id, relation)
            continue

        # Anything else must be a bare comment line.
        if empty_matcher.search(line) is None:
            raise Exception("pmb.read(): [line {}] invalid clause โ{}โ."
                            "".format(i, line))

    # Input that does not end in an empty line still yields its last graph.
    if graph is not None:
        finish(graph, mapping, finis, scopes)
        yield graph, None
| |
|
|