import json
import sys

from graph import Graph
|
|
|
|
def read(fp, text=None, node_centric=False):
    """Yield one graph per sentence record found in a norec JSON stream.

    ``json.load(fp)`` must produce an iterable of sentence records.  Each
    record is read through the keys ``"sent_id"``, ``"text"`` and
    ``"opinions"``; each opinion through ``"Polar_expression"``,
    ``"Polarity"``, ``"Intensity"``, ``"Source"`` and ``"Target"``.  The
    expression, source and target entries are indexed at position 1 to get
    a list of ``"from:to"`` offset strings.

    Two encodings are produced:

    * ``node_centric=False``: one extra node is created with ``top=True``
      and an edge labelled with the polarity goes from it to every
      expression node; sources and targets are unlabelled nodes reached
      through edges labelled ``"Source"`` / ``"Target"``.
    * ``node_centric=True``: every expression node is created with
      ``top=True`` and carries the polarity as its label; source and
      target nodes are labelled ``"Source"`` / ``"Target"`` and the edges
      to them have ``None`` as label.

    Within one sentence, nodes are shared by their tuple of offset
    strings: a source or target whose offsets were already registered
    reuses the existing node.  Expression nodes are only registered in the
    labelled-edge encoding.

    Args:
        fp: Readable file object containing the JSON document.
        text: Not used by this reader; kept for interface compatibility.
        node_centric: Selects the encoding described above.

    Yields:
        ``(graph, None)`` for every record that could be converted.
        Records that raise during conversion are reported on stderr and
        skipped.
    """

    def anchor(node):
        """Turn the ``"from:to"`` strings in ``node[1]`` into anchor dicts."""
        anchors = []
        for span in node[1]:
            parts = span.split(":")
            anchors.append({"from": int(parts[0]), "to": int(parts[1])})
        return anchors

    def attach(graph, seen, expression, opinion, role):
        """Link ``expression`` to the ``role`` span of ``opinion``, if any."""
        holder = opinion[role]
        if not holder[1]:
            # No offsets recorded for this role: nothing to attach.
            return
        key = tuple(holder[1])
        if key in seen:
            node = seen[key]
        else:
            node = graph.add_node(
                label=role if node_centric else None,
                anchors=anchor(holder),
            )
            seen[key] = node
        graph.add_edge(expression.id, node.id, None if node_centric else role)

    def build(native):
        """Convert one sentence record into a graph."""
        seen = {}  # offset-string tuple -> node already created for it
        graph = Graph(native["sent_id"], flavor=1, framework="norec")
        graph.add_input(native["text"])

        # Only the labelled-edge encoding uses a dedicated top node.
        top = None if node_centric else graph.add_node(top=True)

        for opinion in native["opinions"]:
            span = opinion["Polar_expression"]
            properties, values = ["Intensity"], [opinion["Intensity"]]

            if node_centric:
                expression = graph.add_node(
                    label=opinion["Polarity"],
                    top=True,
                    properties=properties,
                    values=values,
                    anchors=anchor(span),
                )
            else:
                expression = graph.add_node(
                    properties=properties,
                    values=values,
                    anchors=anchor(span),
                )
                key = tuple(span[1])
                if key in seen:
                    # Diagnostic only; the newer node replaces the old entry.
                    print(
                        "we got double expression here",
                        native["sent_id"],
                        file=sys.stderr,
                    )
                seen[key] = expression
                graph.add_edge(top.id, expression.id, opinion["Polarity"])

            for role in ("Source", "Target"):
                attach(graph, seen, expression, opinion, role)

        return graph

    for native in json.load(fp):
        try:
            graph = build(native)
        except Exception as error:
            # Best effort: a malformed record must not abort the whole file.
            print(
                f"codec.norec.read(): ignoring {native}: {error}",
                file=sys.stderr,
            )
            continue
        yield graph, None
|
|
|
|
def get_text_span(node, text):
    """Return the anchored substrings of ``text`` and their offset strings.

    Args:
        node: Object whose ``anchors`` attribute is an iterable of mappings
            with ``"from"`` and ``"to"`` entries used as slice bounds.
        text: String the anchors index into.

    Returns:
        A ``(anchored_text, anchors)`` tuple of two parallel lists: the
        slices ``text[from:to]`` and the matching ``"from:to"`` strings,
        in the order of ``node.anchors``.
    """
    # Read each anchor once so both result lists are built from the same pairs.
    bounds = [(anchor["from"], anchor["to"]) for anchor in node.anchors]
    anchored_text = [text[start:end] for start, end in bounds]
    anchors = [f"{start}:{end}" for start, end in bounds]
    return anchored_text, anchors
|
|
|
|
def write(graph, input, node_centric=False):
    """Convert ``graph`` back into a sentence dictionary.

    Args:
        graph: Graph to decode; its ``id`` is used in the error message.
        input: Sentence text, forwarded unchanged to the selected writer.
        node_centric: When true, decode with ``write_node_centric``;
            otherwise with ``write_labeled_edge``.

    Returns:
        Whatever the selected writer returns.

    Raises:
        Exception: Any error raised by the selected writer is re-raised
            after a one-line diagnostic naming the sentence is printed to
            stderr.
    """
    try:
        if node_centric:
            return write_node_centric(graph, input)
        return write_labeled_edge(graph, input)
    except Exception:
        # Diagnostics go to stderr, like read()'s; bare raise keeps the
        # original traceback intact.
        print(f"Problem with decoding sentence {graph.id}", file=sys.stderr)
        raise
|
|
|
|
def write_node_centric(graph, input):
    """Decode a node-centric graph into a sentence dictionary.

    Every node whose label is not ``"Source"`` or ``"Target"`` becomes one
    opinion: its label is the polarity, its anchors give the polar
    expression, and its properties (if any) are copied in as extra keys.
    Each edge that starts at such a node and ends at a node labelled
    ``"Source"`` or ``"Target"`` appends that node's anchored text and
    offsets to the matching entry of the opinion.

    Args:
        graph: Object providing ``id``, ``nodes`` (with ``id``, ``label``,
            ``properties``, ``values``, ``anchors``) and ``edges`` (with
            ``src`` and ``tgt``).
        input: Sentence text the anchors index into.

    Returns:
        A dict with the keys ``"sent_id"``, ``"text"`` and ``"opinions"``.
    """
    roles = ("Source", "Target")
    nodes = {node.id: node for node in graph.nodes}

    # First pass: one opinion per non-role node, keyed by node id.
    opinions = {}
    for node in graph.nodes:
        if node.label in roles:
            continue
        opinion = {
            "Source": [[], []],
            "Target": [[], []],
            "Polar_expression": list(get_text_span(node, input)),
            "Polarity": node.label,
        }
        if node.properties:
            opinion.update(zip(node.properties, node.values))
        opinions[node.id] = opinion

    # Second pass: attach role nodes to the opinion their edge starts at.
    for edge in graph.edges:
        if edge.src not in opinions:
            continue

        role_node = nodes[edge.tgt]
        if role_node.label not in roles:
            continue

        anchored_text, anchors = get_text_span(role_node, input)
        slot = opinions[edge.src][role_node.label]
        slot[0].extend(anchored_text)
        slot[1].extend(anchors)

    return {
        "sent_id": graph.id,
        "text": input,
        "opinions": list(opinions.values()),
    }
|
|
|
|
def write_labeled_edge(graph, input):
    """Decode a labelled-edge graph into a sentence dictionary.

    Every edge whose label is not ``"Source"`` or ``"Target"`` defines one
    opinion at its target node: the edge label is the polarity and the
    node's anchors give the polar expression.  Every ``"Source"`` /
    ``"Target"`` edge that starts at such a node appends the anchored text
    and offsets of its target to the matching entry of the opinion.

    Node properties of the expression node (``read`` stores ``Intensity``
    there) are copied into the opinion, as ``write_node_centric`` does,
    but never replace one of the four structural keys.

    Args:
        graph: Object providing ``id``, ``nodes`` (with ``id``,
            ``properties``, ``values``, ``anchors``) and ``edges`` (with
            ``src``, ``tgt`` and ``lab``).
        input: Sentence text the anchors index into.

    Returns:
        A dict with the keys ``"sent_id"``, ``"text"`` and ``"opinions"``.
    """
    roles = ("Source", "Target")
    nodes = {node.id: node for node in graph.nodes}

    # First pass: one opinion per target of a polarity edge.
    opinions = {}
    for edge in graph.edges:
        if edge.lab in roles:
            continue

        node = nodes[edge.tgt]
        opinion = {
            "Source": [[], []],
            "Target": [[], []],
            "Polar_expression": list(get_text_span(node, input)),
            "Polarity": edge.lab,
        }
        if node.properties:
            for key, value in zip(node.properties, node.values):
                # setdefault: a property must not clobber a structural key.
                opinion.setdefault(key, value)
        opinions[node.id] = opinion

    # Second pass: fill in sources and targets of the known opinions.
    for edge in graph.edges:
        if edge.lab not in roles:
            continue
        if edge.src not in opinions:
            continue

        anchored_text, anchors = get_text_span(nodes[edge.tgt], input)
        slot = opinions[edge.src][edge.lab]
        slot[0].extend(anchored_text)
        slot[1].extend(anchors)

    return {
        "sent_id": graph.id,
        "text": input,
        "opinions": list(opinions.values()),
    }
|
|