| | """Describes the foundational level elements (layer 1) of the UCCA annotation. |
| | |
| | Layer 1 is the foundational layer of UCCA, whose Nodes and Edges represent |
| | scene objects and relations. The basic building blocks of this layer are |
| | the FNode, which is a participant in a scene relation (including the |
| | relation itself), and the various Edges between these Nodes, which represent |
| | the type of relation between the Nodes. |
| | |
| | """ |
| |
|
| | import itertools |
| | import operator |
| |
|
| | from ucca import core, layer0 |
| |
|
# Identifier of this layer: passed as the Layer ID by Layer1.__init__, used as
# the prefix of every node ID generated by Layer1.next_id(), and compared with
# a parent's layer ID when looking for the foundational parent of an FNode.
LAYER_ID = '1'
| |
|
| |
|
class NodeTags:
    """Namespace of the tag strings carried by layer 1 Nodes.

    The class only groups constants; ``__init__`` is set to None so that an
    attempt to instantiate it fails.
    """

    # Tag assigned to FoundationalNode objects.
    Foundational = 'FN'
    # Tag assigned to Linkage objects.
    Linkage = 'LKG'
    # Tag assigned to PunctNode objects.
    Punctuation = 'PNCT'

    __init__ = None
| |
|
| |
|
class EdgeTags:
    """Namespace of the tag strings carried by layer 1 Edges.

    The class only groups constants; ``__init__`` is set to None so that an
    attempt to instantiate it fails.
    """

    Unanalyzable = 'UNA'
    Uncertain = 'UNC'
    ParallelScene = 'H'
    Participant = 'A'
    Process = 'P'
    State = 'S'
    Adverbial = 'D'
    Ground = 'G'
    Center = 'C'
    Elaborator = 'E'
    Function = 'F'
    Connector = 'N'
    Relator = 'R'
    Time = 'T'
    Quantifier = 'Q'
    Linker = 'L'
    # Edge from a parent FNode to a PunctNode.
    Punctuation = 'U'
    # Edges going out of a Linkage node: its relation and its arguments.
    LinkRelation = 'LR'
    LinkArgument = 'LA'
    # Edge from a layer 1 node down to a layer 0 Terminal.
    Terminal = 'Terminal'

    __init__ = None
| |
|
| |
|
| | |
# Attribute names recognised on layer 1 elements. Within this module 'remote'
# and 'uncertain' are read from Edge attributes and 'implicit' from Node
# attributes; 'suggest' is not referenced here (presumably consumed by other
# modules -- verify against callers).
ATTRIB_KEYS = ('remote', 'implicit', 'uncertain', 'suggest')
| |
|
| |
|
class MissingRelationError(core.UCCAError):
    """Signals that a Node lacks an outgoing Edge carrying a required tag."""
| |
|
| |
|
def _single_child_by_tag(node, tag, must=True):
    """Fetch the child reached through the first outgoing Edge tagged `tag`.

    Only the first matching Edge is considered; callers are expected to use
    this for tags that occur at most once under a Node.

    Args:
        node: the parent Node; iterating over it must yield its outgoing
            Edges.
        tag: the Edge tag to search for (matched against ``edge.tags``).
        must: when True (default) a missing Edge is an error; when False,
            None is returned instead.

    Returns:
        The child of the first matching Edge, or None if there is no such
        Edge and `must` is False.

    Raises:
        MissingRelationError: if no Edge matches and `must` is True. The
            exception carries the Node ID and the requested tag.

    """
    candidates = (edge.child for edge in node if tag in edge.tags)
    try:
        return next(candidates)
    except StopIteration:
        # Nothing matched; decide below whether that is an error.
        pass
    if not must:
        return None
    raise MissingRelationError(node.ID, tag)
| |
|
| |
|
def _multiple_children_by_tag(node, tag):
    """Collect the children reached through every outgoing Edge tagged `tag`.

    Args:
        node: the parent Node; iterating over it must yield its outgoing
            Edges.
        tag: the Edge tag to search for (matched against ``edge.tags``).

    Returns:
        A new list with the child of each matching Edge, in iteration order.
        The list is empty when no Edge matches.

    """
    children = []
    for edge in node:
        if tag in edge.tags:
            children.append(edge.child)
    return children
| |
|
| |
|
class Linkage(core.Node):
    """A Linkage between parallel scenes.

    A Linkage node ties together a link relation and the scenes it links.
    The semantic type of the link is not stored on this object: the
    :class:`FoundationalNode` reached through the LinkRelation Edge is the
    link relation, and the nodes reached through LinkArgument Edges are the
    arguments.

    Most cases will have two arguments, but some constructions have 1 or 3+
    arguments, depending on the semantic connection.

    Attributes:
        relation: FoundationalNode of the relation words.
        arguments: list of FoundationalNodes of the relation participants.

    """

    @property
    def relation(self):
        """The child under the LinkRelation Edge.

        Raises MissingRelationError when there is no such Edge.
        """
        return _single_child_by_tag(self, EdgeTags.LinkRelation, must=True)

    @property
    def arguments(self):
        """List of the children under LinkArgument Edges (may be empty)."""
        return _multiple_children_by_tag(self, EdgeTags.LinkArgument)

    def __str__(self):
        """Render as ``<relation ID>--><argument IDs joined by commas>``."""
        relation_id = str(self.relation.ID)
        argument_ids = ','.join(arg.ID for arg in self.arguments)
        return "{}-->{}".format(relation_id, argument_ids)
| |
|
| |
|
class FoundationalNode(core.Node):
    """The basic building block of UCCA annotation, represents semantic units.

    Each FoundationalNode (FNode for short) represents a semantic unit in the
    text, with relations to other semantic units. In essence, the FNodes form
    a tree of annotation, when remote units are ignored. This means that each
    FNode has exactly one FNode parent, and for completeness, there is also
    a "Passage Head" FNode which is the FNode parent of all parallel scenes and
    linkers in the top-level of the annotation.

    Remote units are FNodes which are shared between two or more different
    FNodes, and hence have two FNode parents (participate in two relations).
    In such cases there is only one FNode parent, as the other Edges to parents
    are marked with the 'remote' attribute (set to True).

    Implicit Nodes are ones which aren't mentioned in the text, and hence
    don't have any Terminal units in their span. In such cases, they will
    have an 'implicit' attribute set to True, and will take the position -1
    (both start and end positions).

    Attributes:
        participants:
        adverbials:
        times:
        quantifiers:
        grounds:
        centers:
        elaborators:
        linkers:
        parallel_scenes:
        functions:
        punctuation:
        terminals:
            a list of all children of self whose edge carries the
            corresponding tag (possibly empty).
        process:
        state:
        connector:
        relator:
            the child of self whose edge carries the corresponding tag,
            or None in case it isn't found.
        start_position:
        end_position:
            start/end position of the first/last terminal in the span of
            the FNode, without counting in remote FNodes. If the FNode is
            implicit or has no Terminals for some reason, returns -1 (both).
        fparent: the FNode parent (FNode with incoming Edge, not remote) of
            this FNode. There is exactly one for each FNode except the Passage
            head, which returns None.
        ftag: the tag of the Edge connecting the fparent (as described above)
            with this FNode, or None if there is no fparent.
        ftags: the tags of that same Edge, or None if there is no fparent.
        discontiguous: whether the Terminals of this FNode have a gap in
            their positions.

    """

    # --- children reached through a tag that may occur several times -------

    @property
    def participants(self):
        """List of children under Participant Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Participant)

    @property
    def adverbials(self):
        """List of children under Adverbial Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Adverbial)

    @property
    def times(self):
        """List of children under Time Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Time)

    @property
    def quantifiers(self):
        """List of children under Quantifier Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Quantifier)

    @property
    def grounds(self):
        """List of children under Ground Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Ground)

    @property
    def centers(self):
        """List of children under Center Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Center)

    @property
    def elaborators(self):
        """List of children under Elaborator Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Elaborator)

    @property
    def linkers(self):
        """List of children under Linker Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Linker)

    @property
    def parallel_scenes(self):
        """List of children under ParallelScene Edges."""
        return _multiple_children_by_tag(self, EdgeTags.ParallelScene)

    @property
    def functions(self):
        """List of children under Function Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Function)

    @property
    def punctuation(self):
        """List of children under Punctuation Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Punctuation)

    @property
    def terminals(self):
        """List of children directly under Terminal Edges."""
        return _multiple_children_by_tag(self, EdgeTags.Terminal)

    # --- children reached through a tag expected at most once --------------

    @property
    def process(self):
        """The child under a Process Edge, or None."""
        return _single_child_by_tag(self, EdgeTags.Process, False)

    @property
    def state(self):
        """The child under a State Edge, or None."""
        return _single_child_by_tag(self, EdgeTags.State, False)

    @property
    def connector(self):
        """The child under a Connector Edge, or None."""
        return _single_child_by_tag(self, EdgeTags.Connector, False)

    @property
    def relator(self):
        """The child under a Relator Edge, or None."""
        return _single_child_by_tag(self, EdgeTags.Relator, False)

    # --- foundational parent ----------------------------------------------

    def _fedge(self):
        """Returns the Edge of the fparent, or None.

        The Edge returned is the first incoming one whose parent belongs to
        layer 1, is tagged as Foundational, and which is not marked 'remote'.
        """
        for edge in self.incoming:
            if (edge.parent.layer.ID == LAYER_ID and
                    edge.parent.tag == NodeTags.Foundational and
                    not edge.attrib.get('remote')):
                return edge
        return None

    @property
    def fparent(self):
        """The parent at the other end of the fparent Edge, or None."""
        edge = self._fedge()
        return edge.parent if edge else None

    @property
    def ftag(self):
        """The ``tag`` of the fparent Edge, or None."""
        edge = self._fedge()
        return edge.tag if edge else None

    @property
    def ftags(self):
        """The ``tags`` of the fparent Edge, or None."""
        edge = self._fedge()
        return edge.tags if edge else None

    # --- span --------------------------------------------------------------

    def get_terminals(self, punct=True, remotes=False, visited=None):
        """Returns a list of all terminals under the span of this FoundationalNode.

        :param punct: whether to include punctuation Terminals, defaults to True
                      (forwarded unchanged to the children)
        :param remotes: whether to include Terminals from remote FoundationalNodes, defaults to false
        :param visited: used to detect cycles; leave as None on external calls
        :return: a list of :class:`layer0`.Terminal objects, sorted by position
                 when called with ``visited=None``
        """
        if visited is None:
            # Top-level call: run the traversal with a fresh visited set and
            # sort the collected terminals once at the end.
            return sorted(self.get_terminals(punct=punct, remotes=remotes, visited=set()),
                          key=operator.attrgetter("position"))
        # Follow only edges not seen before on this path; skip remote edges
        # unless they were explicitly requested.
        outgoing = {e for e in set(self) - visited if remotes or not e.attrib.get("remote")}
        return [t for e in outgoing for t in e.child.get_terminals(
            punct=punct, remotes=remotes, visited=visited | outgoing)]

    @property
    def start_position(self):
        """Position of the first terminal in the span, or -1 if there is none."""
        try:
            return self.get_terminals()[0].position
        except IndexError:
            return -1

    @property
    def end_position(self):
        """Position of the last terminal in the span, or -1 if there is none."""
        try:
            return self.get_terminals()[-1].position
        except IndexError:
            return -1

    @property
    def discontiguous(self):
        """True iff two consecutive terminals are not at consecutive positions."""
        terms = self.get_terminals()
        return any(cur.position + 1 != nxt.position
                   for cur, nxt in zip(terms, terms[1:]))

    def get_sequences(self):
        """Returns the contiguous position runs covered by this FNode.

        :return: a list of (first, last) position tuples, one per maximal run
                 of terminal positions without a gap. The list is empty for
                 implicit nodes and for nodes without any terminal.
        """
        if self.attrib.get('implicit'):
            return []
        pos = sorted(t.position for t in self.get_terminals())
        if not pos:
            # A non-implicit node may still have no terminals; indexing
            # pos[0] / pos[-1] would raise IndexError in that case.
            return []
        sequences = []
        run_start = previous = pos[0]
        for current in pos[1:]:
            if previous + 1 < current:
                # Gap found: close the current run and open a new one.
                sequences.append((run_start, previous))
                run_start = current
            previous = current
        sequences.append((run_start, previous))
        return sequences

    def to_text(self):
        """Returns the text in the span of self, separated by spaces."""
        return ' '.join(t.text for t in self.get_terminals())

    def is_scene(self):
        """Returns True iff self has a State or a Process child."""
        return self.state is not None or self.process is not None

    def __str__(self):
        """Renders the children of self, ordered by start position.

        Terminal children are printed as-is, followed by a space unless they
        end the span of self. Other children are printed as ``[TAGS child] ``
        where TAGS are the edge tags joined by ``|``, with ``*`` appended for
        remote edges and ``?`` for uncertain ones; a child whose start
        position is -1 is printed as ``[TAGS IMPLICIT] ``. ``... `` is
        inserted where a positional gap precedes the next child.
        """
        def start(e):
            child = e.child
            return child.position if child.layer.ID == layer0.LAYER_ID else child.start_position

        sorted_edges = sorted(self, key=start)
        # Loop-invariant: computing it requires a full terminal traversal.
        own_end = self.end_position
        output = []
        for edge, next_edge in zip(sorted_edges, sorted_edges[1:] + [None]):
            node = edge.child
            remote = edge.attrib.get('remote')
            edge_start = start(edge)
            end = node.position if node.layer.ID == layer0.LAYER_ID else node.end_position
            if edge.tag == EdgeTags.Terminal:
                output.append(str(node))
                if end != own_end:
                    output.append(" ")
            else:
                edge_tags = "|".join(edge.tags)
                if remote:
                    edge_tags += '*'
                if edge.attrib.get('uncertain'):
                    edge_tags += '?'
                if edge_start == -1:
                    output.append("[{} IMPLICIT] ".format(edge_tags))
                else:
                    output.append("[{} {}] ".format(edge_tags, str(node)))
            # Mark a gap between this child and the next one.
            if edge_start != -1 and not remote and next_edge is not None and end + 1 < start(next_edge):
                output.append("... ")
        return "".join(output)

    def get_top_scene(self):
        """Returns the top-level scene this FNode is within, or None"""
        if self in self.layer.top_scenes:
            return self
        parent = self.fparent
        if parent is None:
            return None
        return parent.get_top_scene()
| |
|
| |
|
class PunctNode(FoundationalNode):
    """Encapsulates punctuation :class:`layer0`.Terminal objects.

    Attributes:
        terminals: return the :class:`layer0`.Terminal objects encapsulated
            by this Node in a list (at least one, usually not more than 1).
        start_position:
        end_position:
            start/end position of the first/last terminal in the span of
            the PunctNode.

    """

    def add(self, edge_tag, node, *, edge_attrib=None):
        """Adds a layer 0 Terminal as a child of this PunctNode.

        :param edge_tag: the tag of the new Edge.
        :param node: the child to add; must belong to layer 0. If it is not
                     already a punctuation Terminal, its tag is changed to
                     the layer 0 punctuation tag.
        :param edge_attrib: Keyword only, dictionary of attributes to be
                            passed to the Edge initializer.

        :return: whatever the parent class ``add`` returns.

        :raise ValueError if node does not belong to layer 0.
        """
        if node.layer.ID != layer0.LAYER_ID:
            raise ValueError("Non-terminal child (%s) for %s node (%s)" % (node.ID, NodeTags.Punctuation, self.ID))
        if not layer0.is_punct(node):
            # Coerce the terminal into a punctuation one rather than failing.
            node.tag = layer0.NodeTags.Punct
        # Forward the caller's attributes (they used to be discarded) and
        # propagate the result of the parent implementation.
        return super().add(edge_tag, node, edge_attrib=edge_attrib)

    @property
    def terminals(self):
        """All children of this node."""
        return self.children

    def get_terminals(self, punct=True, *args, **kwargs):
        """Returns a list of all terminals under the span of this PunctNode.

        :param punct: whether to include punctuation Terminals, defaults to True

        Any further arguments are accepted for signature compatibility with
        the parent class and ignored.

        :return: a list of :class:`layer0`.Terminal objects; empty when punct
                 is false

        """
        return self.children if punct else []

    def __str__(self):
        """Returns the text of the encapsulated terminals."""
        return self.to_text()
| |
|
| |
|
class Layer1(core.Layer):
    """The layer 1 container of a Passage.

    Creates the layer's nodes (:class:`FoundationalNode`, :class:`PunctNode`
    and :class:`Linkage`) and keeps track of the top-level scenes and
    linkages, which are re-evaluated whenever an Edge is added, removed or
    has its tag changed.

    Attributes:
        top_scenes: a copy of the list of top-level scenes.
        top_linkages: a copy of the list of top-level linkages.

    """

    def __init__(self, root, attrib=None, *, orderkey=core.id_orderkey):
        """Creates the layer together with its head FNode.

        :param root: passed to the Layer initializer and to the head FNode.
        :param attrib: passed to the Layer initializer.
        :param orderkey: Keyword only, passed to the Layer initializer.
        """
        super().__init__(ID=LAYER_ID, root=root, attrib=attrib,
                         orderkey=orderkey)
        self._scenes = []
        self._linkages = []
        self._head_fnode = FoundationalNode(root=root,
                                            tag=NodeTags.Foundational,
                                            ID=self.next_id())
        # At creation the head FNode is the only node and the only head.
        self._all = [self._head_fnode]
        self._heads = [self._head_fnode]

    @property
    def top_scenes(self):
        return self._scenes[:]

    @property
    def top_linkages(self):
        return self._linkages[:]

    def next_id(self):
        """Returns the next available ID string for this layer."""
        # Start right after the number of known nodes and advance until an
        # ID that the root does not know yet is found.
        for n in itertools.count(start=len(self._all) + 1):
            id_str = "{}{}{}".format(LAYER_ID, core.Node.ID_SEPARATOR, n)
            try:
                self._root.by_id(id_str)
            except KeyError:
                return id_str

    def add_fnode_multiple(self, parent, edge_categories, *, implicit=False, edge_attrib=None):
        """Adds a new :class:`FNode` whose parent and Edge tag are given.

        :param parent: the FNode which will be the parent of the new FNode.
                        If the parent is None, adds under the layer head FNode.
        :param edge_categories: list of categories on the Edge between the parent and the new FNode.
                        If empty, the FNode is created but not attached.
        :param implicit: whether to set the new FNode as implicit (default False)
        :param edge_attrib: Keyword only, dictionary of attributes to be passed
                            to the Edge initializer.

        :return: the newly created FNode

        :raise core.FrozenPassageError if the Passage is frozen
        """
        if parent is None:
            parent = self._head_fnode
        node_attrib = {'implicit': True} if implicit else {}
        fnode = FoundationalNode(root=self.root, tag=NodeTags.Foundational,
                                 ID=self.next_id(), attrib=node_attrib)
        if edge_categories:
            parent.add_multiple(edge_categories, fnode, edge_attrib=edge_attrib)
        return fnode

    def add_fnode(self, parent, tag, *, implicit=False, edge_attrib=None):
        """Adds a new :class:`FNode` under parent with a single Edge tag.

        :param parent: the parent FNode; None means the layer head FNode.
        :param tag: the tag of the Edge between the parent and the new FNode.
        :param implicit: whether to set the new FNode as implicit (default False)
        :param edge_attrib: Keyword only, dictionary of attributes to be passed
                            to the Edge initializer.

        :return: the newly created FNode
        """
        return self.add_fnode_multiple(parent, [(tag,)], implicit=implicit,
                                       edge_attrib=edge_attrib)

    def add_remote_multiple(self, parent, edge_categories, child, edge_attrib=None):
        """Adds a new :class:`core`.Edge with remote attribute between the nodes.

        :param parent: the parent of the remote Edge
        :param edge_categories: list of categories of the Edge
        :param child: the child of the remote Edge
        :param edge_attrib: dictionary of attributes to be passed to the Edge
                            initializer. It is copied, so the caller's
                            dictionary is left untouched; 'remote' is always
                            set to True in the copy.

        :return: the result of the parent's ``add_multiple``

        :raise core.FrozenPassageError if the Passage is frozen
        """
        remote_attrib = {} if edge_attrib is None else dict(edge_attrib)
        remote_attrib["remote"] = True
        return parent.add_multiple(edge_categories, child, edge_attrib=remote_attrib)

    def add_remote(self, parent, tag, child, edge_attrib=None):
        """Adds a remote Edge with a single tag between parent and child.

        :param parent: the parent of the remote Edge
        :param tag: the tag of the Edge
        :param child: the child of the remote Edge
        :param edge_attrib: dictionary of additional attributes for the Edge

        :return: the result of :meth:`add_remote_multiple`
        """
        return self.add_remote_multiple(parent, [(tag,)], child, edge_attrib=edge_attrib)

    def add_punct(self, parent, terminal, layer=None, slot=None, edge_attrib=None):
        """Adds a PunctNode as the child of parent and the Terminal under it.

        :param parent: the parent of the newly created PunctNode. If None, adds
                        under the layer head FNode.
        :param terminal: the punctuation Terminal we want to put under parent.
        :param layer: placed, after slot, in the category tuple of both
                        created Edges.
        :param slot: placed, after the tag, in the category tuple of both
                        created Edges.
        :param edge_attrib: dictionary of attributes to be passed to the
                        initializer of the Edge between parent and the
                        PunctNode (not to the Edge down to the Terminal).

        :return: the newly created PunctNode.

        :raise core.FrozenPassageError if the Passage is frozen.

        """
        if parent is None:
            parent = self._head_fnode
        punct_node = PunctNode(root=self.root, tag=NodeTags.Punctuation,
                               ID=self.next_id())
        parent.add_multiple([(EdgeTags.Punctuation, slot, layer)], punct_node, edge_attrib=edge_attrib)
        punct_node.add_multiple([(EdgeTags.Terminal, slot, layer)], terminal)
        return punct_node

    def add_linkage(self, relation, *args):
        """Adds a Linkage between the link relation and the linked arguments.

        Linkage objects are all heads and have no parents.

        :param relation: the link relation FNode.
        :param args: any number (at least 1) of linkage arguments FNodes.

        :return: the newly created Linkage

        :raise core.FrozenPassageError if the Passage is frozen.

        """
        linkage = Linkage(root=self.root, tag=NodeTags.Linkage,
                          ID=self.next_id())
        linkage.add(EdgeTags.LinkRelation, relation)
        for arg in args:
            linkage.add(EdgeTags.LinkArgument, arg)
        return linkage

    def _check_top_scene(self, node):
        """Checks whether a node is a scene, and a top-level one.

        A top level scene is one which is not embedded in any other scene.

        :param node: the FNode to check.

        :return: True iff node is a top-level scene.

        """
        if not node.is_scene():
            return False
        # Climb the chain of foundational parents up to the head FNode; any
        # scene met on the way means node is embedded in it.
        while node.fparent not in (None, self._head_fnode):
            node = node.fparent
            if node.is_scene():
                return False
        return True

    def _update_top_scene(self, node):
        """Adds/removes the node if it's a top-level scene."""
        if node.tag != NodeTags.Foundational:
            return
        if node in self._scenes and not self._check_top_scene(node):
            self._scenes.remove(node)
        elif node not in self._scenes and self._check_top_scene(node):
            self._scenes.append(node)
            # The new scene (now last in the list) may embed scenes that
            # were top-level so far; re-check all the others. Iterating over
            # a slice copy makes removal during the loop safe.
            for ts in self._scenes[:-1]:
                if not self._check_top_scene(ts):
                    self._scenes.remove(ts)
            self._scenes.sort(key=self.orderkey)

    def _update_top_linkage(self, linkage):
        """Adds/removes the linkage if it's a top level linkage.

        A linkage is kept as top-level iff all of its arguments are currently
        top-level scenes.
        """
        if all(fnode in self._scenes for fnode in linkage.arguments):
            if linkage not in self._linkages:
                self._linkages.append(linkage)
                self._linkages.sort(key=self.orderkey)
        elif linkage in self._linkages:
            self._linkages.remove(linkage)

    def _update_edge(self, edge):
        """Updates top scenes and linkages after a change to the given Edge."""
        endpoints = (edge.parent, edge.child)
        # Scenes first: the linkage check below relies on the scene list.
        for node in endpoints:
            self._update_top_scene(node)
        for node in endpoints:
            for lkg in [x for x in node.parents if x.tag == NodeTags.Linkage]:
                self._update_top_linkage(lkg)

    def _add_edge(self, edge):
        """Registers the Edge, then refreshes top scenes and linkages."""
        super()._add_edge(edge)
        self._update_edge(edge)

    def _remove_edge(self, edge):
        """Unregisters the Edge, then refreshes top scenes and linkages."""
        super()._remove_edge(edge)
        self._update_edge(edge)

    def _change_edge_tag(self, edge, old_tag):
        """Records the tag change, then refreshes top scenes and linkages."""
        super()._change_edge_tag(edge, old_tag)
        self._update_edge(edge)
| |
|