package org.maltparser.concurrent.graph; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.maltparser.concurrent.graph.dataformat.ColumnDescription; import org.maltparser.concurrent.graph.dataformat.DataFormat; import org.maltparser.core.exception.MaltChainedException; import org.maltparser.core.symbol.SymbolTable; import org.maltparser.core.syntaxgraph.DependencyStructure; import org.maltparser.core.syntaxgraph.node.DependencyNode; /** * Immutable and tread-safe dependency graph implementation. * * @author Johan Hall */ public final class ConcurrentDependencyGraph { private static final String TAB_SIGN = "\t"; private final DataFormat dataFormat; private final ConcurrentDependencyNode[] nodes; /** * Creates a copy of a dependency graph * * @param graph a dependency graph * @throws ConcurrentGraphException */ public ConcurrentDependencyGraph(ConcurrentDependencyGraph graph) throws ConcurrentGraphException { this.dataFormat = graph.dataFormat; this.nodes = new ConcurrentDependencyNode[graph.nodes.length+1]; for (int i = 0; i < graph.nodes.length; i++) { nodes[i] = new ConcurrentDependencyNode(this, (ConcurrentDependencyNode)graph.nodes[i]); } } /** * Creates a immutable dependency graph * * @param dataFormat a data format that describes the label types (or the columns in the input and output) * @param inputTokens a string array of tokens. Each label is separated by a tab-character and must follow the order in the data format * @throws ConcurrentGraphException */ public ConcurrentDependencyGraph(DataFormat dataFormat, String[] inputTokens) throws ConcurrentGraphException { this.dataFormat = dataFormat; this.nodes = new ConcurrentDependencyNode[inputTokens.length+1]; // Add nodes nodes[0] = new ConcurrentDependencyNode(this, 0, null); // ROOT for (int i = 0; i < inputTokens.length; i++) { String[] columns = inputTokens[i].split(TAB_SIGN); nodes[i+1] = new ConcurrentDependencyNode(this, i+1, columns); } // Check graph for (int i = 0; i < nodes.length; i++) { if (nodes[i].getHeadIndex() >= nodes.length) { throw new ConcurrentGraphException("Not allowed to add a head node that doesn't exists"); } } } /** * Creates a immutable dependency graph * * @param dataFormat a data format that describes the label types (or the columns in the input and output) * @param sourceGraph a dependency graph that implements the interface org.maltparser.core.syntaxgraph.DependencyStructure * @param defaultRootLabel the default root label * @throws MaltChainedException */ public ConcurrentDependencyGraph(DataFormat dataFormat, DependencyStructure sourceGraph, String defaultRootLabel) throws MaltChainedException { this.dataFormat = dataFormat; this.nodes = new ConcurrentDependencyNode[sourceGraph.nDependencyNode()]; // Add nodes nodes[0] = new ConcurrentDependencyNode(this, 0, null); // ROOT for (int index : sourceGraph.getTokenIndices()) { final DependencyNode gnode = sourceGraph.getDependencyNode(index); String[] columns = new String[dataFormat.numberOfColumns()]; for (int i = 0; i < dataFormat.numberOfColumns(); i++) { ColumnDescription column = dataFormat.getColumnDescription(i); if (!column.isInternal()) { if (column.getCategory() == ColumnDescription.INPUT) { columns[i] = gnode.getLabelSymbol(sourceGraph.getSymbolTables().getSymbolTable(column.getName())); } else if (column.getCategory() == ColumnDescription.HEAD) { if (gnode.hasHead()) { columns[i] = Integer.toString(gnode.getHeadEdge().getSource().getIndex()); } else { columns[i] = Integer.toString(-1); } } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) { SymbolTable sourceTable = sourceGraph.getSymbolTables().getSymbolTable(column.getName()); if (gnode.getHeadEdge().hasLabel(sourceTable)) { columns[i] = gnode.getHeadEdge().getLabelSymbol(sourceTable); } else { columns[i] = defaultRootLabel; } } else { columns[i] = "_"; } } } nodes[index] = new ConcurrentDependencyNode(this, index, columns); } } protected ConcurrentDependencyGraph(DataFormat dataFormat, ConcurrentDependencyNode[] inputNodes) throws ConcurrentGraphException { this.dataFormat = dataFormat; this.nodes = new ConcurrentDependencyNode[inputNodes.length]; // Add nodes for (int i = 0; i < inputNodes.length; i++) { nodes[i] = inputNodes[i]; } // Check graph for (int i = 0; i < nodes.length; i++) { if (nodes[i].getHeadIndex() >= nodes.length) { throw new ConcurrentGraphException("Not allowed to add a head node that doesn't exists"); } } } /** * Returns the data format that describes the label types (or the columns in the input and output) * * @return the data format that describes the label types */ public DataFormat getDataFormat() { return dataFormat; } /** * Returns the root node * * @return the root node */ public ConcurrentDependencyNode getRoot() { return nodes[0]; } /** * Returns a dependency node specified by the node index * * @param nodeIndex the index of the node * @return a dependency node specified by the node index */ // public ConcurrentDependencyNode getNode(int nodeIndex) { // if (nodeIndex < 0 || nodeIndex >= nodes.length) { // return null; // } // return nodes[nodeIndex]; // } /** * Returns a dependency node specified by the node index. Index 0 equals the root node * * @param index the index of the node * @return a dependency node specified by the node index, if out of range null is returned. */ public ConcurrentDependencyNode getDependencyNode(int index) { if (index < 0 || index >= nodes.length) { return null; } return nodes[index]; } /** * Returns a dependency node specified by the node index. If index is equals to 0 (the root node) then null will be returned because this node * is not a token node. * * @param index the index of the node * @return a dependency node specified by the node index, if out of range and root node then null is returned. */ public ConcurrentDependencyNode getTokenNode(int index) { if (index <= 0 || index >= nodes.length) { return null; } return nodes[index]; } /** * Returns the number of dependency nodes (including the root node) * * @return the number of dependency nodes */ public int nDependencyNodes() { return nodes.length; } /** * Returns the number of token nodes in the dependency graph (Number of dependency nodes - the root node). * * @return the number of token nodes in the dependency graph. */ public int nTokenNodes() { return nodes.length - 1; } /** * Returns the index of the last dependency node. * * @return the index of the last dependency node. */ public int getHighestDependencyNodeIndex() { return nodes.length - 1; } /** * Returns the index of the last token node. * * @return the index of the last token node. If there are no token nodes then -1 is returned. */ public int getHighestTokenIndex() { if (nodes.length == 1) { return - 1; } return nodes.length - 1; } /** * Returns true if the dependency graph has any token nodes, otherwise false. * * @return true if the dependency graph has any token nodes, otherwise false. */ public boolean hasTokens() { return nodes.length > 1; } /** * Returns the number of edges * * @return the number of edges */ public int nEdges() { int n = 0; for (int i = 1; i < nodes.length; i++) { if (nodes[i].hasHead()) { n++; } } return n; } /** * Returns a sorted set of edges. If no edges are found an empty set is returned * * @return a sorted set of edges. * @throws ConcurrentGraphException */ public SortedSet getEdges() throws ConcurrentGraphException { SortedSet edges = Collections.synchronizedSortedSet(new TreeSet()); for (int i = 1; i < nodes.length; i++) { ConcurrentDependencyEdge edge = nodes[i].getHeadEdge(); if (edge != null) { edges.add(edge); } } return edges; } /** * Returns a sorted set of integers {0,s,..n} , where each index i identifies a dependency node. Index 0 * should always be the root dependency node and index s is the first terminal node and index n is the * last terminal node. * * @return a sorted set of integers */ public SortedSet getDependencyIndices() { SortedSet indices = Collections.synchronizedSortedSet(new TreeSet()); for (int i = 0; i < nodes.length; i++) { indices.add(i); } return indices; } /** * Returns a sorted set of integers {s,...,n}, where each index i identifies a token node. Index s * is the first token node and index n is the last token node. * * @return a sorted set of integers {s,...,n}. If there are no token nodes then an empty set is returned. */ public SortedSet getTokenIndices() { SortedSet indices = Collections.synchronizedSortedSet(new TreeSet()); for (int i = 1; i < nodes.length; i++) { indices.add(i); } return indices; } /** * Returns true if the head edge of the dependency node with index is labeled, otherwise false. * * @param index the index of the dependency node * @return true if the head edge of the dependency node with index is labeled, otherwise false. */ public boolean hasLabeledDependency(int index) { if (index < 0 || index >= nodes.length) { return false; } if (!nodes[index].hasHead()) { return false; } return nodes[index].isHeadLabeled(); } /** * Returns true if all nodes in the dependency structure are connected, otherwise false. * * @return true if all nodes in the dependency structure are connected, otherwise false. */ public boolean isConnected() { int[] components = findComponents(); int tmp = components[0]; for (int i = 1; i < components.length; i++) { if (tmp != components[i]) { return false; } } return true; } /** * Returns true if all edges in the dependency structure are projective, otherwise false. * * @return true if all edges in the dependency structure are projective, otherwise false. */ public boolean isProjective() { for (int i = 1; i < nodes.length; i++) { if (!nodes[i].isProjective()) { return false; } } return true; } /** * Returns true if all dependency nodes have at most one incoming edge, otherwise false. * * Note: In this implementation this will always be true * * @return true if all dependency nodes have at most one incoming edge, otherwise false. */ public boolean isSingleHeaded() { return true; } /** * Returns true if the dependency structure are a tree (isConnected() && isSingleHeaded()), otherwise false. * * @return true if the dependency structure are a tree (isConnected() && isSingleHeaded()), otherwise false. */ public boolean isTree() { return isConnected() && isSingleHeaded(); } /** * Returns the number of non-projective edges in the dependency structure. * * @return the number of non-projective edges in the dependency structure. */ public int nNonProjectiveEdges() { int c = 0; for (int i = 1; i < nodes.length; i++) { if (!nodes[i].isProjective()) { c++; } } return c; } protected boolean hasDependent(int nodeIndex) { for (int i = 1; i < nodes.length; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { return true; } } return false; } protected boolean hasLeftDependent(int nodeIndex) { for (int i = 1; i < nodeIndex; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { return true; } } return false; } protected boolean hasRightDependent(int nodeIndex) { for (int i = nodeIndex + 1; i < nodes.length; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { return true; } } return false; } protected List getListOfLeftDependents(int nodeIndex) { List leftDependents = Collections.synchronizedList(new ArrayList()); for (int i = 1; i < nodeIndex; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { leftDependents.add(nodes[i]); } } return leftDependents; } protected SortedSet getSortedSetOfLeftDependents(int nodeIndex) { SortedSet leftDependents = Collections.synchronizedSortedSet(new TreeSet()); for (int i = 1; i < nodeIndex; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { leftDependents.add(nodes[i]); } } return leftDependents; } protected List getListOfRightDependents(int nodeIndex) { List rightDependents = Collections.synchronizedList(new ArrayList()); for (int i = nodeIndex + 1; i < nodes.length; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { rightDependents.add(nodes[i]); } } return rightDependents; } protected SortedSet getSortedSetOfRightDependents(int nodeIndex) { SortedSet rightDependents = Collections.synchronizedSortedSet(new TreeSet()); for (int i = nodeIndex + 1; i < nodes.length; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { rightDependents.add(nodes[i]); } } return rightDependents; } protected List getListOfDependents(int nodeIndex) { List dependents = Collections.synchronizedList(new ArrayList()); for (int i = 1; i < nodes.length; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { dependents.add(nodes[i]); } } return dependents; } protected SortedSet getSortedSetOfDependents(int nodeIndex) { SortedSet dependents = Collections.synchronizedSortedSet(new TreeSet()); for (int i = 1; i < nodes.length; i++) { if (nodeIndex == nodes[i].getHeadIndex()) { dependents.add(nodes[i]); } } return dependents; } protected int getRank(int nodeIndex) { int[] components = new int[nodes.length]; int[] ranks = new int[nodes.length]; for (int i = 0; i < components.length; i++) { components[i] = i; ranks[i] = 0; } for (int i = 1; i < nodes.length; i++) { if (nodes[i].hasHead()) { int hcIndex = findComponent(nodes[i].getHead().getIndex(), components); int dcIndex = findComponent(nodes[i].getIndex(), components); if (hcIndex != dcIndex) { link(hcIndex, dcIndex, components, ranks); } } } return ranks[nodeIndex]; } protected ConcurrentDependencyNode findComponent(int nodeIndex) { int[] components = new int[nodes.length]; int[] ranks = new int[nodes.length]; for (int i = 0; i < components.length; i++) { components[i] = i; ranks[i] = 0; } for (int i = 1; i < nodes.length; i++) { if (nodes[i].hasHead()) { int hcIndex = findComponent(nodes[i].getHead().getIndex(), components); int dcIndex = findComponent(nodes[i].getIndex(), components); if (hcIndex != dcIndex) { link(hcIndex, dcIndex, components, ranks); } } } return nodes[findComponent(nodeIndex, components)]; } private int[] findComponents() { int[] components = new int[nodes.length]; int[] ranks = new int[nodes.length]; for (int i = 0; i < components.length; i++) { components[i] = i; ranks[i] = 0; } for (int i = 1; i < nodes.length; i++) { if (nodes[i].hasHead()) { int hcIndex = findComponent(nodes[i].getHead().getIndex(), components); int dcIndex = findComponent(nodes[i].getIndex(), components); if (hcIndex != dcIndex) { link(hcIndex, dcIndex, components, ranks); } } } return components; } private int findComponent(int xIndex, int[] components) { if (xIndex != components[xIndex]) { components[xIndex] = findComponent(components[xIndex], components); } return components[xIndex]; } private int link(int xIndex, int yIndex, int[] components, int[] ranks) { if (ranks[xIndex] > ranks[yIndex]) { components[yIndex] = xIndex; } else { components[xIndex] = yIndex; if (ranks[xIndex] == ranks[yIndex]) { ranks[yIndex]++; } return yIndex; } return xIndex; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((dataFormat == null) ? 0 : dataFormat.hashCode()); result = prime * result + Arrays.hashCode(nodes); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; ConcurrentDependencyGraph other = (ConcurrentDependencyGraph) obj; if (dataFormat == null) { if (other.dataFormat != null) return false; } else if (!dataFormat.equals(other.dataFormat)) return false; if (!Arrays.equals(nodes, other.nodes)) return false; return true; } public String toString() { final StringBuilder sb = new StringBuilder(); for (int i = 1; i < nodes.length; i++) { sb.append(nodes[i]); sb.append('\n'); } return sb.toString(); } }