| package org.maltparser.concurrent.graph; |
|
|
| import java.io.BufferedReader; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.util.Formatter; |
| import java.util.SortedSet; |
|
|
| import org.maltparser.concurrent.ConcurrentUtils; |
| import org.maltparser.concurrent.graph.dataformat.ColumnDescription; |
| import org.maltparser.concurrent.graph.dataformat.DataFormat; |
| import org.maltparser.core.exception.MaltChainedException; |
| import org.maltparser.core.symbol.hash.HashSymbolTableHandler; |
| import org.maltparser.core.syntaxgraph.DependencyStructure; |
| import org.maltparser.core.syntaxgraph.edge.Edge; |
| import org.maltparser.core.syntaxgraph.node.DependencyNode; |
|
|
| |
| |
| |
| |
| public class Test { |
| private static final String IGNORE_COLUMN_SIGN = "_"; |
| public static DependencyStructure getOldDependencyGraph(DataFormat dataFormat, String[] tokens) throws MaltChainedException { |
| DependencyStructure oldGraph = new org.maltparser.core.syntaxgraph.DependencyGraph(new HashSymbolTableHandler()); |
| for (int i = 0; i < tokens.length; i++) { |
| oldGraph.addDependencyNode(i+1); |
| } |
| for (int i = 0; i < tokens.length; i++) { |
| DependencyNode node = oldGraph.getDependencyNode(i+1); |
| String[] items = tokens[i].split("\t"); |
| Edge edge = null; |
| for (int j = 0; j < items.length; j++) { |
| ColumnDescription column = dataFormat.getColumnDescription(j); |
|
|
| if (column.getCategory() == ColumnDescription.INPUT && node != null) { |
| oldGraph.addLabel(node, column.getName(), items[j]); |
| } else if (column.getCategory() == ColumnDescription.HEAD) { |
| if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals(IGNORE_COLUMN_SIGN)) { |
| edge = oldGraph.addDependencyEdge(Integer.parseInt(items[j]), i+1); |
| } |
| } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { |
| oldGraph.addLabel(edge, column.getName(), items[j]); |
| } |
| } |
| } |
|
|
| oldGraph.setDefaultRootEdgeLabel(oldGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); |
| return oldGraph; |
| } |
| |
| public static void main(String[] args) { |
| long startTime = System.currentTimeMillis(); |
| String inFile = args[0]; |
| String charSet = "UTF-8"; |
|
|
| BufferedReader reader = null; |
| |
| try { |
| DataFormat dataFormat = DataFormat.parseDataFormatXMLfile("/appdata/dataformat/conllx.xml"); |
| reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), charSet)); |
| int sentenceCounter = 0; |
| while (true) { |
| String[] goldTokens = ConcurrentUtils.readSentence(reader); |
| if (goldTokens.length == 0) { |
| break; |
| } |
| sentenceCounter++; |
| ConcurrentDependencyGraph newGraph = new ConcurrentDependencyGraph(dataFormat, goldTokens); |
| DependencyStructure oldGraph = getOldDependencyGraph(dataFormat, goldTokens); |
| int newGraphINT; |
| int oldGraphINT; |
| boolean newGraphBOOL; |
| boolean oldGraphBOOL; |
| SortedSet<ConcurrentDependencyNode> newGraphSortedSet; |
| SortedSet<DependencyNode> oldGraphSortedSet; |
| |
| for (int i = 0; i < newGraph.nDependencyNodes(); i++) { |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| newGraphINT = newGraph.getDependencyNode(i).findComponent().getIndex(); |
| oldGraphINT = oldGraph.getDependencyNode(i).findComponent().getIndex(); |
|
|
| newGraphINT = newGraph.getDependencyNode(i).getRank(); |
| oldGraphINT = oldGraph.getDependencyNode(i).getRank(); |
| if (newGraphINT != oldGraphINT) { |
| System.out.println(newGraphINT + "\t" + oldGraphINT); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| } |
| |
| |
| |
| } |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } catch (ConcurrentGraphException e) { |
| e.printStackTrace(); |
| } catch (MaltChainedException e) { |
| e.printStackTrace(); |
| } finally { |
| if (reader != null) { |
| try { |
| reader.close(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| } |
| long elapsed = System.currentTimeMillis() - startTime; |
| System.out.println("Finished init basic : " + new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)"); |
| } |
|
|
|
|
| } |
|
|