| package org.maltparser; |
|
|
| import java.util.Iterator; |
|
|
| import org.maltparser.core.exception.MaltChainedException; |
| import org.maltparser.core.flow.FlowChartInstance; |
| import org.maltparser.core.io.dataformat.ColumnDescription; |
| import org.maltparser.core.io.dataformat.DataFormatInstance; |
| import org.maltparser.core.io.dataformat.DataFormatSpecification; |
| import org.maltparser.core.options.OptionManager; |
| import org.maltparser.core.symbol.SymbolTable; |
| import org.maltparser.core.symbol.SymbolTableHandler; |
| import org.maltparser.core.symbol.hash.HashSymbolTableHandler; |
| import org.maltparser.core.syntaxgraph.DependencyGraph; |
| import org.maltparser.core.syntaxgraph.DependencyStructure; |
| import org.maltparser.core.syntaxgraph.edge.Edge; |
| import org.maltparser.core.syntaxgraph.node.DependencyNode; |
| import org.maltparser.parser.SingleMalt; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| public class MaltParserService { |
| private Engine engine; |
| private FlowChartInstance flowChartInstance; |
| private DataFormatInstance dataFormatInstance; |
| private SingleMalt singleMalt; |
| private int optionContainer; |
| private boolean initialized = false; |
| |
| |
| |
| |
| |
| |
| public MaltParserService() throws MaltChainedException { |
| this(0); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| public MaltParserService(int optionContainer) throws MaltChainedException { |
| this.optionContainer = optionContainer; |
| initialize(); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| public MaltParserService(boolean optionFreeInitialization) throws MaltChainedException { |
| if (optionFreeInitialization == false) { |
| this.optionContainer = 0; |
| initialize(); |
| } else { |
| this.optionContainer = -1; |
| } |
| } |
| |
| |
| |
| |
| |
| |
| |
| public void runExperiment(String commandLine) throws MaltChainedException { |
| OptionManager.instance().parseCommandLine(commandLine, optionContainer); |
| engine = new Engine(); |
| engine.initialize(optionContainer); |
| engine.process(optionContainer); |
| engine.terminate(optionContainer); |
| } |
| |
| |
| |
| |
| |
| |
| |
| public void initializeParserModel(String commandLine) throws MaltChainedException { |
| if (optionContainer == -1) { |
| throw new MaltChainedException("MaltParserService has been initialized as an option free initialization and therefore no parser model can be initialized."); |
| } |
| OptionManager.instance().parseCommandLine(commandLine, optionContainer); |
| |
| engine = new Engine(); |
| |
| flowChartInstance = engine.initialize(optionContainer); |
| |
| if (flowChartInstance.hasPreProcessChartItems()) { |
| flowChartInstance.preprocess(); |
| } |
| singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt"); |
| singleMalt.getConfigurationDir().initDataFormat(); |
| dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance( |
| singleMalt.getSymbolTables(), |
| OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); |
| initialized = true; |
| } |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| public DependencyStructure parse(String[] tokens) throws MaltChainedException { |
| if (!initialized) { |
| throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); |
| } |
| if (tokens == null || tokens.length == 0) { |
| throw new MaltChainedException("Nothing to parse. "); |
| } |
|
|
| DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); |
| |
| for (int i = 0; i < tokens.length; i++) { |
| Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); |
| DependencyNode node = outputGraph.addDependencyNode(i+1); |
| String[] items = tokens[i].split("\t"); |
| for (int j = 0; j < items.length; j++) { |
| if (columns.hasNext()) { |
| ColumnDescription column = columns.next(); |
| if (column.getCategory() == ColumnDescription.INPUT && node != null) { |
| outputGraph.addLabel(node, column.getName(), items[j]); |
| } |
| } |
| } |
| } |
| outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); |
| |
| singleMalt.parse(outputGraph); |
| return outputGraph; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException { |
| if (!initialized) { |
| throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); |
| } |
| if (tokens == null || tokens.length == 0) { |
| throw new MaltChainedException("Nothing to convert. "); |
| } |
| DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); |
| |
| for (int i = 0; i < tokens.length; i++) { |
| Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); |
| DependencyNode node = outputGraph.addDependencyNode(i+1); |
| String[] items = tokens[i].split("\t"); |
| Edge edge = null; |
| for (int j = 0; j < items.length; j++) { |
| if (columns.hasNext()) { |
| ColumnDescription column = columns.next(); |
| if (column.getCategory() == ColumnDescription.INPUT && node != null) { |
| outputGraph.addLabel(node, column.getName(), items[j]); |
| } else if (column.getCategory() == ColumnDescription.HEAD) { |
| if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) { |
| edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1); |
| } |
| } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { |
| outputGraph.addLabel(edge, column.getName(), items[j]); |
| } |
| } |
| } |
| } |
| outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); |
| return outputGraph; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| public DataFormatSpecification readDataFormatSpecification(String dataFormatFileName) throws MaltChainedException { |
| DataFormatSpecification dataFormat = new DataFormatSpecification(); |
| dataFormat.parseDataFormatXMLfile(dataFormatFileName); |
| return dataFormat; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| public DependencyStructure toDependencyStructure(String[] tokens, DataFormatSpecification dataFormatSpecification) throws MaltChainedException { |
| |
| SymbolTableHandler symbolTables = new HashSymbolTableHandler(); |
| |
| |
| DataFormatInstance dataFormatInstance = dataFormatSpecification.createDataFormatInstance(symbolTables, "none"); |
|
|
| |
| if (tokens == null || tokens.length == 0) { |
| throw new MaltChainedException("Nothing to convert. "); |
| } |
| DependencyStructure outputGraph = new DependencyGraph(symbolTables); |
| |
| for (int i = 0; i < tokens.length; i++) { |
| Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); |
| DependencyNode node = outputGraph.addDependencyNode(i+1); |
| String[] items = tokens[i].split("\t"); |
| Edge edge = null; |
| for (int j = 0; j < items.length; j++) { |
| if (columns.hasNext()) { |
| ColumnDescription column = columns.next(); |
| if (column.getCategory() == ColumnDescription.INPUT && node != null) { |
| outputGraph.addLabel(node, column.getName(), items[j]); |
| } else if (column.getCategory() == ColumnDescription.HEAD) { |
| if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) { |
| edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1); |
| } |
| } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { |
| outputGraph.addLabel(edge, column.getName(), items[j]); |
| } |
| } |
| } |
| } |
| outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); |
| return outputGraph; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| public DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException { |
| return toDependencyStructure(tokens, readDataFormatSpecification(dataFormatFileName)); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| public String[] parseTokens(String[] tokens) throws MaltChainedException { |
| DependencyStructure outputGraph = parse(tokens); |
| StringBuilder sb = new StringBuilder(); |
| String[] outputTokens = new String[tokens.length]; |
| SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL"); |
| for (Integer index : outputGraph.getTokenIndices()) { |
| sb.setLength(0); |
| if (index <= tokens.length) { |
| DependencyNode node = outputGraph.getDependencyNode(index); |
| sb.append(tokens[index -1]); |
| sb.append('\t'); |
| sb.append(node.getHead().getIndex()); |
| sb.append('\t'); |
| if (node.getHeadEdge().hasLabel(deprelTable)) { |
| sb.append(node.getHeadEdge().getLabelSymbol(deprelTable)); |
| } else { |
| sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable)); |
| } |
| outputTokens[index-1] = sb.toString(); |
| } |
| } |
| return outputTokens; |
| } |
| |
| |
| |
| |
| |
| |
| public void terminateParserModel() throws MaltChainedException { |
| if (!initialized) { |
| throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); |
| } |
| |
| if (flowChartInstance.hasPostProcessChartItems()) { |
| flowChartInstance.postprocess(); |
| } |
| |
| |
| engine.terminate(optionContainer); |
| } |
| |
| private void initialize() throws MaltChainedException { |
| if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) { |
| return; |
| } |
| OptionManager.instance().loadOptionDescriptionFile(); |
| OptionManager.instance().generateMaps(); |
| } |
| |
| |
| |
| |
| |
| |
| |
| public int getOptionContainer() { |
| return optionContainer; |
| } |
| } |
|
|