File size: 8,728 Bytes
6f3ebfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
package org.maltparser.concurrent;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.maltparser.concurrent.graph.ConcurrentDependencyGraph;
import org.maltparser.concurrent.graph.dataformat.DataFormat;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureModelManager;
import org.maltparser.core.feature.system.FeatureEngine;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.io.dataformat.DataFormatManager;
import org.maltparser.core.lw.graph.LWDeprojectivizer;
import org.maltparser.core.lw.graph.LWDependencyGraph;
import org.maltparser.core.lw.parser.LWSingleMalt;
import org.maltparser.core.lw.parser.McoModel;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.plugin.PluginLoader;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.symbol.hash.HashSymbolTableHandler;
import org.maltparser.core.symbol.parse.ParseSymbolTableHandler;

/**
 * A loaded MaltParser model exposed through a small parsing API. According to the original author it
 * may be used from a single thread as well as from several threads. Instances are not meant to be
 * constructed directly; obtain them through the static methods of ConcurrentMaltParserService.
 *
 * <p>Every field is final and is assigned exactly once by the constructor.
 *
 * @author Johan Hall
 *
 */
public final class ConcurrentMaltParserModel {
    /** Data format instance created from the input format specification stored in the model file. */
    private final DataFormatInstance dataFormatInstance;
    /** Data format parsed from the same input format XML; handed to every graph that is built. */
    private final DataFormat concurrentDataFormat;
    /** Symbol tables loaded from {@code symboltables.sym}; each parse wraps it in a ParseSymbolTableHandler. */
    private final SymbolTableHandler parentSymbolTableHandler;
    /** The parser that performs the actual parsing. */
    private final LWSingleMalt singleMalt;
    /** Index of the option container the settings are read from. */
    private final int optionContainer;
    /** Handle to the mco model file. */
    private final McoModel mcoModel;
    /** Integer code derived from the {@code pproj/marking_strategy} option. */
    private final int markingStrategy;
    /** {@code true} unless the {@code pproj/covered_root} option equals "none" (case-insensitive). */
    private final boolean coveredRoot;
    /** Trimmed value of the {@code graph/root_label} option. */
    private final String defaultRootLabel;

    /**
     * Loads everything needed for parsing from the given model file. Intended to be called only by
     * ConcurrentMaltParserService.
     *
     * <p>NOTE(review): {@code null} is passed as the fourth argument of LWSingleMalt; an earlier
     * revision supplied a PropagationManager there. Confirm that this is intentional.
     *
     * @param _optionContainer a option container index
     * @param _mcoURL a URL to a valid MaltParser model file.
     * @throws MaltChainedException if the input format file or the symbol tables cannot be read from
     *         the model file, or if any of the called MaltParser components fails
     */
    protected ConcurrentMaltParserModel(int _optionContainer, URL _mcoURL) throws MaltChainedException {
        this.optionContainer = _optionContainer;
        this.mcoModel = new McoModel(_mcoURL);

        // Locate the input format specification inside the model file.
        final String formatName = optionString(_optionContainer, "input", "format");
        final URL formatURL;
        try {
            formatURL = mcoModel.getMcoEntryURL(formatName);
        } catch (IOException e) {
            throw new MaltChainedException("Couldn't read file " + formatName + " from mco-file ", e);
        }

        // The same URL is supplied for both arguments of the data format manager.
        final DataFormatManager formatManager = new DataFormatManager(formatURL, formatURL);
        this.parentSymbolTableHandler = new HashSymbolTableHandler();
        this.dataFormatInstance = formatManager.getInputDataFormatSpec().createDataFormatInstance(
                this.parentSymbolTableHandler,
                OptionManager.instance().getOptionValueString(_optionContainer, "singlemalt", "null_value"));

        // Fill the parent symbol tables from the model file.
        try {
            this.parentSymbolTableHandler.load(mcoModel.getInputStreamReader("symboltables.sym", "UTF-8"));
        } catch (IOException e) {
            throw new MaltChainedException("Couldn't read file symboltables.sym from mco-file ", e);
        }

        this.defaultRootLabel = optionString(_optionContainer, "graph", "root_label");
        this.markingStrategy = LWDeprojectivizer.getMarkingStrategyInt(
                optionString(_optionContainer, "pproj", "marking_strategy"));
        this.coveredRoot = !"none".equalsIgnoreCase(optionString(_optionContainer, "pproj", "covered_root"));

        final FeatureModelManager featureModels = loadFeatureModelManager(_optionContainer, mcoModel);
        this.singleMalt = new LWSingleMalt(_optionContainer, this.dataFormatInstance, mcoModel, null, featureModels);
        this.concurrentDataFormat = DataFormat.parseDataFormatXMLfile(formatURL);
    }

    /**
     * Parses one sentence and wraps the result in a ConcurrentDependencyGraph.
     *
     * @param tokens an array of tokens
     * @return a dependency graph
     * @throws MaltChainedException if {@code tokens} is {@code null} or empty, or if parsing fails
     */
    public ConcurrentDependencyGraph parse(String[] tokens) throws MaltChainedException {
        final LWDependencyGraph parsed = internalParse(tokens);
        return new ConcurrentDependencyGraph(concurrentDataFormat, parsed, defaultRootLabel);
    }

    /**
     * Parses one sentence like {@link #parse(String[])}, but returns the string form of each
     * dependency node instead of a graph. Per the original documentation each returned token carries
     * a head index and a dependency type at the end of the string.
     *
     * @param tokens an array of tokens to parse
     * @return one string per input token, taken from {@code toString()} of the dependency nodes 1..n
     * @throws MaltChainedException if {@code tokens} is {@code null} or empty, or if parsing fails
     */
    public String[] parseTokens(String[] tokens) throws MaltChainedException {
        final LWDependencyGraph graph = internalParse(tokens);
        final String[] annotated = new String[tokens.length];
        // Token nodes are addressed from index 1; the result array is 0-based.
        for (int nodeIndex = 1; nodeIndex <= annotated.length; nodeIndex++) {
            annotated[nodeIndex - 1] = graph.getDependencyNode(nodeIndex).toString();
        }
        return annotated;
    }

    /**
     * Builds a fresh graph for the tokens, runs the parser on it and, when the model settings ask
     * for it, deprojectivizes the result.
     *
     * @param tokens the tokens of one sentence
     * @return the parsed (and possibly deprojectivized) graph
     * @throws MaltChainedException if {@code tokens} is {@code null} or empty, or if parsing fails
     */
    private LWDependencyGraph internalParse(String[] tokens) throws MaltChainedException {
        if (tokens == null || tokens.length == 0) {
            throw new MaltChainedException("Nothing to parse. ");
        }

        // Every parse gets its own symbol table handler layered on top of the parent one.
        final ParseSymbolTableHandler sentenceSymbols = new ParseSymbolTableHandler(parentSymbolTableHandler);
        final LWDependencyGraph graph =
                new LWDependencyGraph(concurrentDataFormat, sentenceSymbols, tokens, defaultRootLabel, false);
        singleMalt.parse(graph);

        // Deprojectivization is skipped only when the marking strategy is 0 and covered root is off.
        if (markingStrategy == 0 && !coveredRoot) {
            return graph;
        }
        new LWDeprojectivizer().deprojectivize(graph, markingStrategy);
        return graph;
    }

    /**
     * Parses a list of sentences by delegating to LWSingleMalt.parseSentences, passing along the root
     * label, marking strategy, covered-root flag, parent symbol table handler and data format of this
     * model.
     *
     * @param inputSentences the sentences to parse, each given as an array of tokens
     * @return the list produced by the delegate (presumably one output array per input sentence in
     *         the same form as {@link #parseTokens(String[])} - confirm against LWSingleMalt)
     * @throws MaltChainedException if the delegate fails
     */
    public List<String[]> parseSentences(List<String[]> inputSentences) throws MaltChainedException {
        return singleMalt.parseSentences(
                inputSentences,
                defaultRootLabel,
                markingStrategy,
                coveredRoot,
                parentSymbolTableHandler,
                concurrentDataFormat);
    }

    /**
     * Creates the feature model manager for the model: loads the parser feature system and the
     * plugins into a FeatureEngine, then loads the feature specification named by the
     * {@code guide/features} option from the model file.
     *
     * @param optionContainer the option container index to read settings from
     * @param mcoModel the model file that holds the feature specification
     * @return the populated feature model manager
     * @throws MaltChainedException if the feature specification cannot be read from the model file,
     *         or if any of the called MaltParser components fails
     */
    private static FeatureModelManager loadFeatureModelManager(int optionContainer, McoModel mcoModel) throws MaltChainedException {
        final FeatureEngine engine = new FeatureEngine();
        engine.load("/appdata/features/ParserFeatureSystem.xml");
        engine.load(PluginLoader.instance());

        final FeatureModelManager manager = new FeatureModelManager(engine);
        final String specName = optionString(optionContainer, "guide", "features");
        try {
            if (!specName.endsWith(".par")) {
                manager.loadSpecification(mcoModel.getMcoEntryURL(specName));
            } else {
                // ".par" specifications are loaded together with the two pproj option values.
                final String markingOption = optionString(optionContainer, "pproj", "marking_strategy");
                final String coveredRootOption = optionString(optionContainer, "pproj", "covered_root");
                manager.loadParSpecification(mcoModel.getMcoEntryURL(specName), markingOption, coveredRootOption);
            }
        } catch (IOException e) {
            throw new MaltChainedException("Couldn't read file " + specName + " from mco-file ", e);
        }
        return manager;
    }

    /**
     * Reads an option value from the OptionManager and returns its trimmed string form.
     *
     * @param container the option container index
     * @param group the option group name
     * @param name the option name
     * @return {@code toString().trim()} of the option value
     * @throws MaltChainedException if the option lookup fails
     */
    private static String optionString(int container, String group, String name) throws MaltChainedException {
        return OptionManager.instance().getOptionValue(container, group, name).toString().trim();
    }
}