// Page-scrape residue kept as a comment (not part of the Java source): "rain1024's picture", "update", "6f3ebfa"
package org.maltparser.parser;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.OutputStreamWriter;
import java.lang.reflect.InvocationTargetException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Formatter;
import java.util.regex.Pattern;
import org.apache.log4j.FileAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.maltparser.core.config.ConfigurationDir;
import org.maltparser.core.config.ConfigurationException;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureModelManager;
import org.maltparser.core.feature.system.FeatureEngine;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.helper.URLFinder;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.plugin.PluginLoader;
import org.maltparser.core.propagation.PropagationException;
import org.maltparser.core.propagation.PropagationManager;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.parser.guide.ClassifierGuide;
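/**
 * Parser configuration that drives a single parsing algorithm in one of two
 * modes: {@link #LEARN}, where a trainer is run over gold-standard dependency
 * structures, and {@link #PARSE}, where a parser is run over dependency
 * structures.
 *
 * @author Johan Hall
 */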
public class SingleMalt implements DependencyParserConfig {
/** Parameter types of the constructor that makeParserFactory() looks up reflectively. */
public final static Class<?>[] paramTypes = { org.maltparser.parser.DependencyParserConfig.class };
/** Mode constant: learning. initParsingAlgorithm() creates a trainer in this mode. */
public static final int LEARN = 0;
/** Mode constant: parsing. initParsingAlgorithm() creates a parser in this mode. */
public static final int PARSE = 1;
/** Configuration directory that file access and info-file writes are delegated to; set via setConfigurationDir(). */
protected ConfigurationDir configDir;
/** Logger for configuration messages; assigned by initConfigLogger() or setConfigLogger(). */
protected Logger configLogger;
/** Option container index passed to every OptionManager call made by this class. */
protected int optionContainerIndex;
/** The active algorithm; assigned together with trainer (LEARN) or parser (PARSE) in initParsingAlgorithm(). */
protected ParsingAlgorithm parsingAlgorithm = null;
/** Current mode as passed to initialize(); compared against LEARN and PARSE. */
protected int mode;
/** Symbol table handler passed to initialize(); handed to the trainer/parser and the propagation manager. */
protected SymbolTableHandler symbolTableHandler;
/** Data format instance passed to initialize(); handed to the propagation manager. */
protected DataFormatInstance dataFormatInstance;
/** Feature model manager created by initFeatureSystem(). */
protected FeatureModelManager featureModelManager;
/** Wall-clock time in milliseconds taken in initialize(). */
protected long startTime;
/** Wall-clock time in milliseconds taken in terminate() (LEARN and PARSE modes only). */
protected long endTime;
/** Not referenced by any method of this class visible in this file. */
protected int nIterations = 0;
/** Propagation manager; stays null when the "singlemalt"/"propagation" option is empty (see initPropagation()). */
protected PropagationManager propagationManager;
/** Parser instance; assigned only in PARSE mode. */
private Parser parser;
/** Trainer instance; assigned only in LEARN mode. */
private Trainer trainer;
/** Factory created reflectively by makeParserFactory() during initialize(). */
private AbstractParserFactory parserFactory;
/**
 * Prepares this configuration for use: stores the supplied collaborators,
 * sets up the configuration logger, creates the parser factory, and
 * initializes propagation, the feature system and the parsing algorithm
 * (in that order). In LEARN mode the option dependencies are checked first.
 * Finally, when info logging is enabled, the data format location(s) are logged.
 *
 * @param containerIndex the option container index used for all option lookups
 * @param dataFormatInstance the data format instance
 * @param symbolTableHandler the symbol table handler
 * @param configDir the configuration directory
 * @param mode either {@link #LEARN} or {@link #PARSE}
 * @throws MaltChainedException if any initialization step fails
 */
public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, SymbolTableHandler symbolTableHandler, ConfigurationDir configDir, int mode) throws MaltChainedException {
    this.optionContainerIndex = containerIndex;
    this.mode = mode;
    setConfigurationDir(configDir);
    startTime = System.currentTimeMillis();

    final String logFileName = getOptionValue("config", "logfile").toString();
    final String logLevelName = getOptionValue("config", "logging").toString();
    configLogger = initConfigLogger(logFileName, logLevelName);

    this.dataFormatInstance = dataFormatInstance;
    this.symbolTableHandler = symbolTableHandler;
    this.parserFactory = makeParserFactory();
    if (mode == SingleMalt.LEARN) {
        checkOptionDependency();
    }
    initPropagation();
    initFeatureSystem();
    initParsingAlgorithm();

    // Everything below is informational logging only.
    if (!configLogger.isInfoEnabled()) {
        return;
    }
    final URL inputFormatURL = configDir.getInputFormatURL();
    final URL outputFormatURL = configDir.getOutputFormatURL();
    if (inputFormatURL == null) {
        return;
    }
    final String inputLocation = inputFormatURL.toString();
    final boolean sameFormat = outputFormatURL == null || outputFormatURL.toString().equals(inputLocation);
    if (sameFormat) {
        configLogger.info(" Data Format : "+afterJarSeparator(inputLocation)+"\n");
    } else {
        configLogger.info(" Input Data Format : "+afterJarSeparator(inputLocation)+"\n");
        configLogger.info(" Output Data Format : "+afterJarSeparator(outputFormatURL.toString())+"\n");
    }
}

/** Returns the part of {@code location} after its first '!', or {@code location} unchanged when it contains none. */
private static String afterJarSeparator(String location) {
    final int separator = location.indexOf('!');
    return separator == -1 ? location : location.substring(separator + 1);
}
/**
 * Creates the propagation manager when the "singlemalt"/"propagation" option
 * names a specification file; does nothing when the option is empty.
 * In LEARN mode the specification is first copied into the configuration and
 * the option is overloaded with the name returned by that copy.
 *
 * @throws MaltChainedException if the specification cannot be copied, located or loaded
 */
private void initPropagation() throws MaltChainedException {
    String specFileName = getOptionValue("singlemalt", "propagation").toString();
    final boolean noSpecification = specFileName == null || specFileName.length() == 0;
    if (noSpecification) {
        return;
    }

    propagationManager = new PropagationManager();
    if (mode == SingleMalt.LEARN) {
        specFileName = configDir.copyToConfig(specFileName);
        OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", specFileName);
    }
    if (isLoggerInfoEnabled()) {
        logInfoMessage(" Propagation : " + specFileName+"\n");
    }

    final URL specificationURL = findURL(specFileName);
    propagationManager.loadSpecification(specificationURL);
    propagationManager.createPropagations(dataFormatInstance, symbolTableHandler);
}
/**
 * Initializes the parsing algorithm for the current mode.
 *
 * In LEARN mode a batch trainer is created, in PARSE mode a deterministic
 * parser; the "singlemalt"/"diagnostics" option selects the diagnostics
 * variant of either. For any other mode nothing is created.
 *
 * @throws MaltChainedException if the option cannot be read or the algorithm cannot be created
 */
protected void initParsingAlgorithm() throws MaltChainedException {
    final boolean withDiagnostics = ((Boolean) getOptionValue("singlemalt", "diagnostics")).booleanValue();
    switch (mode) {
    case LEARN:
        if (withDiagnostics) {
            trainer = new BatchTrainerWithDiagnostics(this, symbolTableHandler);
        } else {
            trainer = new BatchTrainer(this, symbolTableHandler);
        }
        parsingAlgorithm = trainer;
        break;
    case PARSE:
        if (withDiagnostics) {
            parser = new DeterministicParserWithDiagnostics(this, symbolTableHandler);
        } else {
            parser = new DeterministicParser(this, symbolTableHandler);
        }
        parsingAlgorithm = parser;
        break;
    default:
        // Unknown mode: leave parsingAlgorithm untouched.
        break;
    }
}
/**
 * Builds the feature engine and feature model manager, then loads the
 * feature specification named by the "guide"/"features" option. Files ending
 * in ".par" are loaded as par specifications together with the
 * pseudo-projective marking strategy and covered-root options.
 *
 * @throws MaltChainedException if the feature system or the specification cannot be loaded
 */
protected void initFeatureSystem() throws MaltChainedException {
    final FeatureEngine engine = new FeatureEngine();
    engine.load("/appdata/features/ParserFeatureSystem.xml");
    engine.load(PluginLoader.instance());
    featureModelManager = new FeatureModelManager(engine);

    final String specificationFile = getOptionValue("guide", "features").toString().trim();
    if (!specificationFile.endsWith(".par")) {
        featureModelManager.loadSpecification(findURL(specificationFile));
        return;
    }

    final String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
    final String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
    featureModelManager.loadParSpecification(findURL(specificationFile), markingStrategy, coveredRoot);
}
/**
 * Creates a parser factory specified by the --singlemalt-parsing_algorithm option.
 *
 * The option value is a class; its constructor matching {@link #paramTypes}
 * is invoked reflectively with this configuration as the only argument.
 *
 * @return a parser factory
 * @throws MaltChainedException if the factory class cannot be instantiated
 */
private AbstractParserFactory makeParserFactory() throws MaltChainedException {
    final Class<?> factoryClass = (Class<?>) getOptionValue("singlemalt", "parsing_algorithm");
    try {
        final Object[] constructorArgs = { this };
        return (AbstractParserFactory) factoryClass.getConstructor(paramTypes).newInstance(constructorArgs);
    } catch (NoSuchMethodException e) {
        throw parserFactoryFailure(factoryClass, e);
    } catch (InstantiationException e) {
        throw parserFactoryFailure(factoryClass, e);
    } catch (IllegalAccessException e) {
        throw parserFactoryFailure(factoryClass, e);
    } catch (InvocationTargetException e) {
        throw parserFactoryFailure(factoryClass, e);
    }
}

/** Builds the exception reported for every reflective failure in makeParserFactory(). */
private static ConfigurationException parserFactoryFailure(Class<?> factoryClass, Exception cause) {
    return new ConfigurationException("The parser factory '"+factoryClass.getName()+"' cannot be initialized. ", cause);
}
/**
 * Returns the parser factory created during initialization.
 *
 * @return the parser factory, or null if initialize() has not created one yet
 */
public AbstractParserFactory getParserFactory() {
    return this.parserFactory;
}

/**
 * Returns the feature model manager created by initFeatureSystem().
 *
 * @return the feature model manager, or null if it has not been created yet
 */
public FeatureModelManager getFeatureModelManager() {
    return this.featureModelManager;
}
/**
 * Processes one sentence according to the current mode.
 *
 * In LEARN mode {@code arguments[0]} is the system graph and
 * {@code arguments[1]} the gold graph; the trainer is run and the guide
 * finalizes the sentence, provided the system graph has tokens and a guide
 * exists. In PARSE mode {@code arguments[0]} is parsed if it has tokens.
 * Any other mode is a no-op.
 *
 * @param arguments the dependency structures to process
 * @throws MaltChainedException if the arguments do not fit the mode or processing fails
 */
public void process(Object[] arguments) throws MaltChainedException {
    if (mode == LEARN) {
        final boolean twoGraphsSupplied = arguments.length >= 2
                && arguments[0] instanceof DependencyStructure
                && arguments[1] instanceof DependencyStructure;
        if (!twoGraphsSupplied) {
            throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
        }
        final DependencyStructure systemGraph = (DependencyStructure) arguments[0];
        final DependencyStructure goldGraph = (DependencyStructure) arguments[1];
        if (!systemGraph.hasTokens() || getGuide() == null) {
            return;
        }
        getGuide().finalizeSentence(((Trainer) getAlgorithm()).parse(goldGraph, systemGraph));
        return;
    }

    if (mode == PARSE) {
        final boolean graphSupplied = arguments.length >= 1 && arguments[0] instanceof DependencyStructure;
        if (!graphSupplied) {
            throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
        }
        final DependencyStructure processGraph = (DependencyStructure) arguments[0];
        if (processGraph.hasTokens()) {
            parser.parse(processGraph);
        }
    }
}
/**
 * Parses the given graph with the parser created in PARSE mode.
 * Graphs without tokens are left untouched.
 *
 * @param graph the dependency structure to parse
 * @throws MaltChainedException if parsing fails
 */
public void parse(DependencyStructure graph) throws MaltChainedException {
    if (!graph.hasTokens()) {
        return;
    }
    parser.parse(graph);
}
/**
 * Runs the trainer on a gold graph / oracle graph pair. When a guide is
 * available it finalizes the sentence with the trainer's result; otherwise
 * the trainer is run and its result is discarded. Oracle graphs without
 * tokens are skipped.
 *
 * @param goldGraph the gold-standard dependency structure
 * @param oracleGraph the dependency structure handed to the trainer alongside the gold graph
 * @throws MaltChainedException if the trainer or the guide fails
 */
public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
    if (!oracleGraph.hasTokens()) {
        return;
    }
    if (getGuide() == null) {
        trainer.parse(goldGraph, oracleGraph);
        return;
    }
    getGuide().finalizeSentence(trainer.parse(goldGraph, oracleGraph));
}
/**
 * Invokes training on the current algorithm, but only when no guide is present.
 *
 * @throws MaltChainedException if training fails
 */
public void train() throws MaltChainedException {
    // NOTE(review): training is skipped whenever a guide exists; the reason is not visible here.
    if (getGuide() != null) {
        return;
    }
    ((Trainer) getAlgorithm()).train();
}
/**
 * Terminates the algorithm and, if present, the guide; then logs the elapsed
 * learning or parsing time and detaches all appenders from a
 * configuration-specific logger.
 *
 * The timing message is only written when a logger is set and info logging is
 * enabled, so a null logger no longer causes a NullPointerException before the
 * null check at the end of the method is reached.
 *
 * @param arguments not used by this implementation
 * @throws MaltChainedException if the algorithm or the guide fails to terminate
 */
public void terminate(Object[] arguments) throws MaltChainedException {
    getAlgorithm().terminate();
    if (getGuide() != null) {
        getGuide().terminate();
    }

    if (mode == LEARN || mode == PARSE) {
        endTime = System.currentTimeMillis();
        final long elapsed = endTime - startTime;
        if (configLogger != null && configLogger.isInfoEnabled()) {
            final String label = (mode == LEARN) ? "Learning time: " : "Parsing time: ";
            // hh:mm:ss derived from the millisecond count; hours are not capped at 24.
            final String clock = String.format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000);
            configLogger.info(label + clock + " (" + elapsed + " ms)\n");
        }
    }

    // Never strip appenders from the shared system logger.
    if (SystemLogger.logger() != configLogger && configLogger != null) {
        configLogger.removeAllAppenders();
    }
}
/**
 * Initialize the configuration logger.
 *
 * When a log file name other than "stdout" is given and a configuration
 * directory is set, a logger named after the file is used and a file appender
 * (append mode, pattern "%m") in the working directory is attached to it.
 * Otherwise the system logger is used. The result is also stored in the
 * configLogger field.
 *
 * @param logfile the log file name; null, empty or "stdout" selects the system logger
 * @param level the log level name; Level.INFO is passed to Level.toLevel as the default
 * @return the configuration logger
 * @throws MaltChainedException if the log file cannot be created
 */
public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
    final boolean useSystemLogger = logfile == null
            || logfile.length() == 0
            || logfile.equalsIgnoreCase("stdout")
            || configDir == null;
    if (useSystemLogger) {
        configLogger = SystemLogger.logger();
        return configLogger;
    }

    configLogger = Logger.getLogger(logfile);
    FileAppender logFileAppender;
    try {
        logFileAppender = new FileAppender(new PatternLayout("%m"), configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
    } catch (IOException e) {
        throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
    }
    final Level threshold = Level.toLevel(level, Level.INFO);
    logFileAppender.setThreshold(threshold);
    configLogger.addAppender(logFileAppender);
    configLogger.setLevel(threshold);
    return configLogger;
}
/**
 * Tells whether info messages would be logged.
 *
 * @return true if a configuration logger is set and has info logging enabled
 */
public boolean isLoggerInfoEnabled() {
    if (configLogger == null) {
        return false;
    }
    return configLogger.isInfoEnabled();
}

/**
 * Tells whether debug messages would be logged.
 *
 * @return true if a configuration logger is set and has debug logging enabled
 */
public boolean isLoggerDebugEnabled() {
    if (configLogger == null) {
        return false;
    }
    return configLogger.isDebugEnabled();
}

/**
 * Logs a message at error level. The logger is not checked for null.
 *
 * @param message the message to log
 */
public void logErrorMessage(String message) {
    this.configLogger.error(message);
}

/**
 * Logs a message at info level. The logger is not checked for null.
 *
 * @param message the message to log
 */
public void logInfoMessage(String message) {
    this.configLogger.info(message);
}

/**
 * Logs a single character at info level. The logger is not checked for null.
 *
 * @param character the character to log
 */
public void logInfoMessage(char character) {
    this.configLogger.info(character);
}

/**
 * Logs a message at debug level. The logger is not checked for null.
 *
 * @param message the message to log
 */
public void logDebugMessage(String message) {
    this.configLogger.debug(message);
}
/**
 * Writes a message to the configuration information file and flushes it.
 *
 * When the configuration directory has no info file writer the message is
 * dropped, which matches how checkOptionDependency() guards every write to
 * the same writer.
 *
 * @param message the text to write
 * @throws MaltChainedException if writing or flushing fails
 */
public void writeInfoToConfigFile(String message) throws MaltChainedException {
    try {
        // No info file available: nothing to write to.
        if (configDir.getInfoFileWriter() == null) {
            return;
        }
        configDir.getInfoFileWriter().write(message);
        configDir.getInfoFileWriter().flush();
    } catch (IOException e) {
        throw new ConfigurationException("Could not write to the configuration information file. ", e);
    }
}
/**
 * Returns the configuration logger.
 *
 * @return the configuration logger, possibly null
 */
public Logger getConfigLogger() {
    return this.configLogger;
}

/**
 * Replaces the configuration logger.
 *
 * @param logger the logger to use from now on
 */
public void setConfigLogger(Logger logger) {
    this.configLogger = logger;
}

/**
 * Returns the configuration directory.
 *
 * @return the configuration directory, possibly null
 */
public ConfigurationDir getConfigurationDir() {
    return this.configDir;
}

/**
 * Replaces the configuration directory.
 *
 * @param configDir the configuration directory to use from now on
 */
public void setConfigurationDir(ConfigurationDir configDir) {
    this.configDir = configDir;
}
/**
 * Delegates to the configuration directory's getOutputStreamWriter.
 *
 * @param fileName the file name passed through to the configuration directory
 * @return the writer supplied by the configuration directory
 * @throws MaltChainedException if the configuration directory reports a failure
 */
public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException {
    return this.configDir.getOutputStreamWriter(fileName);
}

/**
 * Delegates to the configuration directory's getAppendOutputStreamWriter.
 *
 * @param fileName the file name passed through to the configuration directory
 * @return the writer supplied by the configuration directory
 * @throws MaltChainedException if the configuration directory reports a failure
 */
public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException {
    return this.configDir.getAppendOutputStreamWriter(fileName);
}

/**
 * Delegates to the configuration directory's getInputStreamReader.
 *
 * @param fileName the file name passed through to the configuration directory
 * @return the reader supplied by the configuration directory
 * @throws MaltChainedException if the configuration directory reports a failure
 */
public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException {
    return this.configDir.getInputStreamReader(fileName);
}

/**
 * Delegates to the configuration directory's getInputStreamFromConfigFileEntry.
 *
 * @param fileName the entry name passed through to the configuration directory
 * @return the input stream supplied by the configuration directory
 * @throws MaltChainedException if the configuration directory reports a failure
 */
public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException {
    return this.configDir.getInputStreamFromConfigFileEntry(fileName);
}

/**
 * Delegates to the configuration directory's getConfigFileEntryURL.
 *
 * @param fileName the entry name passed through to the configuration directory
 * @return the URL supplied by the configuration directory
 * @throws MaltChainedException if the configuration directory reports a failure
 */
public URL getConfigFileEntryURL(String fileName) throws MaltChainedException {
    return this.configDir.getConfigFileEntryURL(fileName);
}

/**
 * Delegates to the configuration directory's getFile.
 *
 * @param fileName the file name passed through to the configuration directory
 * @return the file supplied by the configuration directory
 * @throws MaltChainedException if the configuration directory reports a failure
 */
public File getFile(String fileName) throws MaltChainedException {
    return this.configDir.getFile(fileName);
}
/**
 * Deserializes a single object from an entry of the configuration file.
 *
 * The entry stream is closed on every path, including the case where the
 * object stream header cannot be read.
 *
 * NOTE(review): this uses native Java deserialization; it is only safe when
 * the configuration file comes from a trusted source.
 *
 * @param fileName the name of the entry to read
 * @return the deserialized object
 * @throws MaltChainedException if the entry cannot be opened, read or deserialized
 */
public Object getConfigFileEntryObject(String fileName) throws MaltChainedException {
    Object object = null;
    try {
        final InputStream entryStream = getInputStreamFromConfigFileEntry(fileName);
        final ObjectInputStream input;
        try {
            input = new ObjectInputStream(entryStream);
        } catch (IOException e) {
            // The wrapper was never created, so the raw stream must be closed here.
            if (entryStream != null) {
                try {
                    entryStream.close();
                } catch (IOException ignored) {
                    // Keep the original failure; it is the one worth reporting.
                }
            }
            throw e;
        }
        try {
            object = input.readObject();
        } catch (Exception e) {
            // Covers ClassNotFoundException as well as read failures; same message for all.
            throw new ConfigurationException("Could not load object '"+fileName+"' from mco-file", e);
        } finally {
            input.close();
        }
    } catch (IOException e) {
        throw new ConfigurationException("Could not load object from '"+fileName+"' in mco-file", e);
    }
    return object;
}
/**
 * Reads an entry of the configuration file as UTF-8 text.
 *
 * Every line of the entry is returned followed by a single '\n', so the
 * result ends with a newline whenever the entry has at least one line.
 * The reader (and with it the entry stream) is closed before returning.
 *
 * @param fileName the name of the entry to read
 * @return the text content of the entry
 * @throws MaltChainedException if the entry cannot be opened or read
 */
public String getConfigFileEntryString(String fileName) throws MaltChainedException {
    final StringBuilder sb = new StringBuilder();
    try {
        final BufferedReader in = new BufferedReader(new InputStreamReader(getInputStreamFromConfigFileEntry(fileName), "UTF-8"));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                sb.append(line);
                sb.append('\n');
            }
        } finally {
            // Previously the reader was never closed, leaking the entry stream.
            in.close();
        }
    } catch (IOException e) {
        throw new ConfigurationException("Could not load string from '"+fileName+"' in mco-file", e);
    }
    return sb.toString();
}
/**
 * Returns the current mode.
 *
 * @return the mode passed to initialize(), normally {@link #LEARN} or {@link #PARSE}
 */
public int getMode() {
    return this.mode;
}

/**
 * Looks up an option value in this configuration's option container.
 *
 * @param optiongroup the option group name
 * @param optionname the option name
 * @return the option value as returned by the option manager
 * @throws MaltChainedException if the option manager reports a failure
 */
public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
    final OptionManager options = OptionManager.instance();
    return options.getOptionValue(optionContainerIndex, optiongroup, optionname);
}

/**
 * Looks up the string form of an option value in this configuration's option container.
 *
 * @param optiongroup the option group name
 * @param optionname the option name
 * @return the option value string as returned by the option manager
 * @throws MaltChainedException if the option manager reports a failure
 */
public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
    final OptionManager options = OptionManager.instance();
    return options.getOptionValueString(optionContainerIndex, optiongroup, optionname);
}

/**
 * Returns the option manager instance.
 *
 * @return the result of OptionManager.instance()
 * @throws MaltChainedException declared by the signature; not thrown by the visible code
 */
public OptionManager getOptionManager() throws MaltChainedException {
    return OptionManager.instance();
}
/******************************** MaltParserConfiguration specific ********************************/

/**
 * Returns the symbol table handler supplied to initialize().
 *
 * @return the symbol table handler
 */
public SymbolTableHandler getSymbolTables() {
    return this.symbolTableHandler;
}

/**
 * Returns the data format instance supplied to initialize().
 *
 * @return the data format instance
 */
public DataFormatInstance getDataFormatInstance() {
    return this.dataFormatInstance;
}

/**
 * Returns the propagation manager.
 *
 * @return the propagation manager, or null when none was created
 */
public PropagationManager getPropagationManager() {
    return this.propagationManager;
}

/**
 * Returns the active parsing algorithm.
 *
 * @return the trainer or parser created by initParsingAlgorithm(), or null if none was created
 */
public ParsingAlgorithm getAlgorithm() {
    return this.parsingAlgorithm;
}

/**
 * Returns the guide of the active parsing algorithm. The algorithm itself is
 * not checked for null.
 *
 * @return the guide
 */
public ClassifierGuide getGuide() {
    return this.parsingAlgorithm.getGuide();
}
/**
 * Resolves dependencies between options and records every adjusted option in
 * the configuration info file (when an info file writer exists). Called from
 * initialize() in LEARN mode only.
 *
 * Steps, in order:
 * 1. Copy the feature model file into the configuration and overload
 *    --guide-features with the copied name. If no feature model was given, a
 *    default is derived from the parsing algorithm option and looked up in the jars.
 * 2. If only one of --guide-data_split_column / --guide-data_split_structure
 *    is set, warn and overload the one that is set with the empty string.
 * 3. Normalize --guide-decision_settings and append the pseudo-projective
 *    parts required by the marking strategy and covered-root options.
 *
 * The order of the overloadOptionValue/getOptionValue calls matters: several
 * values are read back right after they have been overloaded.
 *
 * @throws MaltChainedException if an option cannot be read or overloaded, a file
 *         cannot be copied, or the info file cannot be written
 */
public void checkOptionDependency() throws MaltChainedException {
try {
// Start the dependency section of the info file.
if (configDir.getInfoFileWriter() != null) {
configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
}
// Copy the feature model file into the configuration directory
String featureModelFileName = getOptionValue("guide", "features").toString().trim();
if (featureModelFileName.equals("")) {
// use default feature model depending on the selected parser algorithm
OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
// Read the value back after overloading it.
featureModelFileName = getOptionValue("guide", "features").toString().trim();
/* START: Temp fix during development of new liblinear and libsvm interface */
String learner = getOptionValueString("guide", "learner");
if (!learner.startsWith("lib")) {
learner = "lib"+learner;
}
/* END: Temp fix during development of new liblinear and libsvm interface */
// Fill in the {learner} and {dataformat} placeholders; the input format name is used without its ".xml" part.
featureModelFileName = featureModelFileName.replace("{learner}", learner);
featureModelFileName = featureModelFileName.replace("{dataformat}", getOptionValue("input", "format").toString().trim().replace(".xml", ""));
final URLFinder f = new URLFinder();
featureModelFileName = configDir.copyToConfig(f.findURLinJars(featureModelFileName));
} else {
featureModelFileName = configDir.copyToConfig(featureModelFileName);
}
// From here on --guide-features holds the name returned by copyToConfig.
OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
if (configDir.getInfoFileWriter() != null) {
configDir.getInfoFileWriter().write("--guide-features ( -F) "+getOptionValue("guide", "features").toString()+"\n");
}
// Split structure given without a split column: clear the structure.
if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
if (configDir.getInfoFileWriter() != null) {
configDir.getInfoFileWriter().write("--guide-data_split_structure ( -s)\n");
}
}
// Split column given without a split structure: clear the column.
if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
if (configDir.getInfoFileWriter() != null) {
configDir.getInfoFileWriter().write("--guide-data_split_column ( -d)\n");
}
}
String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
// Collects the "+A.PP..." parts that have to be appended to the decision settings.
StringBuilder newDecisionSettings = new StringBuilder();
// Empty or "default" selects the built-in setting; anything else is upper-cased.
// (The null test cannot succeed because trim() never returns null.)
if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
decisionSettings = "T.TRANS+A.DEPREL";
} else {
decisionSettings = decisionSettings.toUpperCase();
}
// Marking strategies "head", "path" and "head+path" need A.PPLIFTED unless it is already present.
if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
newDecisionSettings.append("+A.PPLIFTED");
}
}
// Marking strategies "path" and "head+path" additionally need A.PPPATH.
if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
newDecisionSettings.append("+A.PPPATH");
}
}
// Any covered-root setting other than "none" needs A.PPCOVERED.
if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
newDecisionSettings.append("+A.PPCOVERED");
}
// Overload only when the normalized value differs from the stored one or parts were appended.
if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
if (configDir.getInfoFileWriter() != null) {
configDir.getInfoFileWriter().write("--guide-decision_settings ( -gds) "+getOptionValue("guide", "decision_settings").toString()+"\n");
}
}
if (configDir.getInfoFileWriter() != null) {
configDir.getInfoFileWriter().flush();
}
} catch (IOException e) {
throw new ConfigurationException("Could not write to the configuration information file. ", e);
}
}
/**
 * Resolves a specification file name to a URL. A file that exists according
 * to the configuration directory is addressed with a "file:///" URL built from
 * its absolute path; otherwise the URL of the matching configuration file
 * entry is returned. Used for propagation and feature specifications alike.
 *
 * @param propagationSpecFileName the specification file name to resolve
 * @return the URL of the specification
 * @throws MaltChainedException if the URL is malformed or the configuration directory reports a failure
 */
private URL findURL(String propagationSpecFileName) throws MaltChainedException {
    final File localFile = configDir.getFile(propagationSpecFileName);
    if (!localFile.exists()) {
        return configDir.getConfigFileEntryURL(propagationSpecFileName);
    }
    try {
        // NOTE(review): the path is concatenated unescaped; File.toURI().toURL() would escape
        // special characters, but consumers of the returned URL would need checking first.
        return new URL("file:///"+localFile.getAbsolutePath());
    } catch (MalformedURLException e) {
        throw new PropagationException("Malformed URL: "+localFile, e);
    }
}
}