|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "ExpectedBleuOptimizer.h" |
|
|
#include "util/file_stream.hh" |
|
|
#include "util/file_piece.hh" |
|
|
#include "util/string_piece.hh" |
|
|
#include "util/tokenize_piece.hh" |
|
|
|
|
|
#include <sstream> |
|
|
#include <boost/program_options.hpp> |
|
|
|
|
|
using namespace ExpectedBleuTraining; |
|
|
namespace po = boost::program_options; |
|
|
|
|
|
|
|
|
int main(int argc, char **argv) { |
|
|
|
|
|
util::FileStream out(1); |
|
|
util::FileStream err(2); |
|
|
|
|
|
size_t maxNBestSize; |
|
|
size_t iterationLimit; |
|
|
std::string filenameSBleu, filenameNBestList, filenameFeatureNames, filenameInitialWeights; |
|
|
|
|
|
bool ignoreDecoderScore; |
|
|
|
|
|
float learningRate; |
|
|
float initialStepSize; |
|
|
float decreaseRate; |
|
|
float increaseRate; |
|
|
float minStepSize; |
|
|
float maxStepSize; |
|
|
float floorAbsScalingFactor; |
|
|
float regularizationParameter; |
|
|
bool printZeroWeights; |
|
|
bool miniBatches; |
|
|
std::string optimizerTypeStr; |
|
|
size_t optimizerType = 0; |
|
|
#define EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP 1 |
|
|
#define EXPECTED_BLEU_OPTIMIZER_TYPE_SGD 2 |
|
|
|
|
|
try { |
|
|
|
|
|
po::options_description descr("Usage"); |
|
|
descr.add_options() |
|
|
("help,h", "produce help message") |
|
|
("n-best-size-limit,l", po::value<size_t>(&maxNBestSize)->default_value(100), |
|
|
"limit of n-best list entries to be considered for training") |
|
|
("iterations,i", po::value<size_t>(&iterationLimit)->default_value(50), |
|
|
"number of training iterations") |
|
|
("sbleu-file,b", po::value<std::string>(&filenameSBleu)->required(), |
|
|
"file containing sentence-level BLEU scores for all n-best list entries") |
|
|
("prepared-n-best-list,n", po::value<std::string>(&filenameNBestList)->required(), |
|
|
"input n-best list file, in prepared format for expected BLEU training") |
|
|
("feature-name-file,f", po::value<std::string>(&filenameFeatureNames)->required(), |
|
|
"file containing mapping between feature names and indices") |
|
|
("initial-weights-file,w", po::value<std::string>(&filenameInitialWeights)->default_value(""), |
|
|
"file containing start values for scaling factors (optional)") |
|
|
("ignore-decoder-score", boost::program_options::value<bool>(&ignoreDecoderScore)->default_value(0), |
|
|
"exclude decoder score from computation of posterior probability") |
|
|
("regularization", boost::program_options::value<float>(®ularizationParameter)->default_value(0), |
|
|
"regularization parameter; suggested value range: [1e-8,1e-5]") |
|
|
("learning-rate", boost::program_options::value<float>(&learningRate)->default_value(1), |
|
|
"learning rate for the SGD optimizer") |
|
|
("floor", boost::program_options::value<float>(&floorAbsScalingFactor)->default_value(0), |
|
|
"set scaling factor to 0 if below this absolute value after update") |
|
|
("initial-step-size", boost::program_options::value<float>(&initialStepSize)->default_value(0.001), |
|
|
"initial step size for the RPROP optimizer") |
|
|
("decrease-rate", boost::program_options::value<float>(&decreaseRate)->default_value(0.5), |
|
|
"decrease rate for the RPROP optimizer") |
|
|
("increase-rate", boost::program_options::value<float>(&increaseRate)->default_value(1.2), |
|
|
"increase rate for the RPROP optimizer") |
|
|
("min-step-size", boost::program_options::value<float>(&minStepSize)->default_value(1e-7), |
|
|
"minimum step size for the RPROP optimizer") |
|
|
("max-step-size", boost::program_options::value<float>(&maxStepSize)->default_value(1), |
|
|
"maximum step size for the RPROP optimizer") |
|
|
("print-zero-weights", boost::program_options::value<bool>(&printZeroWeights)->default_value(0), |
|
|
"output scaling factors even if they are trained to 0") |
|
|
("optimizer", po::value<std::string>(&optimizerTypeStr)->default_value("RPROP"), |
|
|
"optimizer type used for training (known algorithms: RPROP, SGD)") |
|
|
("mini-batches", boost::program_options::value<bool>(&miniBatches)->default_value(0), |
|
|
"update after every single sentence (SGD only)") |
|
|
; |
|
|
|
|
|
po::variables_map vm; |
|
|
po::store(po::parse_command_line(argc, argv, descr), vm); |
|
|
|
|
|
if (vm.count("help")) { |
|
|
std::ostringstream os; |
|
|
os << descr; |
|
|
out << os.str() << '\n'; |
|
|
out.flush(); |
|
|
exit(0); |
|
|
} |
|
|
|
|
|
po::notify(vm); |
|
|
|
|
|
} catch(std::exception& e) { |
|
|
|
|
|
err << "Error: " << e.what() << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
if ( !optimizerTypeStr.compare("rprop") || !optimizerTypeStr.compare("RPROP") ) { |
|
|
optimizerType = EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP; |
|
|
} else if ( !optimizerTypeStr.compare("sgd") || !optimizerTypeStr.compare("SGD") ) { |
|
|
optimizerType = EXPECTED_BLEU_OPTIMIZER_TYPE_SGD; |
|
|
} else { |
|
|
err << "Error: unknown optimizer type: \"" << optimizerTypeStr << "\" (known optimizers: rprop, sgd) " << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
util::FilePiece ifsFeatureNames(filenameFeatureNames.c_str()); |
|
|
|
|
|
StringPiece lineFeatureName; |
|
|
if ( !ifsFeatureNames.ReadLineOrEOF(lineFeatureName) ) |
|
|
{ |
|
|
err << "Error: flawed content in " << filenameFeatureNames << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
size_t maxFeatureNamesIdx = atol( lineFeatureName.as_string().c_str() ); |
|
|
|
|
|
std::vector<std::string> featureNames(maxFeatureNamesIdx); |
|
|
boost::unordered_map<std::string, size_t> featureIndexes; |
|
|
for (size_t i=0; i<maxFeatureNamesIdx; ++i) |
|
|
{ |
|
|
if ( !ifsFeatureNames.ReadLineOrEOF(lineFeatureName) ) { |
|
|
err << "Error: flawed content in " << filenameFeatureNames << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
util::TokenIter<util::SingleCharacter> token(lineFeatureName, ' '); |
|
|
size_t featureIndexCurrent = atol( token->as_string().c_str() ); |
|
|
token++; |
|
|
featureNames[featureIndexCurrent] = token->as_string(); |
|
|
featureIndexes[token->as_string()] = featureIndexCurrent; |
|
|
} |
|
|
|
|
|
|
|
|
std::vector<float> sparseScalingFactor(maxFeatureNamesIdx); |
|
|
std::vector< boost::unordered_map<size_t, float> > sparseScore(maxNBestSize); |
|
|
|
|
|
|
|
|
|
|
|
if ( filenameInitialWeights.length() != 0 ) |
|
|
{ |
|
|
util::FilePiece ifsInitialWeights(filenameInitialWeights.c_str()); |
|
|
|
|
|
StringPiece lineInitialWeight; |
|
|
if ( !ifsInitialWeights.ReadLineOrEOF(lineInitialWeight) ) { |
|
|
err << "Error: flawed content in " << filenameInitialWeights << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
do { |
|
|
util::TokenIter<util::SingleCharacter> token(lineInitialWeight, ' '); |
|
|
boost::unordered_map<std::string, size_t>::const_iterator found = featureIndexes.find(token->as_string()); |
|
|
if ( found == featureIndexes.end() ) { |
|
|
err << "Error: flawed content in " << filenameInitialWeights << " (unkown feature name \"" << token->as_string() << "\")" << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
token++; |
|
|
sparseScalingFactor[found->second] = atof( token->as_string().c_str() ); |
|
|
} while ( ifsInitialWeights.ReadLineOrEOF(lineInitialWeight) ); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
ExpectedBleuOptimizer optimizer(err, |
|
|
learningRate, |
|
|
initialStepSize, |
|
|
decreaseRate, |
|
|
increaseRate, |
|
|
minStepSize, |
|
|
maxStepSize, |
|
|
floorAbsScalingFactor, |
|
|
regularizationParameter); |
|
|
|
|
|
if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP ) |
|
|
{ |
|
|
optimizer.InitRPROP(sparseScalingFactor); |
|
|
} else if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_SGD ) { |
|
|
optimizer.InitRPROP(sparseScalingFactor); |
|
|
} else { |
|
|
err << "Error: unknown optimizer type" << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
for (size_t nIteration=1; nIteration<=iterationLimit; ++nIteration) |
|
|
{ |
|
|
util::FilePiece ifsSBleu(filenameSBleu.c_str()); |
|
|
util::FilePiece ifsNBest(filenameNBestList.c_str()); |
|
|
|
|
|
out << "### ITERATION " << nIteration << '\n' << '\n'; |
|
|
|
|
|
size_t sentenceIndex = 0; |
|
|
size_t batchSize = 0; |
|
|
size_t nBestSizeCount = 0; |
|
|
size_t globalIndex = 0; |
|
|
StringPiece lineNBest; |
|
|
std::vector<double> overallScoreUntransformed; |
|
|
std::vector<float> sBleu; |
|
|
float xBleu = 0; |
|
|
|
|
|
|
|
|
while ( ifsNBest.ReadLineOrEOF(lineNBest) ) |
|
|
{ |
|
|
|
|
|
util::TokenIter<util::SingleCharacter> token(lineNBest, ' '); |
|
|
|
|
|
if ( token == token.end() ) |
|
|
{ |
|
|
err << "Error: flawed content in " << filenameNBestList << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
size_t sentenceIndexCurrent = atol( token->as_string().c_str() ); |
|
|
token++; |
|
|
|
|
|
if ( sentenceIndex != sentenceIndexCurrent ) |
|
|
{ |
|
|
|
|
|
if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP ) |
|
|
{ |
|
|
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore ); |
|
|
} else if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_SGD ) { |
|
|
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore, miniBatches ); |
|
|
|
|
|
if ( miniBatches ) { |
|
|
xBleu += optimizer.UpdateSGD( sparseScalingFactor, 1 ); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
} else { |
|
|
err << "Error: unknown optimizer type" << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
for (size_t i=0; i<nBestSizeCount; ++i) { |
|
|
sparseScore[i].clear(); |
|
|
} |
|
|
nBestSizeCount = 0; |
|
|
overallScoreUntransformed.clear(); |
|
|
sBleu.clear(); |
|
|
sentenceIndex = sentenceIndexCurrent; |
|
|
++batchSize; |
|
|
} |
|
|
|
|
|
StringPiece lineSBleu; |
|
|
if ( !ifsSBleu.ReadLineOrEOF(lineSBleu) ) |
|
|
{ |
|
|
err << "Error: insufficient number of lines in " << filenameSBleu << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
if ( nBestSizeCount < maxNBestSize ) |
|
|
{ |
|
|
|
|
|
|
|
|
float sBleuCurrent = atof( lineSBleu.as_string().c_str() ); |
|
|
sBleu.push_back(sBleuCurrent); |
|
|
|
|
|
|
|
|
|
|
|
if ( token == token.end() ) |
|
|
{ |
|
|
err << "Error: flawed content in " << filenameNBestList << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
double scoreCurrent = 0; |
|
|
if ( !ignoreDecoderScore ) |
|
|
{ |
|
|
scoreCurrent = atof( token->as_string().c_str() ); |
|
|
} |
|
|
token++; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (token != token.end()) |
|
|
{ |
|
|
size_t featureNameCurrent = atol( token->as_string().c_str() ); |
|
|
token++; |
|
|
float featureValueCurrent = atof( token->as_string().c_str() ); |
|
|
sparseScore[nBestSizeCount].insert(std::make_pair(featureNameCurrent, featureValueCurrent)); |
|
|
scoreCurrent += sparseScalingFactor[featureNameCurrent] * featureValueCurrent; |
|
|
token++; |
|
|
} |
|
|
|
|
|
|
|
|
overallScoreUntransformed.push_back( std::exp(scoreCurrent) ); |
|
|
|
|
|
++nBestSizeCount; |
|
|
} |
|
|
++globalIndex; |
|
|
} |
|
|
|
|
|
if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_RPROP ) |
|
|
{ |
|
|
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore ); |
|
|
xBleu = optimizer.UpdateRPROP( sparseScalingFactor, batchSize ); |
|
|
out << "xBLEU= " << xBleu << '\n'; |
|
|
} else if ( optimizerType == EXPECTED_BLEU_OPTIMIZER_TYPE_SGD ) { |
|
|
optimizer.AddTrainingInstance( nBestSizeCount, sBleu, overallScoreUntransformed, sparseScore, miniBatches ); |
|
|
if ( miniBatches ) { |
|
|
xBleu += optimizer.UpdateSGD( sparseScalingFactor, 1 ); |
|
|
xBleu /= batchSize; |
|
|
} else { |
|
|
xBleu = optimizer.UpdateSGD( sparseScalingFactor, batchSize ); |
|
|
} |
|
|
out << "xBLEU= " << xBleu << '\n'; |
|
|
} else { |
|
|
err << "Error: unknown optimizer type" << '\n'; |
|
|
err.flush(); |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
for (size_t i=0; i<nBestSizeCount; ++i) { |
|
|
sparseScore[i].clear(); |
|
|
} |
|
|
nBestSizeCount = 0; |
|
|
overallScoreUntransformed.clear(); |
|
|
sBleu.clear(); |
|
|
|
|
|
out << '\n'; |
|
|
|
|
|
for (size_t i=0; i<sparseScalingFactor.size(); ++i) |
|
|
{ |
|
|
if ( (sparseScalingFactor[i] != 0) || printZeroWeights ) |
|
|
{ |
|
|
out << "ITERATION " << nIteration << " WEIGHT " << featureNames[i] << " " << sparseScalingFactor[i] << '\n'; |
|
|
} |
|
|
} |
|
|
|
|
|
out << '\n'; |
|
|
out.flush(); |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|