|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <string> |
|
|
#include <iostream> |
|
|
#include <boost/foreach.hpp> |
|
|
#include <boost/thread.hpp> |
|
|
#include <boost/thread/mutex.hpp> |
|
|
#include "System.h" |
|
|
#include "FF/FeatureFunction.h" |
|
|
#include "TranslationModel/UnknownWordPenalty.h" |
|
|
#include "legacy/Util2.h" |
|
|
#include "util/exception.hh" |
|
|
|
|
|
using namespace std; |
|
|
|
|
|
namespace Moses2 |
|
|
{ |
|
|
#ifndef WIN32 |
|
|
thread_local MemPool System::m_managerPool; |
|
|
thread_local MemPool System::m_systemPool; |
|
|
thread_local Recycler<HypothesisBase*> System::m_hypoRecycler; |
|
|
#endif |
|
|
|
|
|
// Builds a ready-to-use System from the parsed command line / ini parameters.
//
// Work is done in this order (later steps rely on earlier ones):
//   1. fill `options` from the parameters and derive `isPb` via IsPb();
//   2. create the output collectors;
//   3. create the feature functions and load their weights;
//   4. load the feature functions, the decode mappings and their back-off
//      values, logging progress to stderr;
//   5. for the non-`isPb` case, read the optional "max-chart-span" values.
//
// paramsArg: the parameters; used to initialise the `params` member and
//            `options`.
//
// Raises (through UTIL_THROW_IF2) when the XML input policy is XmlConstraint,
// and propagates anything thrown by IsPb(), LoadWeights(), LoadMappings() or
// the feature function loading.
System::System(const Parameter &paramsArg) :
  params(paramsArg), featureFunctions(*this)
{
  options.init(paramsArg);
  IsPb();

  bestCollector.reset(new OutputCollector());

  params.SetParameter(cpuAffinityOffset, "cpu-affinity-offset", -1);
  params.SetParameter(cpuAffinityOffsetIncr, "cpu-affinity-increment", 1);

  // The n-best collector is only created when n-best output is enabled and
  // its path is something other than "-".
  if (options.nbest.nbest_size && options.nbest.output_file_path != "-") {
    nbestCollector.reset(new OutputCollector(options.nbest.output_file_path));
  }

  if (!options.output.detailed_transrep_filepath.empty()) {
    detailedTranslationCollector.reset(new OutputCollector(options.output.detailed_transrep_filepath));
  }

  featureFunctions.Create();
  LoadWeights();

  if (params.GetParam("show-weights")) {
    // NOTE(review): the message announces an exit, but nothing here exits;
    // construction simply carries on after the weights are printed.
    std::cerr << "Showing weights then exit" << std::endl;
    featureFunctions.ShowWeights(weights);
  }

  std::cerr << "START featureFunctions.Load()" << std::endl;
  featureFunctions.Load();
  std::cerr << "START LoadMappings()" << std::endl;
  LoadMappings();
  std::cerr << "END LoadMappings()" << std::endl;
  LoadDecodeGraphBackoff();
  std::cerr << "END LoadDecodeGraphBackoff()" << std::endl;

  UTIL_THROW_IF2(options.input.xml_policy == XmlConstraint, "XmlConstraint not supported");

  if (!isPb) {
    const PARAM_VEC *section = params.GetParam("max-chart-span");
    if (section && section->size()) {
      maxChartSpans = Scan<size_t>(*section);
      // Exactly one span limit per mapping: missing entries are filled with
      // DEFAULT_MAX_CHART_SPAN.
      maxChartSpans.resize(mappings.size(), DEFAULT_MAX_CHART_SPAN);
    }
  }
}
|
|
|
|
|
// Nothing to release by hand: the destructor body is empty and every member
// is torn down by its own destructor.
System::~System() = default;
|
|
|
|
|
void System::LoadWeights() |
|
|
{ |
|
|
weights.Init(featureFunctions); |
|
|
|
|
|
|
|
|
typedef std::map<std::string, std::vector<float> > WeightMap; |
|
|
const WeightMap &allWeights = params.GetAllWeights(); |
|
|
|
|
|
|
|
|
const std::vector<FeatureFunction*> &ffs = featureFunctions.GetFeatureFunctions(); |
|
|
BOOST_FOREACH(const FeatureFunction *ff, ffs) { |
|
|
if (ff->IsTuneable()) { |
|
|
const std::string &ffName = ff->GetName(); |
|
|
WeightMap::const_iterator iterWeight = allWeights.find(ffName); |
|
|
UTIL_THROW_IF2(iterWeight == allWeights.end(), "Must specify weight for " << ffName); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
BOOST_FOREACH(const WeightMap::value_type &valPair, allWeights) { |
|
|
const string &ffName = valPair.first; |
|
|
const std::vector<float> &ffWeights = valPair.second; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
weights.SetWeights(featureFunctions, ffName, ffWeights); |
|
|
} |
|
|
} |
|
|
|
|
|
void System::LoadMappings() |
|
|
{ |
|
|
const PARAM_VEC *vec = params.GetParam("mapping"); |
|
|
UTIL_THROW_IF2(vec == NULL, "Must have [mapping] section"); |
|
|
|
|
|
BOOST_FOREACH(const std::string &line, *vec) { |
|
|
vector<string> toks = Tokenize(line); |
|
|
assert( (toks.size() == 2 && toks[0] == "T") || (toks.size() == 3 && toks[1] == "T") ); |
|
|
|
|
|
size_t ptInd; |
|
|
if (toks.size() == 2) { |
|
|
ptInd = Scan<size_t>(toks[1]); |
|
|
} else { |
|
|
ptInd = Scan<size_t>(toks[2]); |
|
|
} |
|
|
const PhraseTable *pt = featureFunctions.GetPhraseTableExcludeUnknownWordPenalty(ptInd); |
|
|
mappings.push_back(pt); |
|
|
} |
|
|
|
|
|
|
|
|
const UnknownWordPenalty *unkWP = featureFunctions.GetUnknownWordPenalty(); |
|
|
if (unkWP) { |
|
|
mappings.push_back(unkWP); |
|
|
} |
|
|
} |
|
|
|
|
|
void System::LoadDecodeGraphBackoff() |
|
|
{ |
|
|
const PARAM_VEC *vec = params.GetParam("decoding-graph-backoff"); |
|
|
|
|
|
for (size_t i = 0; i < mappings.size(); ++i) { |
|
|
PhraseTable *pt = const_cast<PhraseTable*>(mappings[i]); |
|
|
|
|
|
if (vec && vec->size() < i) { |
|
|
pt->decodeGraphBackoff = Scan<int>((*vec)[i]); |
|
|
} else if (pt == featureFunctions.GetUnknownWordPenalty()) { |
|
|
pt->decodeGraphBackoff = 1; |
|
|
} else { |
|
|
pt->decodeGraphBackoff = 0; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
void System::IsPb() |
|
|
{ |
|
|
switch (options.search.algo) { |
|
|
case Normal: |
|
|
case NormalBatch: |
|
|
case CubePruning: |
|
|
case CubePruningPerMiniStack: |
|
|
case CubePruningPerBitmap: |
|
|
case CubePruningCardinalStack: |
|
|
case CubePruningBitmapStack: |
|
|
case CubePruningMiniStack: |
|
|
isPb = true; |
|
|
break; |
|
|
case CYKPlus: |
|
|
isPb = false; |
|
|
break; |
|
|
default: |
|
|
throw std::runtime_error("Unknown search algorithm " + options.search.algo); |
|
|
break; |
|
|
} |
|
|
} |
|
|
|
|
|
// Gives callers mutable access to the vocabulary held in `m_vocab`, even
// through a const System (so `m_vocab` is presumably declared mutable --
// confirm in System.h).
FactorCollection& System::GetVocab() const
{
  return m_vocab;
}
|
|
|
|
|
|
|
|
|
|
|
// Returns the Batch held by `m_batch`, creating it on first use.
//
// pool: only used when the Batch has to be created; once `m_batch` holds an
//       object, later calls return that object and ignore `pool`.
//
// NOTE(review): the get()/reset() usage suggests `m_batch` is a
// boost::thread_specific_ptr, i.e. one Batch per thread -- confirm in System.h.
Batch& System::GetBatch(MemPool& pool) const
{
  if (m_batch.get() == NULL) {
    // First request: build the Batch and let m_batch hold it.
    m_batch.reset(new Batch(pool));
  }

  Batch *batch = m_batch.get();
  assert(batch);
  return *batch;
}
|
|
|
|
|
#ifdef WIN32 |
|
|
template<class C> |
|
|
C& GetThreadSpecificObj(boost::thread_specific_ptr<C> &threadSpecificPtr) |
|
|
{ |
|
|
C* obj; |
|
|
obj = threadSpecificPtr.get(); |
|
|
if (obj == NULL) { |
|
|
obj = new C(); |
|
|
threadSpecificPtr.reset(obj); |
|
|
} |
|
|
assert(obj); |
|
|
return *obj; |
|
|
} |
|
|
|
|
|
// WIN32 build: returns the calling thread's manager pool, created on first
// use by GetThreadSpecificObj().
MemPool& System::GetManagerPool() const
{
  return GetThreadSpecificObj<MemPool>(m_managerPool);
}
|
|
|
|
|
// WIN32 build: returns the calling thread's system pool, created on first
// use by GetThreadSpecificObj().
MemPool& System::GetSystemPool() const
{
  return GetThreadSpecificObj<MemPool>(m_systemPool);
}
|
|
|
|
|
// WIN32 build: returns the calling thread's hypothesis recycler, created on
// first use by GetThreadSpecificObj().
Recycler<HypothesisBase*>& System::GetHypoRecycler() const
{
  return GetThreadSpecificObj<Recycler<HypothesisBase*> >(m_hypoRecycler);
}
|
|
|
|
|
#else |
|
|
// Non-WIN32 build: returns the calling thread's instance of the thread_local
// member `m_managerPool`.
MemPool& System::GetManagerPool() const
{
  return m_managerPool;
}
|
|
|
|
|
// Non-WIN32 build: returns the calling thread's instance of the thread_local
// member `m_systemPool`.
MemPool& System::GetSystemPool() const
{
  return m_systemPool;
}
|
|
|
|
|
// Non-WIN32 build: returns the calling thread's instance of the thread_local
// member `m_hypoRecycler`.
Recycler<HypothesisBase*>& System::GetHypoRecycler() const
{
  return m_hypoRecycler;
}
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|