|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "LexicalReorderingTableCompact.h" |
|
|
#include "../../SubPhrase.h" |
|
|
#include "../../legacy/Util2.h" |
|
|
|
|
|
namespace Moses2 |
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
// Class-wide default copied into m_inMemory by both constructors; m_inMemory
// in turn selects between m_scoresMemory and m_scoresMapped in Load()/GetScore().
bool LexicalReorderingTableCompact::s_inMemoryByDefault(false);
|
|
|
|
|
/**
 * Builds a compact lexical reordering table and immediately loads it from
 * disk.
 *
 * @param filePath  path of the compact table file handed to Load()
 * @param f_factors factor list forwarded to the LexicalReorderingTable base
 * @param e_factors factor list forwarded to the LexicalReorderingTable base
 * @param c_factors factor list forwarded to the LexicalReorderingTable base
 *
 * The values given here for m_numScoreComponent and m_multipleScoreTrees are
 * only placeholders: Load() overwrites both with what it reads from the file.
 */
LexicalReorderingTableCompact::LexicalReorderingTableCompact(
  const std::string& filePath,
  const std::vector<FactorType>& f_factors,
  const std::vector<FactorType>& e_factors,
  const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors)
  , m_inMemory(s_inMemoryByDefault)
  , m_numScoreComponent(6)
  , m_multipleScoreTrees(true)
  , m_hash(10, 16)
  , m_scoreTrees(1)
{
  Load(filePath);
}
|
|
|
|
|
/**
 * Builds a compact lexical reordering table without loading any file.
 *
 * @param f_factors factor list forwarded to the LexicalReorderingTable base
 * @param e_factors factor list forwarded to the LexicalReorderingTable base
 * @param c_factors factor list forwarded to the LexicalReorderingTable base
 *
 * Members receive the same initial values as in the file-loading constructor;
 * m_scoreTrees starts with a single value-initialized slot.
 */
LexicalReorderingTableCompact::LexicalReorderingTableCompact(
  const std::vector<FactorType>& f_factors,
  const std::vector<FactorType>& e_factors,
  const std::vector<FactorType>& c_factors)
  : LexicalReorderingTable(f_factors, e_factors, c_factors)
  , m_inMemory(s_inMemoryByDefault)
  , m_numScoreComponent(6)
  , m_multipleScoreTrees(true)
  , m_hash(10, 16)
  , m_scoreTrees(1)
{
  // Nothing to do: the table stays empty until something loads it.
}
|
|
|
|
|
/**
 * Releases every score tree held in m_scoreTrees.
 */
LexicalReorderingTableCompact::~LexicalReorderingTableCompact()
{
  // Slots that were never filled are null pointers, and deleting a null
  // pointer is a no-op, so a table that was never loaded is handled too.
  const size_t treeCount = m_scoreTrees.size();
  for (size_t slot = 0; slot != treeCount; ++slot) {
    delete m_scoreTrees[slot];
  }
}
|
|
|
|
|
std::vector<float> LexicalReorderingTableCompact::GetScore(const Phrase<Moses2::Word>& f, |
|
|
const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) |
|
|
{ |
|
|
std::string key; |
|
|
std::vector<float> scores; |
|
|
|
|
|
if (0 == c.GetSize()) key = MakeKey(f, e, c); |
|
|
else { |
|
|
for (size_t i = 0; i <= c.GetSize(); ++i) { |
|
|
SubPhrase<Moses2::Word> sub_c = c.GetSubPhrase(i, c.GetSize() - i); |
|
|
key = MakeKey(f, e, sub_c); |
|
|
} |
|
|
} |
|
|
|
|
|
size_t index = m_hash[key]; |
|
|
if (m_hash.GetSize() != index) { |
|
|
std::string scoresString; |
|
|
if (m_inMemory) scoresString = m_scoresMemory[index].str(); |
|
|
else scoresString = m_scoresMapped[index].str(); |
|
|
|
|
|
BitWrapper<> bitStream(scoresString); |
|
|
for (size_t i = 0; i < m_numScoreComponent; i++) |
|
|
scores.push_back( |
|
|
m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream)); |
|
|
|
|
|
return scores; |
|
|
} |
|
|
|
|
|
return std::vector<float>(); |
|
|
} |
|
|
|
|
|
std::string LexicalReorderingTableCompact::MakeKey(const Phrase<Moses2::Word>& f, |
|
|
const Phrase<Moses2::Word>& e, const Phrase<Moses2::Word>& c) const |
|
|
{ |
|
|
return MakeKey(Trim(f.GetString(m_FactorsF)), Trim(e.GetString(m_FactorsE)), |
|
|
Trim(c.GetString(m_FactorsC))); |
|
|
} |
|
|
|
|
|
std::string LexicalReorderingTableCompact::MakeKey(const std::string& f, |
|
|
const std::string& e, const std::string& c) const |
|
|
{ |
|
|
std::string key; |
|
|
if (!f.empty()) key += f; |
|
|
if (!m_FactorsE.empty()) { |
|
|
if (!key.empty()) key += " ||| "; |
|
|
key += e; |
|
|
} |
|
|
if (!m_FactorsC.empty()) { |
|
|
if (!key.empty()) key += " ||| "; |
|
|
key += c; |
|
|
} |
|
|
key += " ||| "; |
|
|
return key; |
|
|
} |
|
|
|
|
|
/**
 * Creates a compact table if a matching ".minlexr" file exists.
 *
 * @param filePath  table path, given with or without the ".minlexr" suffix
 * @param f_factors factor list forwarded to the constructor
 * @param e_factors factor list forwarded to the constructor
 * @param c_factors factor list forwarded to the constructor
 * @return a newly allocated LexicalReorderingTableCompact (the caller owns
 *         it), or 0 when no compact file is found or when the build lacks
 *         HAVE_CMPH.
 *
 * The suffix test is guarded by a length check: the previous
 * substr(length() - suffixLength, ...) underflowed for paths shorter than the
 * suffix and made substr throw std::out_of_range instead of returning 0.
 */
LexicalReorderingTable*
LexicalReorderingTableCompact::CheckAndLoad(const std::string& filePath,
    const std::vector<FactorType>& f_factors,
    const std::vector<FactorType>& e_factors,
    const std::vector<FactorType>& c_factors)
{
#ifdef HAVE_CMPH
  const std::string minlexr = ".minlexr";

  // First choice: the path was given without the suffix.
  if (FileExists(filePath + minlexr)) {
    std::cerr << "Using compact lexical reordering table" << std::endl;
    return new LexicalReorderingTableCompact(filePath + minlexr, f_factors,
           e_factors, c_factors);
  }

  // Second choice: the path already ends in the suffix.
  const bool hasSuffix = filePath.length() >= minlexr.length()
                         && filePath.compare(filePath.length() - minlexr.length(),
                                             minlexr.length(), minlexr) == 0;
  if (hasSuffix && FileExists(filePath)) {
    std::cerr << "Using compact lexical reordering table" << std::endl;
    return new LexicalReorderingTableCompact(filePath, f_factors, e_factors,
           c_factors);
  }
#endif
  return 0;
}
|
|
|
|
|
void LexicalReorderingTableCompact::Load(std::string filePath) |
|
|
{ |
|
|
std::FILE* pFile = std::fopen(filePath.c_str(), "r"); |
|
|
UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened"); |
|
|
|
|
|
|
|
|
m_hash.Load(pFile); |
|
|
|
|
|
|
|
|
|
|
|
size_t read = 0; |
|
|
read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, |
|
|
pFile); |
|
|
read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, |
|
|
pFile); |
|
|
|
|
|
if (m_multipleScoreTrees) { |
|
|
m_scoreTrees.resize(m_numScoreComponent); |
|
|
for (size_t i = 0; i < m_numScoreComponent; i++) |
|
|
m_scoreTrees[i] = new CanonicalHuffman<float>(pFile); |
|
|
} else { |
|
|
m_scoreTrees.resize(1); |
|
|
m_scoreTrees[0] = new CanonicalHuffman<float>(pFile); |
|
|
} |
|
|
|
|
|
if (m_inMemory) m_scoresMemory.load(pFile, false); |
|
|
else m_scoresMapped.load(pFile, true); |
|
|
} |
|
|
|
|
|
} |
|
|
|