|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <iostream> |
|
|
#include <sstream> |
|
|
#include <algorithm> |
|
|
#include <boost/foreach.hpp> |
|
|
#include "HypothesisColl.h" |
|
|
#include "ManagerBase.h" |
|
|
#include "System.h" |
|
|
#include "MemPoolAllocator.h" |
|
|
|
|
|
using namespace std; |
|
|
|
|
|
namespace Moses2 |
|
|
{ |
|
|
|
|
|
HypothesisColl::HypothesisColl(const ManagerBase &mgr) |
|
|
:m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool())) |
|
|
,m_sortedHypos(NULL) |
|
|
{ |
|
|
m_bestScore = -std::numeric_limits<float>::infinity(); |
|
|
m_worstScore = std::numeric_limits<float>::infinity(); |
|
|
} |
|
|
|
|
|
const HypothesisBase *HypothesisColl::GetBestHypo() const |
|
|
{ |
|
|
if (GetSize() == 0) { |
|
|
return NULL; |
|
|
} |
|
|
if (m_sortedHypos) { |
|
|
return (*m_sortedHypos)[0]; |
|
|
} |
|
|
|
|
|
SCORE bestScore = -std::numeric_limits<SCORE>::infinity(); |
|
|
const HypothesisBase *bestHypo; |
|
|
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { |
|
|
if (hypo->GetFutureScore() > bestScore) { |
|
|
bestScore = hypo->GetFutureScore(); |
|
|
bestHypo = hypo; |
|
|
} |
|
|
} |
|
|
return bestHypo; |
|
|
} |
|
|
|
|
|
void HypothesisColl::Add( |
|
|
const ManagerBase &mgr, |
|
|
HypothesisBase *hypo, |
|
|
Recycler<HypothesisBase*> &hypoRecycle, |
|
|
ArcLists &arcLists) |
|
|
{ |
|
|
size_t maxStackSize = mgr.system.options.search.stack_size; |
|
|
|
|
|
if (GetSize() > maxStackSize * 2) { |
|
|
|
|
|
PruneHypos(mgr, mgr.arcLists); |
|
|
} |
|
|
|
|
|
SCORE futureScore = hypo->GetFutureScore(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (GetSize() >= maxStackSize && futureScore < m_worstScore) { |
|
|
|
|
|
|
|
|
|
|
|
hypoRecycle.Recycle(hypo); |
|
|
return; |
|
|
} |
|
|
|
|
|
StackAdd added = Add(hypo); |
|
|
|
|
|
size_t nbestSize = mgr.system.options.nbest.nbest_size; |
|
|
if (nbestSize) { |
|
|
arcLists.AddArc(added.added, hypo, added.other); |
|
|
} else { |
|
|
if (added.added) { |
|
|
if (added.other) { |
|
|
hypoRecycle.Recycle(added.other); |
|
|
} |
|
|
} else { |
|
|
hypoRecycle.Recycle(hypo); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if (added.added) { |
|
|
if (futureScore > m_bestScore) { |
|
|
m_bestScore = futureScore; |
|
|
float beamWidth = mgr.system.options.search.beam_width; |
|
|
if ( m_bestScore + beamWidth > m_worstScore ) { |
|
|
m_worstScore = m_bestScore + beamWidth; |
|
|
} |
|
|
} else if (GetSize() <= maxStackSize && futureScore < m_worstScore) { |
|
|
m_worstScore = futureScore; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
StackAdd HypothesisColl::Add(const HypothesisBase *hypo) |
|
|
{ |
|
|
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo); |
|
|
|
|
|
|
|
|
|
|
|
if (addRet.second) { |
|
|
|
|
|
|
|
|
return StackAdd(true, NULL); |
|
|
} else { |
|
|
HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first); |
|
|
|
|
|
|
|
|
if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const HypothesisBase * const &hypoExisting1 = *addRet.first; |
|
|
const HypothesisBase *&hypoExisting2 = |
|
|
const_cast<const HypothesisBase *&>(hypoExisting1); |
|
|
hypoExisting2 = hypo; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return StackAdd(true, hypoExisting); |
|
|
} else { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return StackAdd(false, hypoExisting); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos( |
|
|
const ManagerBase &mgr, |
|
|
ArcLists &arcLists) const |
|
|
{ |
|
|
if (m_sortedHypos == NULL) { |
|
|
|
|
|
MemPool &pool = mgr.GetPool(); |
|
|
m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool, |
|
|
m_coll.size()); |
|
|
|
|
|
SortHypos(mgr, m_sortedHypos->GetArray()); |
|
|
|
|
|
|
|
|
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle(); |
|
|
|
|
|
size_t maxStackSize = mgr.system.options.search.stack_size; |
|
|
if (maxStackSize && m_sortedHypos->size() > maxStackSize) { |
|
|
for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) { |
|
|
HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]); |
|
|
recycler.Recycle(hypo); |
|
|
|
|
|
|
|
|
if (mgr.system.options.nbest.nbest_size) { |
|
|
arcLists.Delete(hypo); |
|
|
} |
|
|
} |
|
|
m_sortedHypos->resize(maxStackSize); |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
return *m_sortedHypos; |
|
|
} |
|
|
|
|
|
void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists) |
|
|
{ |
|
|
size_t maxStackSize = mgr.system.options.search.stack_size; |
|
|
|
|
|
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle(); |
|
|
|
|
|
const HypothesisBase **sortedHypos = (const HypothesisBase **) alloca(GetSize() * sizeof(const HypothesisBase *)); |
|
|
SortHypos(mgr, sortedHypos); |
|
|
|
|
|
|
|
|
m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore(); |
|
|
|
|
|
|
|
|
for (size_t i = maxStackSize; i < GetSize(); ++i) { |
|
|
HypothesisBase *hypo = const_cast<HypothesisBase*>(sortedHypos[i]); |
|
|
|
|
|
|
|
|
if (mgr.system.options.nbest.nbest_size) { |
|
|
arcLists.Delete(hypo); |
|
|
} |
|
|
|
|
|
|
|
|
Delete(hypo); |
|
|
|
|
|
recycler.Recycle(hypo); |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const |
|
|
{ |
|
|
size_t maxStackSize = mgr.system.options.search.stack_size; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
size_t ind = 0; |
|
|
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) { |
|
|
sortedHypos[ind] = hypo; |
|
|
++ind; |
|
|
} |
|
|
|
|
|
size_t indMiddle; |
|
|
if (maxStackSize == 0) { |
|
|
indMiddle = GetSize(); |
|
|
} else if (GetSize() > maxStackSize) { |
|
|
indMiddle = maxStackSize; |
|
|
} else { |
|
|
|
|
|
indMiddle = GetSize(); |
|
|
} |
|
|
|
|
|
const HypothesisBase **iterMiddle = sortedHypos + indMiddle; |
|
|
|
|
|
std::partial_sort( |
|
|
sortedHypos, |
|
|
iterMiddle, |
|
|
sortedHypos + GetSize(), |
|
|
HypothesisFutureScoreOrderer()); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
void HypothesisColl::Delete(const HypothesisBase *hypo) |
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
size_t erased = m_coll.erase(hypo); |
|
|
UTIL_THROW_IF2(erased != 1, "couldn't erase hypo " << hypo); |
|
|
} |
|
|
|
|
|
void HypothesisColl::Clear() |
|
|
{ |
|
|
m_sortedHypos = NULL; |
|
|
m_coll.clear(); |
|
|
|
|
|
m_bestScore = -std::numeric_limits<float>::infinity(); |
|
|
m_worstScore = std::numeric_limits<float>::infinity(); |
|
|
} |
|
|
|
|
|
std::string HypothesisColl::Debug(const System &system) const |
|
|
{ |
|
|
stringstream out; |
|
|
BOOST_FOREACH (const HypothesisBase *hypo, m_coll) { |
|
|
out << hypo->Debug(system); |
|
|
out << std::endl << std::endl; |
|
|
} |
|
|
|
|
|
return out.str(); |
|
|
} |
|
|
|
|
|
} |
|
|
|