Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- mosesdecoder/moses/AlignmentInfoCollection.cpp +60 -0
- mosesdecoder/moses/AlignmentInfoCollection.h +81 -0
- mosesdecoder/moses/AlignmentInfoTest.cpp +71 -0
- mosesdecoder/moses/BaseManager.cpp +160 -0
- mosesdecoder/moses/Bitmaps.cpp +58 -0
- mosesdecoder/moses/ChartCell.h +128 -0
- mosesdecoder/moses/ChartCellLabel.h +89 -0
- mosesdecoder/moses/ChartHypothesis.h +204 -0
- mosesdecoder/moses/ChartKBestExtractor.cpp +332 -0
- mosesdecoder/moses/ChartKBestExtractor.h +132 -0
- mosesdecoder/moses/ChartParser.cpp +313 -0
- mosesdecoder/moses/ChartRuleLookupManager.cpp +9 -0
- mosesdecoder/moses/ChartRuleLookupManager.h +84 -0
- mosesdecoder/moses/ChartTranslationOptionList.cpp +219 -0
- mosesdecoder/moses/ChartTranslationOptions.h +104 -0
- mosesdecoder/moses/ConfusionNet.cpp +294 -0
- mosesdecoder/moses/ContextScope.h +124 -0
- mosesdecoder/moses/DecodeGraph.cpp +43 -0
- mosesdecoder/moses/DecodeStep.cpp +90 -0
- mosesdecoder/moses/DecodeStepGeneration.h +54 -0
- mosesdecoder/moses/DecodeStepTranslation.cpp +280 -0
- mosesdecoder/moses/DecodeStepTranslation.h +89 -0
- mosesdecoder/moses/Factor.cpp +48 -0
- mosesdecoder/moses/ForestInput.h +88 -0
- mosesdecoder/moses/GenerationDictionary.h +83 -0
- mosesdecoder/moses/HypothesisStackCubePruning.cpp +313 -0
- mosesdecoder/moses/HypothesisStackCubePruning.h +153 -0
- mosesdecoder/moses/Incremental.h +124 -0
- mosesdecoder/moses/Jamfile +143 -0
- mosesdecoder/moses/LVoc.cpp +7 -0
- mosesdecoder/moses/LVoc.h +93 -0
- mosesdecoder/moses/Manager.cpp +2016 -0
- mosesdecoder/moses/MockHypothesis.h +97 -0
- mosesdecoder/moses/OutputFileStream.h +81 -0
- mosesdecoder/moses/PCNTools.h +67 -0
- mosesdecoder/moses/PDTAimp.cpp +476 -0
- mosesdecoder/moses/Parameter.cpp +1690 -0
- mosesdecoder/moses/Parameter.h +173 -0
- mosesdecoder/moses/Phrase.h +244 -0
- mosesdecoder/moses/PrefixTree.h +339 -0
- mosesdecoder/moses/Range.h +107 -0
- mosesdecoder/moses/ReorderingConstraint.cpp +260 -0
- mosesdecoder/moses/ReorderingConstraint.h +113 -0
- mosesdecoder/moses/ScoreComponentCollectionTest.cpp +184 -0
- mosesdecoder/moses/Search.cpp +50 -0
- mosesdecoder/moses/Search.h +57 -0
- mosesdecoder/moses/SearchCubePruning.h +48 -0
- mosesdecoder/moses/SearchNormal.cpp +423 -0
- mosesdecoder/moses/SquareMatrix.cpp +127 -0
- mosesdecoder/moses/StaticData.cpp +966 -0
mosesdecoder/moses/AlignmentInfoCollection.cpp
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#include "AlignmentInfoCollection.h"

namespace Moses
{

// The single process-wide instance handed out by Instance().
AlignmentInfoCollection AlignmentInfoCollection::s_instance;

AlignmentInfoCollection::AlignmentInfoCollection()
{
  // Seed the cache with the empty alignment so GetEmptyAlignmentInfo()
  // can always return a stable, pre-interned object.
  std::set<std::pair<size_t,size_t> > noPairs;
  m_emptyAlignmentInfo = Add(noPairs);
}

AlignmentInfoCollection::~AlignmentInfoCollection()
{
}

// Accessor for the pre-interned empty alignment created in the constructor.
const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
{
  return *m_emptyAlignmentInfo;
}

// Intern `ainfo`: return a pointer to the cached copy, inserting it first
// if it has not been seen before.  Pointers returned here stay valid for
// the lifetime of the collection (std::set never relocates its elements).
AlignmentInfo const *
AlignmentInfoCollection::
Add(AlignmentInfo const& ainfo)
{
#ifdef WITH_THREADS
  {
    // Fast path: most alignments are already cached, so first probe the
    // set under a shared (read) lock.
    boost::shared_lock<boost::shared_mutex> readGuard(m_accessLock);
    AlignmentInfoSet::const_iterator found = m_collection.find(ainfo);
    if (found != m_collection.end())
      return &*found;
  }
  // Slow path: take the exclusive lock before inserting.  Another writer
  // may have inserted the same value between the two locks; set::insert
  // is idempotent, so that race is benign.
  boost::unique_lock<boost::shared_mutex> writeGuard(m_accessLock);
#endif
  AlignmentInfoSet::iterator where = m_collection.insert(ainfo).first;
  return &(*where);
}

}
|
mosesdecoder/moses/AlignmentInfoCollection.h
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#pragma once

#include "AlignmentInfo.h"

#include <set>

#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#include <boost/thread/locks.hpp>
#endif

namespace Moses
{

/** Singleton collection of all AlignmentInfo objects.
 * Used as a cache of all alignment info to save space.
 * Interned objects live in a std::set, so pointers returned by Add()
 * remain valid for the lifetime of the singleton.
 */
class AlignmentInfoCollection
{
public:
  //! Access the process-wide singleton (defined in the .cpp file).
  static AlignmentInfoCollection &Instance() {
    return s_instance;
  }

  /** Returns a pointer to an AlignmentInfo object with the same source-target
   * alignment pairs as given in the argument. If the collection already
   * contains such an object then returns a pointer to it; otherwise a new
   * one is inserted.
   */
private:
  // Core interning primitive; private so callers go through the
  // templated overload below.
  const AlignmentInfo* Add(AlignmentInfo const& ainfo);

public:
  // Convenience overload: builds an AlignmentInfo from any representation
  // ALNREP that AlignmentInfo's constructor accepts, then interns it.
  template<typename ALNREP>
  AlignmentInfo const *
  Add(ALNREP const & aln) {
    return this->Add(AlignmentInfo(aln));
  }

  //! Returns a pointer to an empty AlignmentInfo object.
  const AlignmentInfo &GetEmptyAlignmentInfo() const;

private:
  typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;


  //! Only a single static variable should be created.
  AlignmentInfoCollection();
  ~AlignmentInfoCollection();

  static AlignmentInfoCollection s_instance;

#ifdef WITH_THREADS
  //reader-writer lock guarding m_collection (see Add() in the .cpp)
  mutable boost::shared_mutex m_accessLock;
#endif

  AlignmentInfoSet m_collection;
  // Cached pointer to the interned empty alignment, set in the constructor.
  const AlignmentInfo *m_emptyAlignmentInfo;
};

}
|
mosesdecoder/moses/AlignmentInfoTest.cpp
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010- University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#include <boost/test/unit_test.hpp>

#include "AlignmentInfo.h"
#include "AlignmentInfoCollection.h"

using namespace Moses;
using namespace std;

BOOST_AUTO_TEST_SUITE(alignment_info)

typedef pair<size_t,size_t> IndexPair;
typedef set<pair<size_t,size_t> > IndexSet;

// Interns three alignments via the singleton collection:
// ai1 and ai2 come from identical pair sets, ai3 from a different one.
struct AlignmentInfoFixture {
  const AlignmentInfo* ai1;
  const AlignmentInfo* ai2;
  const AlignmentInfo* ai3;

  AlignmentInfoFixture() {
    AlignmentInfoCollection& cache = AlignmentInfoCollection::Instance();
    IndexSet first, second, third;
    first.insert(IndexPair(1,1));
    first.insert(IndexPair(2,1));
    // `second` deliberately duplicates `first` so equality can be tested.
    second.insert(IndexPair(1,1));
    second.insert(IndexPair(2,1));
    third.insert(IndexPair(1,2));
    third.insert(IndexPair(2,1));
    ai1 = cache.Add(first);
    ai2 = cache.Add(second);
    ai3 = cache.Add(third);
  }

};

// Equal pair sets compare equal; distinct sets do not.
BOOST_FIXTURE_TEST_CASE(comparator, AlignmentInfoFixture)
{
  BOOST_CHECK(*ai1 == *ai2);
  BOOST_CHECK(*ai1 == *ai1);
  BOOST_CHECK(*ai2 == *ai2);
  BOOST_CHECK(*ai3 == *ai3);
  BOOST_CHECK(!(*ai2 == *ai3));
  BOOST_CHECK(!(*ai1 == *ai3));
}

BOOST_FIXTURE_TEST_CASE(hasher, AlignmentInfoFixture)
{
  //simple test that same objects give same hash
  AlignmentInfoHasher hash;
  BOOST_CHECK_EQUAL(hash(*ai1), hash(*ai2));
}

BOOST_AUTO_TEST_SUITE_END()
|
mosesdecoder/moses/BaseManager.cpp
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "BaseManager.h"
#include "StaticData.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/TranslationTask.h"

#include <vector>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/filesystem.hpp>

using namespace std;

namespace Moses
{

// Holds a weak reference to the owning translation task and a direct
// reference to the task's input sentence.
BaseManager::BaseManager(ttasksptr const& ttask)
  : m_ttask(ttask), m_source(*(ttask->GetSource().get()))
{ }

//! The input this manager is translating.
const InputType&
BaseManager::GetSource() const
{
  return m_source;
}

// Promote the weak task pointer to a shared one.  NOTE(review): result
// may be empty if the task has already been destroyed — callers that
// dereference (e.g. options()) assume the task is still alive.
const ttasksptr
BaseManager::GetTtask() const
{
  return m_ttask.lock();
}

void
BaseManager::
OutputSearchGraphAsHypergraph(std::ostream& out) const
{
  // This virtual function that may not be implemented everywhere, but it should for
  // derived classes that use it
  UTIL_THROW2("Not implemented.");
}

// Dump the search graph as a hypergraph to `fname`, creating the output
// directory if needed and (re-)writing a companion "weights" file next
// to it.  Compression is chosen from the file extension (.gz / .bz2).
void
BaseManager::
OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision) const
{
  std::string odir = boost::filesystem::path(fname).parent_path().string();
  if (! boost::filesystem::exists(odir))
    boost::filesystem::create_directory(odir);
  UTIL_THROW_IF2(!boost::filesystem::is_directory(odir),
                 "Cannot output hypergraphs to " << odir
                 << " because that path exists but is not a directory.");

  // not clear why we need to output the weights every time we dump a search
  // graph into a file again, but that's what the old code did.

  string weightsFile = odir + "/weights";
  TRACE_ERR("The weights file is " << weightsFile << "\n");
  ofstream weightsOut;
  weightsOut.open(weightsFile.c_str());
  weightsOut.setf(std::ios::fixed);
  weightsOut.precision(6);
  // just temporarily, till we've implemented weight scoring in the manager
  // (or the translation task)
  StaticData::Instance().GetAllWeights().Save(weightsOut);
  weightsOut.close();

  // Build the filter chain: optional compressor first, file sink last.
  boost::iostreams::filtering_ostream file;
  if (boost::ends_with(fname, ".gz"))
    file.push(boost::iostreams::gzip_compressor());
  else if (boost::ends_with(fname, ".bz2"))
    file.push( boost::iostreams::bzip2_compressor() );
  file.push( boost::iostreams::file_sink(fname, ios_base::out) );
  if (file.is_complete() && file.good()) {
    file.setf(std::ios::fixed);
    file.precision(precision);
    // Delegates to the stream overload, which derived classes implement.
    this->OutputSearchGraphAsHypergraph(file);
    file.flush();
  } else {
    TRACE_ERR("Cannot output hypergraph for line "
              << this->GetSource().GetTranslationId()
              << " because the output file " << fname
              << " is not open or not ready for writing"
              << std::endl);
  }
  // Detach the sink so buffered/compressed data is flushed to disk.
  file.pop();
}




/***
 * print surface factor only for the given phrase
 */
void
BaseManager::
OutputSurface(std::ostream &out, Phrase const& phrase) const
{
  std::vector<FactorType> const& factor_order = options()->output.factor_order;

  bool markUnknown = options()->unk.mark;
  std::string const& fd = options()->output.factor_delimiter;

  size_t size = phrase.GetSize();
  for (size_t pos = 0 ; pos < size ; pos++) {
    const Factor *factor = phrase.GetFactor(pos, factor_order[0]);
    UTIL_THROW_IF2(factor == NULL, "Empty factor 0 at position " << pos);

    const Word &word = phrase.GetWord(pos);
    // Optionally bracket out-of-vocabulary words with configured
    // prefix/suffix markers.
    if(markUnknown && word.IsOOV()) {
      out << options()->unk.prefix;
    }

    out << *factor;

    // Remaining factors are appended with the factor delimiter.  The
    // inner `factor` intentionally shadows the factor-0 pointer above.
    for (size_t i = 1 ; i < factor_order.size() ; i++) {
      const Factor *factor = phrase.GetFactor(pos, factor_order[i]);
      UTIL_THROW_IF2(!factor, "Empty factor " << i << " at position " << pos);
      out << fd << *factor;
    }

    if(markUnknown && word.IsOOV()) {
      out << options()->unk.suffix;
    }

    out << " ";
  }
}

// Emulates the old operator<<(ostream &, const DottedRule &) function. The
// output format is a bit odd (reverse order and double spacing between symbols)
// but there are scripts and tools that expect the output of -T to look like
// that.
void BaseManager::WriteApplicationContext(std::ostream &out,
    const ApplicationContext &context) const
{
  assert(!context.empty());
  // Iterate in reverse; the trailing " " inside the loop plus the
  // separator below produce the documented double spacing.
  ApplicationContext::const_reverse_iterator p = context.rbegin();
  while (true) {
    out << p->second << "=" << p->first << " ";
    if (++p == context.rend()) {
      break;
    }
    out << " ";
  }
}

//! Decoder options of the owning translation task.
AllOptions::ptr const&
BaseManager::
options() const
{
  return GetTtask()->options();
}


} // namespace

mosesdecoder/moses/Bitmaps.cpp
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <boost/foreach.hpp>
#include "Bitmaps.h"
#include "Util.h"

using namespace std;

namespace Moses
{
// Creates and registers the initial coverage bitmap for a sentence of
// `inputSize` words, with `initSourceCompleted` marking positions that
// are already covered before decoding starts.
Bitmaps::Bitmaps(size_t inputSize, const std::vector<bool> &initSourceCompleted)
{
  m_initBitmap = new Bitmap(inputSize, initSourceCompleted);
  // operator[] default-constructs the (empty) successor map for the
  // initial bitmap, registering it as a key in m_coll.
  m_coll[m_initBitmap];
}

// The collection owns every Bitmap it has handed out; free them all here.
Bitmaps::~Bitmaps()
{
  BOOST_FOREACH (const Coll::value_type& myPair, m_coll) {
    const Bitmap *bm = myPair.first;
    delete bm;
  }
}

// Return the canonical Bitmap equal to `bm` extended by `range`,
// allocating and registering it if no equal bitmap exists yet.
const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range)
{
  Bitmap *newBM = new Bitmap(bm, range);

  Coll::const_iterator iter = m_coll.find(newBM);
  if (iter == m_coll.end()) {
    // First time we see this coverage pattern: keep the allocation.
    m_coll[newBM] = NextBitmaps();
    return *newBM;
  } else {
    // Already interned: discard the duplicate and reuse the cached one.
    delete newBM;
    return *iter->first;
  }
}

// Like GetNextBitmap, but memoizes the (bitmap, range) -> successor link
// so repeated extensions of the same bitmap skip the allocate/find cycle.
// Precondition: `bm` must already be registered in this collection.
const Bitmap &Bitmaps::GetBitmap(const Bitmap &bm, const Range &range)
{
  Coll::iterator iter = m_coll.find(&bm);
  assert(iter != m_coll.end());

  const Bitmap *newBM;
  NextBitmaps &next = iter->second;
  NextBitmaps::const_iterator iterNext = next.find(range);
  if (iterNext == next.end()) {
    // not seen the link yet.
    newBM = &GetNextBitmap(bm, range);
    next[range] = newBM;
  } else {
    // link exist
    //std::cerr << "link exists" << endl;
    newBM = iterNext->second;
  }
  return *newBM;
}

}

mosesdecoder/moses/ChartCell.h
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 Hieu Hoang

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#pragma once

#include <iostream>
#include <queue>
#include <map>
#include <vector>
#include "Word.h"
#include "Range.h"
#include "NonTerminal.h"
#include "ChartHypothesis.h"
#include "ChartHypothesisCollection.h"
#include "RuleCube.h"
#include "ChartCellLabelSet.h"

#include <boost/scoped_ptr.hpp>
#include <boost/functional/hash.hpp>
#include <boost/unordered_map.hpp>
#include <boost/version.hpp>

namespace Moses
{
class ChartSearchGraphWriter;
class ChartTranslationOptionList;
class ChartCellCollection;
class ChartManager;

// Common base: a source-span coverage plus the set of target-side
// non-terminal labels seen in that span.
class ChartCellBase
{
public:
  ChartCellBase(size_t startPos, size_t endPos);

  virtual ~ChartCellBase();

  const ChartCellLabelSet &GetTargetLabelSet() const {
    return m_targetLabelSet;
  }

  ChartCellLabelSet &MutableTargetLabelSet() {
    return m_targetLabelSet;
  }

  //! Source-side span [startPos, endPos] this cell covers.
  const Range &GetCoverage() const {
    return m_coverage;
  }

protected:
  const Range m_coverage;
  ChartCellLabelSet m_targetLabelSet;
};

/** 1 cell in chart decoder.
 * Doesn't directly hold hypotheses. Each cell contain a map of ChartHypothesisCollection that have different constituent labels
 */
class ChartCell : public ChartCellBase
{
  friend std::ostream& operator<<(std::ostream&, const ChartCell&);
public:
  // Hypotheses are bucketed by their target-side constituent label.
  // boost::unordered_map needs >= 1.42 for the hasher/equality template
  // parameters used here; older boost falls back to an ordered std::map.
#if defined(BOOST_VERSION) && (BOOST_VERSION >= 104200)
  typedef boost::unordered_map<Word,
          ChartHypothesisCollection,
          NonTerminalHasher,
          NonTerminalEqualityPred
          > MapType;
#else
  typedef std::map<Word, ChartHypothesisCollection> MapType;
#endif

protected:
  MapType m_hypoColl;

  bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
  ChartManager &m_manager;

public:
  ChartCell(size_t startPos, size_t endPos, ChartManager &manager);
  ~ChartCell();

  void Decode(const ChartTranslationOptionList &transOptList
              ,const ChartCellCollection &allChartCells);

  //! Get all hypotheses in the cell that have the specified constituent label
  const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {
    MapType::const_iterator p = m_hypoColl.find(constituentLabel);
    return (p == m_hypoColl.end()) ? NULL : &(p->second.GetSortedHypotheses());
  }

  //! for n-best list
  const HypoList *GetAllSortedHypotheses() const;

  bool AddHypothesis(ChartHypothesis *hypo);

  void SortHypotheses();
  void PruneToSize();

  const ChartHypothesis *GetBestHypothesis() const;

  void CleanupArcList();

  void OutputSizes(std::ostream &out) const;
  size_t GetSize() const;

  void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;

};

}

|
| 128 |
+
|
mosesdecoder/moses/ChartCellLabel.h
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#pragma once

#include "HypoList.h"
#include "Word.h"
#include "Range.h"
#include "ChartParserCallback.h"

namespace search
{
class Vertex;
}

namespace Moses
{

class Word;

/** Contains a range, word (non-terms?) and a vector of hypotheses.
 * @todo This is probably incompatible with lattice decoding when the word that spans
 * a position (or positions) can vary.
 * @todo is this to hold sorted hypotheses that are in the queue for creating the next hypos?
 */
class ChartCellLabel
{
public:
  // One of three stack representations, depending on the search
  // algorithm in use; the active member is determined by context.
  union Stack {
    const HypoList *cube; // cube pruning
    search::Vertex *incr; // incremental search after filling.
    void *incr_generator; // incremental search during filling.
  };


  // NOTE: m_coverage and m_label are stored by reference; the caller
  // must keep the referenced objects alive for this label's lifetime.
  ChartCellLabel(const Range &coverage, const Word &label,
                 Stack stack=Stack())
    : m_coverage(coverage)
    , m_label(label)
    , m_stack(stack)
    , m_bestScore(0) {
  }

  const Range &GetCoverage() const {
    return m_coverage;
  }
  const Word &GetLabel() const {
    return m_label;
  }
  Stack GetStack() const {
    return m_stack;
  }
  Stack &MutableStack() {
    return m_stack;
  }

  //caching of best score on stack
  // NOTE(review): 0 is used as the "not yet computed" sentinel, so a
  // genuine best score of exactly 0 would be recomputed on every call
  // (harmless but wasteful) — confirm scores are normally non-zero.
  float GetBestScore(const ChartParserCallback *outColl) const {
    if (m_bestScore == 0) {
      m_bestScore = outColl->GetBestScore(this);
    }
    return m_bestScore;
  }

private:
  const Range &m_coverage;
  const Word &m_label;
  //const InputPath &m_inputPath;
  Stack m_stack;
  // Lazily-filled cache for GetBestScore(); mutable so the const
  // accessor can populate it.
  mutable float m_bestScore;
};

}
|
mosesdecoder/moses/ChartHypothesis.h
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// vim:tabstop=2
|
| 2 |
+
/***********************************************************************
|
| 3 |
+
Moses - factored phrase-based language decoder
|
| 4 |
+
Copyright (C) 2010 Hieu Hoang
|
| 5 |
+
|
| 6 |
+
This library is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU Lesser General Public
|
| 8 |
+
License as published by the Free Software Foundation; either
|
| 9 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This library is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 14 |
+
Lesser General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU Lesser General Public
|
| 17 |
+
License along with this library; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 19 |
+
***********************************************************************/
|
| 20 |
+
|
| 21 |
+
#pragma once
|
| 22 |
+
|
| 23 |
+
#include <vector>
|
| 24 |
+
#include <boost/scoped_ptr.hpp>
|
| 25 |
+
#include "Util.h"
|
| 26 |
+
#include "Range.h"
|
| 27 |
+
#include "ScoreComponentCollection.h"
|
| 28 |
+
#include "Phrase.h"
|
| 29 |
+
#include "ChartTranslationOptions.h"
|
| 30 |
+
#include "ObjectPool.h"
|
| 31 |
+
|
| 32 |
+
namespace Moses
|
| 33 |
+
{
|
| 34 |
+
|
| 35 |
+
class ChartKBestExtractor;
|
| 36 |
+
class ChartHypothesis;
|
| 37 |
+
class ChartManager;
|
| 38 |
+
class RuleCubeItem;
|
| 39 |
+
class FFState;
|
| 40 |
+
|
| 41 |
+
typedef std::vector<ChartHypothesis*> ChartArcList;
|
| 42 |
+
|
| 43 |
+
/** a hypothesis in the hierarchical/syntax decoder.
|
| 44 |
+
* Contain a pointer to the current target phrase, a vector of previous hypos, and some scores
|
| 45 |
+
*/
|
| 46 |
+
class ChartHypothesis
|
| 47 |
+
{
|
| 48 |
+
friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
|
| 49 |
+
// friend class ChartKBestExtractor;
|
| 50 |
+
|
| 51 |
+
protected:
|
| 52 |
+
|
| 53 |
+
boost::shared_ptr<ChartTranslationOption> m_transOpt;
|
| 54 |
+
|
| 55 |
+
Range m_currSourceWordsRange;
|
| 56 |
+
std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
|
| 57 |
+
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
|
| 58 |
+
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
|
| 59 |
+
mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
|
| 60 |
+
ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
|
| 61 |
+
,m_lmNGram
|
| 62 |
+
,m_lmPrefix;
|
| 63 |
+
float m_totalScore;
|
| 64 |
+
|
| 65 |
+
ChartArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
|
| 66 |
+
const ChartHypothesis *m_winningHypo;
|
| 67 |
+
|
| 68 |
+
std::vector<const ChartHypothesis*> m_prevHypos; // always sorted by source position?
|
| 69 |
+
|
| 70 |
+
ChartManager& m_manager;
|
| 71 |
+
|
| 72 |
+
unsigned m_id; /* pkoehn wants to log the order in which hypotheses were generated */
|
| 73 |
+
|
| 74 |
+
//! not implemented
|
| 75 |
+
ChartHypothesis();
|
| 76 |
+
|
| 77 |
+
//! not implemented
|
| 78 |
+
ChartHypothesis(const ChartHypothesis ©);
|
| 79 |
+
|
| 80 |
+
public:
|
| 81 |
+
ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
|
| 82 |
+
ChartManager &manager);
|
| 83 |
+
|
| 84 |
+
//! only used by ChartKBestExtractor
|
| 85 |
+
ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
|
| 86 |
+
|
| 87 |
+
~ChartHypothesis();
|
| 88 |
+
|
| 89 |
+
unsigned GetId() const {
|
| 90 |
+
return m_id;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
const ChartTranslationOption &GetTranslationOption() const {
|
| 94 |
+
return *m_transOpt;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
//! Get the rule that created this hypothesis
|
| 98 |
+
const TargetPhrase &GetCurrTargetPhrase() const {
|
| 99 |
+
return m_transOpt->GetPhrase();
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
//! the source range that this hypothesis spans
|
| 103 |
+
const Range &GetCurrSourceRange() const {
|
| 104 |
+
return m_currSourceWordsRange;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
//! the arc list when creating n-best lists
|
| 108 |
+
inline const ChartArcList* GetArcList() const {
|
| 109 |
+
return m_arcList;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
//! the feature function states for a particular feature \param featureID
|
| 113 |
+
inline const FFState* GetFFState( size_t featureID ) const {
|
| 114 |
+
return m_ffStates[ featureID ];
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
//! reference back to the manager
|
| 118 |
+
inline const ChartManager& GetManager() const {
|
| 119 |
+
return m_manager;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
void GetOutputPhrase(Phrase &outPhrase) const;
|
| 123 |
+
Phrase GetOutputPhrase() const;
|
| 124 |
+
|
| 125 |
+
// get leftmost/rightmost words only
|
| 126 |
+
// leftRightMost: 1=left, 2=right
|
| 127 |
+
void GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const;
|
| 128 |
+
|
| 129 |
+
void EvaluateWhenApplied();
|
| 130 |
+
|
| 131 |
+
void AddArc(ChartHypothesis *loserHypo);
|
| 132 |
+
void CleanupArcList();
|
| 133 |
+
void SetWinningHypo(const ChartHypothesis *hypo);
|
| 134 |
+
|
| 135 |
+
//! get the unweighted score for each feature function
|
| 136 |
+
const ScoreComponentCollection &GetScoreBreakdown() const {
|
| 137 |
+
// Note: never call this method before m_currScoreBreakdown is fully computed
|
| 138 |
+
if (!m_scoreBreakdown.get()) {
|
| 139 |
+
m_scoreBreakdown.reset(new ScoreComponentCollection());
|
| 140 |
+
// score breakdown from current translation rule
|
| 141 |
+
if (m_transOpt) {
|
| 142 |
+
m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
|
| 143 |
+
}
|
| 144 |
+
m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
|
| 145 |
+
// score breakdowns from prev hypos
|
| 146 |
+
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
|
| 147 |
+
const ChartHypothesis &prevHypo = **iter;
|
| 148 |
+
m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
|
| 149 |
+
}
|
| 150 |
+
}
|
| 151 |
+
return *(m_scoreBreakdown.get());
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
//! get the unweighted score delta for each feature function
|
| 155 |
+
const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
|
| 156 |
+
// Note: never call this method before m_currScoreBreakdown is fully computed
|
| 157 |
+
if (!m_deltaScoreBreakdown.get()) {
|
| 158 |
+
m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
|
| 159 |
+
// score breakdown from current translation rule
|
| 160 |
+
if (m_transOpt) {
|
| 161 |
+
m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
|
| 162 |
+
}
|
| 163 |
+
m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
|
| 164 |
+
// delta: score breakdowns from prev hypos _not_ added
|
| 165 |
+
}
|
| 166 |
+
return *(m_deltaScoreBreakdown.get());
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
//! Get the weighted total score
|
| 170 |
+
float GetFutureScore() const {
|
| 171 |
+
// scores from current translation rule. eg. translation models & word penalty
|
| 172 |
+
return m_totalScore;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
//! vector of previous hypotheses this hypo is built on
|
| 176 |
+
const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
|
| 177 |
+
return m_prevHypos;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
//! get a particular previous hypos
|
| 181 |
+
const ChartHypothesis* GetPrevHypo(size_t pos) const {
|
| 182 |
+
return m_prevHypos[pos];
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
//! get the constituency label that covers this hypo
|
| 186 |
+
const Word &GetTargetLHS() const {
|
| 187 |
+
return GetCurrTargetPhrase().GetTargetLHS();
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
//! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo is this hypo is in the arc list
|
| 191 |
+
const ChartHypothesis* GetWinningHypothesis() const {
|
| 192 |
+
return m_winningHypo;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
// for unordered_set in stack
|
| 196 |
+
size_t hash() const;
|
| 197 |
+
bool operator==(const ChartHypothesis& other) const;
|
| 198 |
+
|
| 199 |
+
TO_STRING();
|
| 200 |
+
|
| 201 |
+
}; // class ChartHypothesis
|
| 202 |
+
|
| 203 |
+
}
|
| 204 |
+
|
mosesdecoder/moses/ChartKBestExtractor.cpp
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2014 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include "ChartKBestExtractor.h"
|
| 21 |
+
|
| 22 |
+
#include "ChartHypothesis.h"
|
| 23 |
+
#include "ScoreComponentCollection.h"
|
| 24 |
+
#include "StaticData.h"
|
| 25 |
+
|
| 26 |
+
#include <boost/scoped_ptr.hpp>
|
| 27 |
+
|
| 28 |
+
#include <vector>
|
| 29 |
+
|
| 30 |
+
using namespace std;
|
| 31 |
+
|
| 32 |
+
namespace Moses
|
| 33 |
+
{
|
| 34 |
+
|
| 35 |
+
// Extract the k-best list from the search graph.
|
| 36 |
+
void ChartKBestExtractor::Extract(
|
| 37 |
+
const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
|
| 38 |
+
KBestVec &kBestList)
|
| 39 |
+
{
|
| 40 |
+
kBestList.clear();
|
| 41 |
+
if (topLevelHypos.empty()) {
|
| 42 |
+
return;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
// Create a new ChartHypothesis object, supremeHypo, that has the best
|
| 46 |
+
// top-level hypothesis as its predecessor and has the same score.
|
| 47 |
+
std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
|
| 48 |
+
const ChartHypothesis &bestTopLevelHypo = **p;
|
| 49 |
+
boost::scoped_ptr<ChartHypothesis> supremeHypo(
|
| 50 |
+
new ChartHypothesis(bestTopLevelHypo, *this));
|
| 51 |
+
|
| 52 |
+
// Do the same for each alternative top-level hypothesis, but add the new
|
| 53 |
+
// ChartHypothesis objects as arcs from supremeHypo, as if they had been
|
| 54 |
+
// recombined.
|
| 55 |
+
for (++p; p != topLevelHypos.end(); ++p) {
|
| 56 |
+
// Check that the first item in topLevelHypos really was the best.
|
| 57 |
+
UTIL_THROW_IF2((*p)->GetFutureScore() > bestTopLevelHypo.GetFutureScore(),
|
| 58 |
+
"top-level hypotheses are not correctly sorted");
|
| 59 |
+
// Note: there's no need for a smart pointer here: supremeHypo will take
|
| 60 |
+
// ownership of altHypo.
|
| 61 |
+
ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
|
| 62 |
+
supremeHypo->AddArc(altHypo);
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
// Create the target vertex then lazily fill its k-best list.
|
| 66 |
+
boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
|
| 67 |
+
LazyKthBest(*targetVertex, k, k);
|
| 68 |
+
|
| 69 |
+
// Copy the k-best list from the target vertex, but drop the top edge from
|
| 70 |
+
// each derivation.
|
| 71 |
+
kBestList.reserve(targetVertex->kBestList.size());
|
| 72 |
+
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
|
| 73 |
+
q = targetVertex->kBestList.begin();
|
| 74 |
+
q != targetVertex->kBestList.end(); ++q) {
|
| 75 |
+
const boost::shared_ptr<Derivation> d(*q);
|
| 76 |
+
assert(d);
|
| 77 |
+
assert(d->subderivations.size() == 1);
|
| 78 |
+
kBestList.push_back(d->subderivations[0]);
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
// Generate the target-side yield of the derivation d.
|
| 83 |
+
Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
|
| 84 |
+
{
|
| 85 |
+
FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor;
|
| 86 |
+
|
| 87 |
+
Phrase ret(ARRAY_SIZE_INCR);
|
| 88 |
+
|
| 89 |
+
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
| 90 |
+
const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
|
| 91 |
+
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
| 92 |
+
phrase.GetAlignNonTerm().GetNonTermIndexMap();
|
| 93 |
+
for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
| 94 |
+
const Word &word = phrase.GetWord(pos);
|
| 95 |
+
if (word.IsNonTerminal()) {
|
| 96 |
+
std::size_t nonTermInd = nonTermIndexMap[pos];
|
| 97 |
+
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
| 98 |
+
Phrase subPhrase = GetOutputPhrase(subderivation);
|
| 99 |
+
ret.Append(subPhrase);
|
| 100 |
+
} else {
|
| 101 |
+
ret.AddWord(word);
|
| 102 |
+
if (placeholderFactor == NOT_FOUND) {
|
| 103 |
+
continue;
|
| 104 |
+
}
|
| 105 |
+
std::set<std::size_t> sourcePosSet =
|
| 106 |
+
phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
|
| 107 |
+
if (sourcePosSet.size() == 1) {
|
| 108 |
+
const std::vector<const Word*> *ruleSourceFromInputPath =
|
| 109 |
+
hypo.GetTranslationOption().GetSourceRuleFromInputPath();
|
| 110 |
+
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
|
| 111 |
+
"Source Words in of the rules hasn't been filled out");
|
| 112 |
+
std::size_t sourcePos = *sourcePosSet.begin();
|
| 113 |
+
const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
|
| 114 |
+
UTIL_THROW_IF2(sourceWord == NULL,
|
| 115 |
+
"Null source word at position " << sourcePos);
|
| 116 |
+
const Factor *factor = sourceWord->GetFactor(placeholderFactor);
|
| 117 |
+
if (factor) {
|
| 118 |
+
ret.Back()[0] = factor;
|
| 119 |
+
}
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
return ret;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
// Generate the score breakdown of the derivation d.
|
| 128 |
+
boost::shared_ptr<ScoreComponentCollection>
|
| 129 |
+
ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
|
| 130 |
+
{
|
| 131 |
+
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
| 132 |
+
boost::shared_ptr<ScoreComponentCollection> scoreBreakdown(new ScoreComponentCollection());
|
| 133 |
+
scoreBreakdown->PlusEquals(hypo.GetDeltaScoreBreakdown());
|
| 134 |
+
const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
|
| 135 |
+
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
| 136 |
+
phrase.GetAlignNonTerm().GetNonTermIndexMap();
|
| 137 |
+
for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
| 138 |
+
const Word &word = phrase.GetWord(pos);
|
| 139 |
+
if (word.IsNonTerminal()) {
|
| 140 |
+
std::size_t nonTermInd = nonTermIndexMap[pos];
|
| 141 |
+
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
| 142 |
+
scoreBreakdown->PlusEquals(*GetOutputScoreBreakdown(subderivation));
|
| 143 |
+
}
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
return scoreBreakdown;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
// Generate the target tree of the derivation d.
|
| 150 |
+
TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
|
| 151 |
+
{
|
| 152 |
+
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
| 153 |
+
const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
|
| 154 |
+
if (const PhraseProperty *property = phrase.GetProperty("Tree")) {
|
| 155 |
+
const std::string *tree = property->GetValueString();
|
| 156 |
+
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
|
| 157 |
+
|
| 158 |
+
//get subtrees (in target order)
|
| 159 |
+
std::vector<TreePointer> previous_trees;
|
| 160 |
+
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
| 161 |
+
const Word &word = phrase.GetWord(pos);
|
| 162 |
+
if (word.IsNonTerminal()) {
|
| 163 |
+
size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
|
| 164 |
+
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
| 165 |
+
const TreePointer prev_tree = GetOutputTree(subderivation);
|
| 166 |
+
previous_trees.push_back(prev_tree);
|
| 167 |
+
}
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
mytree->Combine(previous_trees);
|
| 171 |
+
mytree->Unbinarize();
|
| 172 |
+
return mytree;
|
| 173 |
+
} else {
|
| 174 |
+
UTIL_THROW2("Error: k-best tree output active, but no internal tree structure found");
|
| 175 |
+
}
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
// Create an unweighted hyperarc corresponding to the given ChartHypothesis.
|
| 179 |
+
ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
|
| 180 |
+
const ChartHypothesis &h)
|
| 181 |
+
{
|
| 182 |
+
UnweightedHyperarc edge;
|
| 183 |
+
edge.head = FindOrCreateVertex(h);
|
| 184 |
+
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
|
| 185 |
+
edge.tail.resize(prevHypos.size());
|
| 186 |
+
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
|
| 187 |
+
const ChartHypothesis *prevHypo = prevHypos[i];
|
| 188 |
+
edge.tail[i] = FindOrCreateVertex(*prevHypo);
|
| 189 |
+
}
|
| 190 |
+
return edge;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
// Look for the vertex corresponding to a given ChartHypothesis, creating
|
| 194 |
+
// a new one if necessary.
|
| 195 |
+
boost::shared_ptr<ChartKBestExtractor::Vertex>
|
| 196 |
+
ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
|
| 197 |
+
{
|
| 198 |
+
VertexMap::value_type element(&h, boost::shared_ptr<Vertex>());
|
| 199 |
+
std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element);
|
| 200 |
+
boost::shared_ptr<Vertex> &sp = p.first->second;
|
| 201 |
+
if (!p.second) {
|
| 202 |
+
return sp; // Vertex was already in m_vertexMap.
|
| 203 |
+
}
|
| 204 |
+
sp.reset(new Vertex(h));
|
| 205 |
+
// Create the 1-best derivation and add it to the vertex's kBestList.
|
| 206 |
+
UnweightedHyperarc bestEdge;
|
| 207 |
+
bestEdge.head = sp;
|
| 208 |
+
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
|
| 209 |
+
bestEdge.tail.resize(prevHypos.size());
|
| 210 |
+
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
|
| 211 |
+
const ChartHypothesis *prevHypo = prevHypos[i];
|
| 212 |
+
bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
|
| 213 |
+
}
|
| 214 |
+
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
|
| 215 |
+
#ifndef NDEBUG
|
| 216 |
+
std::pair<DerivationSet::iterator, bool> q =
|
| 217 |
+
#endif
|
| 218 |
+
m_derivations.insert(bestDerivation);
|
| 219 |
+
assert(q.second);
|
| 220 |
+
sp->kBestList.push_back(bestDerivation);
|
| 221 |
+
return sp;
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
// Create the 1-best derivation for each edge in BS(v) (except the best one)
|
| 225 |
+
// and add it to v's candidate queue.
|
| 226 |
+
void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
|
| 227 |
+
{
|
| 228 |
+
// Create derivations for all of v's incoming edges except the best. This
|
| 229 |
+
// means everything in v.hypothesis.GetArcList() and not the edge defined
|
| 230 |
+
// by v.hypothesis itself. The 1-best derivation for that edge will already
|
| 231 |
+
// have been created.
|
| 232 |
+
const ChartArcList *arcList = v.hypothesis.GetArcList();
|
| 233 |
+
if (arcList) {
|
| 234 |
+
for (std::size_t i = 0; i < arcList->size(); ++i) {
|
| 235 |
+
const ChartHypothesis &recombinedHypo = *(*arcList)[i];
|
| 236 |
+
boost::shared_ptr<Vertex> w = FindOrCreateVertex(recombinedHypo);
|
| 237 |
+
assert(w->kBestList.size() == 1);
|
| 238 |
+
v.candidates.push(w->kBestList[0]);
|
| 239 |
+
}
|
| 240 |
+
}
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
// Lazily fill v's k-best list.
|
| 244 |
+
void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
|
| 245 |
+
std::size_t globalK)
|
| 246 |
+
{
|
| 247 |
+
// If this is the first visit to vertex v then initialize the priority queue.
|
| 248 |
+
if (v.visited == false) {
|
| 249 |
+
// The 1-best derivation should already be in v's k-best list.
|
| 250 |
+
assert(v.kBestList.size() == 1);
|
| 251 |
+
// Initialize v's priority queue.
|
| 252 |
+
GetCandidates(v, globalK);
|
| 253 |
+
v.visited = true;
|
| 254 |
+
}
|
| 255 |
+
// Add derivations to the k-best list until it contains k or there are none
|
| 256 |
+
// left to add.
|
| 257 |
+
while (v.kBestList.size() < k) {
|
| 258 |
+
assert(!v.kBestList.empty());
|
| 259 |
+
// Update the priority queue by adding the successors of the last
|
| 260 |
+
// derivation (unless they've been seen before).
|
| 261 |
+
boost::shared_ptr<Derivation> d(v.kBestList.back());
|
| 262 |
+
LazyNext(v, *d, globalK);
|
| 263 |
+
// Check if there are any derivations left in the queue.
|
| 264 |
+
if (v.candidates.empty()) {
|
| 265 |
+
break;
|
| 266 |
+
}
|
| 267 |
+
// Get the next best derivation and delete it from the queue.
|
| 268 |
+
boost::weak_ptr<Derivation> next = v.candidates.top();
|
| 269 |
+
v.candidates.pop();
|
| 270 |
+
// Add it to the k-best list.
|
| 271 |
+
v.kBestList.push_back(next);
|
| 272 |
+
}
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
// Create the neighbours of Derivation d and add them to v's candidate queue.
|
| 276 |
+
void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
|
| 277 |
+
std::size_t globalK)
|
| 278 |
+
{
|
| 279 |
+
for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
|
| 280 |
+
Vertex &pred = *d.edge.tail[i];
|
| 281 |
+
// Ensure that pred's k-best list contains enough derivations.
|
| 282 |
+
std::size_t k = d.backPointers[i] + 2;
|
| 283 |
+
LazyKthBest(pred, k, globalK);
|
| 284 |
+
if (pred.kBestList.size() < k) {
|
| 285 |
+
// pred's derivations have been exhausted.
|
| 286 |
+
continue;
|
| 287 |
+
}
|
| 288 |
+
// Create the neighbour.
|
| 289 |
+
boost::shared_ptr<Derivation> next(new Derivation(d, i));
|
| 290 |
+
// Check if it has been created before.
|
| 291 |
+
std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
|
| 292 |
+
if (p.second) {
|
| 293 |
+
v.candidates.push(next); // Haven't previously seen it.
|
| 294 |
+
}
|
| 295 |
+
}
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
// Construct the 1-best Derivation that ends at edge e.
|
| 299 |
+
ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
|
| 300 |
+
{
|
| 301 |
+
edge = e;
|
| 302 |
+
std::size_t arity = edge.tail.size();
|
| 303 |
+
backPointers.resize(arity, 0);
|
| 304 |
+
subderivations.reserve(arity);
|
| 305 |
+
for (std::size_t i = 0; i < arity; ++i) {
|
| 306 |
+
const Vertex &pred = *edge.tail[i];
|
| 307 |
+
assert(pred.kBestList.size() >= 1);
|
| 308 |
+
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
|
| 309 |
+
subderivations.push_back(sub);
|
| 310 |
+
}
|
| 311 |
+
score = edge.head->hypothesis.GetFutureScore();
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
// Construct a Derivation that neighbours an existing Derivation.
|
| 315 |
+
ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
|
| 316 |
+
{
|
| 317 |
+
edge.head = d.edge.head;
|
| 318 |
+
edge.tail = d.edge.tail;
|
| 319 |
+
backPointers = d.backPointers;
|
| 320 |
+
subderivations = d.subderivations;
|
| 321 |
+
std::size_t j = ++backPointers[i];
|
| 322 |
+
score = d.score;
|
| 323 |
+
// Deduct the score of the old subderivation.
|
| 324 |
+
score -= subderivations[i]->score;
|
| 325 |
+
// Update the subderivation pointer.
|
| 326 |
+
boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
|
| 327 |
+
subderivations[i] = newSub;
|
| 328 |
+
// Add the score of the new subderivation.
|
| 329 |
+
score += subderivations[i]->score;
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartKBestExtractor.h
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2014 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include <cassert>
|
| 23 |
+
#include "ChartHypothesis.h"
|
| 24 |
+
#include "ScoreComponentCollection.h"
|
| 25 |
+
#include "FF/InternalTree.h"
|
| 26 |
+
|
| 27 |
+
#include <boost/unordered_set.hpp>
|
| 28 |
+
#include <boost/weak_ptr.hpp>
|
| 29 |
+
#include <boost/shared_ptr.hpp>
|
| 30 |
+
|
| 31 |
+
#include <queue>
|
| 32 |
+
#include <vector>
|
| 33 |
+
|
| 34 |
+
namespace Moses
|
| 35 |
+
{
|
| 36 |
+
|
| 37 |
+
// k-best list extractor that implements algorithm 3 from this paper:
|
| 38 |
+
//
|
| 39 |
+
// Liang Huang and David Chiang
|
| 40 |
+
// "Better k-best parsing"
|
| 41 |
+
// In Proceedings of IWPT 2005
|
| 42 |
+
//
|
| 43 |
+
class ChartKBestExtractor
|
| 44 |
+
{
|
| 45 |
+
public:
|
| 46 |
+
struct Vertex;
|
| 47 |
+
|
| 48 |
+
struct UnweightedHyperarc {
|
| 49 |
+
boost::shared_ptr<Vertex> head;
|
| 50 |
+
std::vector<boost::shared_ptr<Vertex> > tail;
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
struct Derivation {
|
| 54 |
+
Derivation(const UnweightedHyperarc &);
|
| 55 |
+
Derivation(const Derivation &, std::size_t);
|
| 56 |
+
|
| 57 |
+
UnweightedHyperarc edge;
|
| 58 |
+
std::vector<std::size_t> backPointers;
|
| 59 |
+
std::vector<boost::shared_ptr<Derivation> > subderivations;
|
| 60 |
+
float score;
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
struct DerivationOrderer {
|
| 64 |
+
bool operator()(const boost::weak_ptr<Derivation> &d1,
|
| 65 |
+
const boost::weak_ptr<Derivation> &d2) const {
|
| 66 |
+
boost::shared_ptr<Derivation> s1(d1);
|
| 67 |
+
boost::shared_ptr<Derivation> s2(d2);
|
| 68 |
+
return s1->score < s2->score;
|
| 69 |
+
}
|
| 70 |
+
};
|
| 71 |
+
|
| 72 |
+
struct Vertex {
|
| 73 |
+
typedef std::priority_queue<boost::weak_ptr<Derivation>,
|
| 74 |
+
std::vector<boost::weak_ptr<Derivation> >,
|
| 75 |
+
DerivationOrderer> DerivationQueue;
|
| 76 |
+
|
| 77 |
+
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
|
| 78 |
+
|
| 79 |
+
const ChartHypothesis &hypothesis;
|
| 80 |
+
std::vector<boost::weak_ptr<Derivation> > kBestList;
|
| 81 |
+
DerivationQueue candidates;
|
| 82 |
+
bool visited;
|
| 83 |
+
};
|
| 84 |
+
|
| 85 |
+
typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
|
| 86 |
+
|
| 87 |
+
// Extract the k-best list from the search hypergraph given the full, sorted
|
| 88 |
+
// list of top-level vertices.
|
| 89 |
+
void Extract(const std::vector<const ChartHypothesis*> &topHypos,
|
| 90 |
+
std::size_t k, KBestVec &);
|
| 91 |
+
|
| 92 |
+
static Phrase GetOutputPhrase(const Derivation &);
|
| 93 |
+
static boost::shared_ptr<ScoreComponentCollection> GetOutputScoreBreakdown(const Derivation &);
|
| 94 |
+
static TreePointer GetOutputTree(const Derivation &);
|
| 95 |
+
|
| 96 |
+
private:
|
| 97 |
+
typedef boost::unordered_map<const ChartHypothesis *,
|
| 98 |
+
boost::shared_ptr<Vertex> > VertexMap;
|
| 99 |
+
|
| 100 |
+
struct DerivationHasher {
|
| 101 |
+
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
|
| 102 |
+
std::size_t seed = 0;
|
| 103 |
+
boost::hash_combine(seed, d->edge.head);
|
| 104 |
+
boost::hash_combine(seed, d->edge.tail);
|
| 105 |
+
boost::hash_combine(seed, d->backPointers);
|
| 106 |
+
return seed;
|
| 107 |
+
}
|
| 108 |
+
};
|
| 109 |
+
|
| 110 |
+
struct DerivationEqualityPred {
|
| 111 |
+
bool operator()(const boost::shared_ptr<Derivation> &d1,
|
| 112 |
+
const boost::shared_ptr<Derivation> &d2) const {
|
| 113 |
+
return d1->edge.head == d2->edge.head &&
|
| 114 |
+
d1->edge.tail == d2->edge.tail &&
|
| 115 |
+
d1->backPointers == d2->backPointers;
|
| 116 |
+
}
|
| 117 |
+
};
|
| 118 |
+
|
| 119 |
+
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
|
| 120 |
+
DerivationEqualityPred> DerivationSet;
|
| 121 |
+
|
| 122 |
+
UnweightedHyperarc CreateEdge(const ChartHypothesis &);
|
| 123 |
+
boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
|
| 124 |
+
void GetCandidates(Vertex &, std::size_t);
|
| 125 |
+
void LazyKthBest(Vertex &, std::size_t, std::size_t);
|
| 126 |
+
void LazyNext(Vertex &, const Derivation &, std::size_t);
|
| 127 |
+
|
| 128 |
+
VertexMap m_vertexMap;
|
| 129 |
+
DerivationSet m_derivations;
|
| 130 |
+
};
|
| 131 |
+
|
| 132 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartParser.cpp
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "ChartParser.h"
|
| 23 |
+
#include "ChartParserCallback.h"
|
| 24 |
+
#include "ChartRuleLookupManager.h"
|
| 25 |
+
#include "StaticData.h"
|
| 26 |
+
#include "TreeInput.h"
|
| 27 |
+
#include "Sentence.h"
|
| 28 |
+
#include "DecodeGraph.h"
|
| 29 |
+
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
| 30 |
+
#include "moses/TranslationModel/PhraseDictionary.h"
|
| 31 |
+
#include "moses/TranslationTask.h"
|
| 32 |
+
|
| 33 |
+
using namespace std;
|
| 34 |
+
using namespace Moses;
|
| 35 |
+
|
| 36 |
+
namespace Moses
|
| 37 |
+
{
|
| 38 |
+
|
| 39 |
+
ChartParserUnknown
|
| 40 |
+
::ChartParserUnknown(ttasksptr const& ttask)
|
| 41 |
+
: m_ttask(ttask)
|
| 42 |
+
{ }
|
| 43 |
+
|
| 44 |
+
ChartParserUnknown::~ChartParserUnknown()
|
| 45 |
+
{
|
| 46 |
+
RemoveAllInColl(m_unksrcs);
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
AllOptions::ptr const&
|
| 50 |
+
ChartParserUnknown::
|
| 51 |
+
options() const
|
| 52 |
+
{
|
| 53 |
+
return m_ttask.lock()->options();
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
void
|
| 57 |
+
ChartParserUnknown::
|
| 58 |
+
Process(const Word &sourceWord, const Range &range, ChartParserCallback &to)
|
| 59 |
+
{
|
| 60 |
+
// unknown word, add as trans opt
|
| 61 |
+
const StaticData &staticData = StaticData::Instance();
|
| 62 |
+
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer
|
| 63 |
+
= UnknownWordPenaltyProducer::Instance();
|
| 64 |
+
|
| 65 |
+
size_t isDigit = 0;
|
| 66 |
+
if (options()->unk.drop) {
|
| 67 |
+
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
|
| 68 |
+
const StringPiece s = f->GetString();
|
| 69 |
+
isDigit = s.find_first_of("0123456789");
|
| 70 |
+
if (isDigit == string::npos)
|
| 71 |
+
isDigit = 0;
|
| 72 |
+
else
|
| 73 |
+
isDigit = 1;
|
| 74 |
+
// modify the starting bitmap
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
Phrase* unksrc = new Phrase(1);
|
| 78 |
+
unksrc->AddWord() = sourceWord;
|
| 79 |
+
Word &newWord = unksrc->GetWord(0);
|
| 80 |
+
newWord.SetIsOOV(true);
|
| 81 |
+
|
| 82 |
+
m_unksrcs.push_back(unksrc);
|
| 83 |
+
|
| 84 |
+
// hack. Once the OOV FF is a phrase table, get rid of this
|
| 85 |
+
PhraseDictionary *firstPt = NULL;
|
| 86 |
+
if (PhraseDictionary::GetColl().size() == 0) {
|
| 87 |
+
firstPt = PhraseDictionary::GetColl()[0];
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
//TranslationOption *transOpt;
|
| 91 |
+
if (! options()->unk.drop || isDigit) {
|
| 92 |
+
// loop
|
| 93 |
+
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs; // staticData.GetUnknownLHS();
|
| 94 |
+
UnknownLHSList::const_iterator iterLHS;
|
| 95 |
+
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
|
| 96 |
+
const string &targetLHSStr = iterLHS->first;
|
| 97 |
+
float prob = iterLHS->second;
|
| 98 |
+
|
| 99 |
+
// lhs
|
| 100 |
+
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
|
| 101 |
+
Word *targetLHS = new Word(true);
|
| 102 |
+
|
| 103 |
+
targetLHS->CreateFromString(Output, options()->output.factor_order,
|
| 104 |
+
targetLHSStr, true);
|
| 105 |
+
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
| 106 |
+
|
| 107 |
+
// add to dictionary
|
| 108 |
+
TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
|
| 109 |
+
Word &targetWord = targetPhrase->AddWord();
|
| 110 |
+
targetWord.CreateUnknownWord(sourceWord);
|
| 111 |
+
|
| 112 |
+
// scores
|
| 113 |
+
float unknownScore = FloorScore(TransformScore(prob));
|
| 114 |
+
|
| 115 |
+
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
|
| 116 |
+
targetPhrase->SetTargetLHS(targetLHS);
|
| 117 |
+
targetPhrase->SetAlignmentInfo("0-0");
|
| 118 |
+
targetPhrase->EvaluateInIsolation(*unksrc);
|
| 119 |
+
|
| 120 |
+
if (!options()->output.detailed_tree_transrep_filepath.empty() ||
|
| 121 |
+
options()->nbest.print_trees || staticData.GetTreeStructure() != NULL) {
|
| 122 |
+
std::string prop = "[ ";
|
| 123 |
+
prop += (*targetLHS)[0]->GetString().as_string() + " ";
|
| 124 |
+
prop += sourceWord[0]->GetString().as_string() + " ]";
|
| 125 |
+
targetPhrase->SetProperty("Tree", prop);
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
// chart rule
|
| 129 |
+
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
|
| 130 |
+
} // for (iterLHS
|
| 131 |
+
} else {
|
| 132 |
+
// drop source word. create blank trans opt
|
| 133 |
+
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
|
| 134 |
+
|
| 135 |
+
TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
|
| 136 |
+
// loop
|
| 137 |
+
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs;//staticData.GetUnknownLHS();
|
| 138 |
+
UnknownLHSList::const_iterator iterLHS;
|
| 139 |
+
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
|
| 140 |
+
const string &targetLHSStr = iterLHS->first;
|
| 141 |
+
//float prob = iterLHS->second;
|
| 142 |
+
|
| 143 |
+
Word *targetLHS = new Word(true);
|
| 144 |
+
targetLHS->CreateFromString(Output, staticData.options()->output.factor_order,
|
| 145 |
+
targetLHSStr, true);
|
| 146 |
+
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
| 147 |
+
|
| 148 |
+
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
|
| 149 |
+
targetPhrase->EvaluateInIsolation(*unksrc);
|
| 150 |
+
|
| 151 |
+
targetPhrase->SetTargetLHS(targetLHS);
|
| 152 |
+
|
| 153 |
+
// chart rule
|
| 154 |
+
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
|
| 155 |
+
}
|
| 156 |
+
}
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
// Per-sentence setup: initialise feature functions for this input, build
// the input-path matrix, and create one rule-lookup manager per phrase
// dictionary / decode graph pair.
ChartParser
::ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells)
  : m_ttask(ttask)
  , m_unknown(ttask)
  , m_decodeGraphList(StaticData::Instance().GetDecodeGraphs())
  , m_source(*(ttask->GetSource().get()))
{
  const StaticData &staticData = StaticData::Instance();

  staticData.InitializeForInput(ttask);
  CreateInputPaths(m_source);

  const std::vector<PhraseDictionary*> &dictionaries = PhraseDictionary::GetColl();
  assert(dictionaries.size() == m_decodeGraphList.size());
  m_ruleLookupManagers.reserve(dictionaries.size());
  for (std::size_t i = 0; i < dictionaries.size(); ++i) {
    // The elements are already non-const pointers; the original's
    // const_cast round-trip was redundant and has been removed.
    PhraseDictionary *dict = dictionaries[i];
    std::size_t maxChartSpan = m_decodeGraphList[i]->GetMaxChartSpan();
    ChartRuleLookupManager *lookupMgr =
      dict->CreateRuleLookupManager(*this, cells, maxChartSpan);
    m_ruleLookupManagers.push_back(lookupMgr);
  }
}
|
| 183 |
+
|
| 184 |
+
ChartParser::~ChartParser()
{
  // Dispose of the per-sentence rule lookup managers, then let the
  // feature functions clean up their sentence-specific state.
  RemoveAllInColl(m_ruleLookupManagers);
  StaticData::Instance().CleanUpAfterSentenceProcessing(m_ttask.lock());

  // The matrix owns every InputPath it stores; delete them all.
  InputPathMatrix::const_iterator row;
  for (row = m_inputPathMatrix.begin(); row != m_inputPathMatrix.end(); ++row) {
    std::vector<InputPath*>::const_iterator col;
    for (col = row->begin(); col != row->end(); ++col) {
      delete *col;
    }
  }
}
|
| 200 |
+
|
| 201 |
+
void ChartParser::Create(const Range &range, ChartParserCallback &to)
|
| 202 |
+
{
|
| 203 |
+
assert(m_decodeGraphList.size() == m_ruleLookupManagers.size());
|
| 204 |
+
|
| 205 |
+
std::vector <DecodeGraph*>::const_iterator iterDecodeGraph;
|
| 206 |
+
std::vector <ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin();
|
| 207 |
+
for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) {
|
| 208 |
+
const DecodeGraph &decodeGraph = **iterDecodeGraph;
|
| 209 |
+
assert(decodeGraph.GetSize() == 1);
|
| 210 |
+
ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers;
|
| 211 |
+
size_t maxSpan = decodeGraph.GetMaxChartSpan();
|
| 212 |
+
size_t last = m_source.GetSize()-1;
|
| 213 |
+
if (maxSpan != 0) {
|
| 214 |
+
last = min(last, range.GetStartPos()+maxSpan);
|
| 215 |
+
}
|
| 216 |
+
if (maxSpan == 0 || range.GetNumWordsCovered() <= maxSpan) {
|
| 217 |
+
const InputPath &inputPath = GetInputPath(range);
|
| 218 |
+
ruleLookupManager.GetChartRuleCollection(inputPath, last, to);
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
if (range.GetNumWordsCovered() == 1
|
| 223 |
+
&& range.GetStartPos() != 0
|
| 224 |
+
&& range.GetStartPos() != m_source.GetSize()-1) {
|
| 225 |
+
bool always = options()->unk.always_create_direct_transopt;
|
| 226 |
+
if (to.Empty() || always) {
|
| 227 |
+
// create unknown words for 1 word coverage where we don't have any trans options
|
| 228 |
+
const Word &sourceWord = m_source.GetWord(range.GetStartPos());
|
| 229 |
+
m_unknown.Process(sourceWord, range, to);
|
| 230 |
+
}
|
| 231 |
+
}
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
void ChartParser::CreateInputPaths(const InputType &input)
|
| 235 |
+
{
|
| 236 |
+
size_t size = input.GetSize();
|
| 237 |
+
m_inputPathMatrix.resize(size);
|
| 238 |
+
|
| 239 |
+
UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
|
| 240 |
+
"Input must be a sentence or a tree, " <<
|
| 241 |
+
"not lattice or confusion networks");
|
| 242 |
+
|
| 243 |
+
TranslationTask const* ttask = m_ttask.lock().get();
|
| 244 |
+
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
| 245 |
+
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
|
| 246 |
+
size_t endPos = startPos + phaseSize -1;
|
| 247 |
+
vector<InputPath*> &vec = m_inputPathMatrix[startPos];
|
| 248 |
+
|
| 249 |
+
Range range(startPos, endPos);
|
| 250 |
+
Phrase subphrase(input.GetSubString(Range(startPos, endPos)));
|
| 251 |
+
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
|
| 252 |
+
|
| 253 |
+
InputPath *node;
|
| 254 |
+
if (range.GetNumWordsCovered() == 1) {
|
| 255 |
+
node = new InputPath(ttask, subphrase, labels, range, NULL, NULL);
|
| 256 |
+
vec.push_back(node);
|
| 257 |
+
} else {
|
| 258 |
+
const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
|
| 259 |
+
node = new InputPath(ttask, subphrase, labels, range, &prevNode, NULL);
|
| 260 |
+
vec.push_back(node);
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
//m_inputPathQueue.push_back(node);
|
| 264 |
+
}
|
| 265 |
+
}
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
// Look up the input path covering 'range'.
const InputPath &ChartParser::GetInputPath(const Range &range) const
{
  return GetInputPath(range.GetStartPos(), range.GetEndPos());
}

// Look up the input path covering [startPos, endPos]. Rows of the matrix
// are indexed by start position; columns by span length minus one.
const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const
{
  size_t col = endPos - startPos;
  UTIL_THROW_IF2(col >= m_inputPathMatrix[startPos].size(),
                 "Out of bound: " << col);
  return *m_inputPathMatrix[startPos][col];
}

// Mutable variant of the lookup above.
InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos)
{
  size_t col = endPos - startPos;
  UTIL_THROW_IF2(col >= m_inputPathMatrix[startPos].size(),
                 "Out of bound: " << col);
  return *m_inputPathMatrix[startPos][col];
}
|
| 288 |
+
/*
|
| 289 |
+
const Sentence &ChartParser::GetSentence() const {
|
| 290 |
+
const Sentence &sentence = static_cast<const Sentence&>(m_source);
|
| 291 |
+
return sentence;
|
| 292 |
+
}
|
| 293 |
+
*/
|
| 294 |
+
size_t ChartParser::GetSize() const
|
| 295 |
+
{
|
| 296 |
+
return m_source.GetSize();
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
long ChartParser::GetTranslationId() const
|
| 300 |
+
{
|
| 301 |
+
return m_source.GetTranslationId();
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
// Decoder options come from the owning translation task.
AllOptions::ptr const&
ChartParser::
options() const
{
  return m_ttask.lock()->options();
}
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartRuleLookupManager.cpp
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "ChartRuleLookupManager.h"
|
| 2 |
+
#include "ChartParser.h"
|
| 3 |
+
|
| 4 |
+
namespace Moses
|
| 5 |
+
{
|
| 6 |
+
// Out-of-line definition anchors the vtable of this abstract base class.
ChartRuleLookupManager::~ChartRuleLookupManager()
{
}
|
| 8 |
+
} // namespace Moses
|
| 9 |
+
|
mosesdecoder/moses/ChartRuleLookupManager.h
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
#ifndef moses_ChartRuleLookupManager_h
|
| 22 |
+
#define moses_ChartRuleLookupManager_h
|
| 23 |
+
|
| 24 |
+
#include "ChartCellCollection.h"
|
| 25 |
+
#include "InputType.h"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
class ChartParser;
|
| 30 |
+
class ChartParserCallback;
|
| 31 |
+
class Range;
|
| 32 |
+
class Sentence;
|
| 33 |
+
|
| 34 |
+
/** Defines an interface for looking up rules in a rule table. Concrete
|
| 35 |
+
* implementation classes should correspond to specific PhraseDictionary
|
| 36 |
+
* subclasses (memory or on-disk). Since a ChartRuleLookupManager object
|
| 37 |
+
* maintains sentence-specific state, exactly one should be created for
|
| 38 |
+
* each sentence that is to be decoded.
|
| 39 |
+
*/
|
| 40 |
+
class ChartRuleLookupManager
|
| 41 |
+
{
|
| 42 |
+
public:
|
| 43 |
+
ChartRuleLookupManager(const ChartParser &parser,
|
| 44 |
+
const ChartCellCollectionBase &cellColl)
|
| 45 |
+
: m_parser(parser)
|
| 46 |
+
, m_cellCollection(cellColl) {}
|
| 47 |
+
|
| 48 |
+
virtual ~ChartRuleLookupManager();
|
| 49 |
+
|
| 50 |
+
const ChartCellLabelSet &GetTargetLabelSet(size_t begin, size_t end) const {
|
| 51 |
+
return m_cellCollection.GetBase(Range(begin, end)).GetTargetLabelSet();
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
const ChartParser &GetParser() const {
|
| 55 |
+
return m_parser;
|
| 56 |
+
}
|
| 57 |
+
//const Sentence &GetSentence() const;
|
| 58 |
+
|
| 59 |
+
const ChartCellLabel &GetSourceAt(size_t at) const {
|
| 60 |
+
return m_cellCollection.GetSourceWordLabel(at);
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
/** abstract function. Return a vector of translation options for given a range in the input sentence
|
| 64 |
+
* \param range source range for which you want the translation options
|
| 65 |
+
* \param outColl return argument
|
| 66 |
+
*/
|
| 67 |
+
virtual void GetChartRuleCollection(
|
| 68 |
+
const InputPath &inputPath,
|
| 69 |
+
size_t lastPos, // last position to consider if using lookahead
|
| 70 |
+
ChartParserCallback &outColl) = 0;
|
| 71 |
+
|
| 72 |
+
private:
|
| 73 |
+
//! Non-copyable: copy constructor and assignment operator not implemented.
|
| 74 |
+
ChartRuleLookupManager(const ChartRuleLookupManager &);
|
| 75 |
+
//! Non-copyable: copy constructor and assignment operator not implemented.
|
| 76 |
+
ChartRuleLookupManager &operator=(const ChartRuleLookupManager &);
|
| 77 |
+
|
| 78 |
+
const ChartParser &m_parser;
|
| 79 |
+
const ChartCellCollectionBase &m_cellCollection;
|
| 80 |
+
};
|
| 81 |
+
|
| 82 |
+
} // namespace Moses
|
| 83 |
+
|
| 84 |
+
#endif
|
mosesdecoder/moses/ChartTranslationOptionList.cpp
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010 Hieu Hoang
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include <algorithm>
|
| 21 |
+
#include <iostream>
|
| 22 |
+
#include <vector>
|
| 23 |
+
#include "StaticData.h"
|
| 24 |
+
#include "ChartTranslationOptionList.h"
|
| 25 |
+
#include "ChartTranslationOptions.h"
|
| 26 |
+
#include "ChartCellCollection.h"
|
| 27 |
+
#include "Range.h"
|
| 28 |
+
#include "InputType.h"
|
| 29 |
+
#include "InputPath.h"
|
| 30 |
+
|
| 31 |
+
using namespace std;
|
| 32 |
+
|
| 33 |
+
namespace Moses
|
| 34 |
+
{
|
| 35 |
+
|
| 36 |
+
ChartTranslationOptionList::
|
| 37 |
+
ChartTranslationOptionList(size_t ruleLimit, const InputType &input)
|
| 38 |
+
: m_size(0)
|
| 39 |
+
, m_ruleLimit(ruleLimit)
|
| 40 |
+
{
|
| 41 |
+
m_scoreThreshold = std::numeric_limits<float>::infinity();
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
ChartTranslationOptionList::~ChartTranslationOptionList()
|
| 45 |
+
{
|
| 46 |
+
RemoveAllInColl(m_collection);
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
void ChartTranslationOptionList::Clear()
|
| 50 |
+
{
|
| 51 |
+
m_size = 0;
|
| 52 |
+
m_scoreThreshold = std::numeric_limits<float>::infinity();
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
class ChartTranslationOptionOrderer
|
| 56 |
+
{
|
| 57 |
+
public:
|
| 58 |
+
bool operator()(const ChartTranslationOptions* itemA, const ChartTranslationOptions* itemB) const {
|
| 59 |
+
return itemA->GetEstimateOfBestScore() > itemB->GetEstimateOfBestScore();
|
| 60 |
+
}
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
|
| 64 |
+
const StackVec &stackVec,
|
| 65 |
+
const Range &range)
|
| 66 |
+
{
|
| 67 |
+
if (tpc.IsEmpty()) {
|
| 68 |
+
return;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
for (size_t i = 0; i < stackVec.size(); ++i) {
|
| 72 |
+
const ChartCellLabel &chartCellLabel = *stackVec[i];
|
| 73 |
+
size_t numHypos = chartCellLabel.GetStack().cube->size();
|
| 74 |
+
if (numHypos == 0) {
|
| 75 |
+
return; // empty stack. These rules can't be used
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
const TargetPhrase &targetPhrase = **(tpc.begin());
|
| 80 |
+
float score = targetPhrase.GetFutureScore();
|
| 81 |
+
for (StackVec::const_iterator p = stackVec.begin(); p != stackVec.end(); ++p) {
|
| 82 |
+
score += (*p)->GetBestScore(this);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
// If the rule limit has already been reached then don't add the option
|
| 86 |
+
// unless it is better than at least one existing option.
|
| 87 |
+
if (m_ruleLimit && m_size > m_ruleLimit && score < m_scoreThreshold) {
|
| 88 |
+
return;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
// Add the option to the list.
|
| 92 |
+
if (m_size == m_collection.size()) {
|
| 93 |
+
// m_collection has reached capacity: create a new object.
|
| 94 |
+
m_collection.push_back(new ChartTranslationOptions(tpc, stackVec,
|
| 95 |
+
range, score));
|
| 96 |
+
} else {
|
| 97 |
+
// Overwrite an unused object.
|
| 98 |
+
*(m_collection[m_size]) = ChartTranslationOptions(tpc, stackVec,
|
| 99 |
+
range, score);
|
| 100 |
+
}
|
| 101 |
+
++m_size;
|
| 102 |
+
|
| 103 |
+
// If the rule limit hasn't been exceeded then update the threshold.
|
| 104 |
+
if (!m_ruleLimit || m_size <= m_ruleLimit) {
|
| 105 |
+
m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
// Prune if bursting
|
| 109 |
+
if (m_ruleLimit && m_size == m_ruleLimit * 2) {
|
| 110 |
+
NTH_ELEMENT4(m_collection.begin(),
|
| 111 |
+
m_collection.begin() + m_ruleLimit - 1,
|
| 112 |
+
m_collection.begin() + m_size,
|
| 113 |
+
ChartTranslationOptionOrderer());
|
| 114 |
+
m_scoreThreshold = m_collection[m_ruleLimit-1]->GetEstimateOfBestScore();
|
| 115 |
+
m_size = m_ruleLimit;
|
| 116 |
+
}
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
void
|
| 120 |
+
ChartTranslationOptionList::
|
| 121 |
+
AddPhraseOOV(TargetPhrase &phrase,
|
| 122 |
+
std::list<TargetPhraseCollection::shared_ptr > &waste_memory,
|
| 123 |
+
const Range &range)
|
| 124 |
+
{
|
| 125 |
+
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
|
| 126 |
+
tpc->Add(&phrase);
|
| 127 |
+
waste_memory.push_back(tpc);
|
| 128 |
+
StackVec empty;
|
| 129 |
+
Add(*tpc, empty, range);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
void ChartTranslationOptionList::ApplyThreshold(float const threshold)
|
| 133 |
+
{
|
| 134 |
+
if (m_ruleLimit && m_size > m_ruleLimit) {
|
| 135 |
+
// Something's gone wrong if the list has grown to m_ruleLimit * 2
|
| 136 |
+
// without being pruned.
|
| 137 |
+
assert(m_size < m_ruleLimit * 2);
|
| 138 |
+
// Reduce the list to the best m_ruleLimit options. The remaining
|
| 139 |
+
// options can be overwritten on subsequent calls to Add().
|
| 140 |
+
NTH_ELEMENT4(m_collection.begin(),
|
| 141 |
+
m_collection.begin()+m_ruleLimit,
|
| 142 |
+
m_collection.begin()+m_size,
|
| 143 |
+
ChartTranslationOptionOrderer());
|
| 144 |
+
m_size = m_ruleLimit;
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
// keep only those over best + threshold
|
| 148 |
+
|
| 149 |
+
float scoreThreshold = -std::numeric_limits<float>::infinity();
|
| 150 |
+
|
| 151 |
+
CollType::const_iterator iter;
|
| 152 |
+
for (iter = m_collection.begin(); iter != m_collection.begin()+m_size; ++iter) {
|
| 153 |
+
const ChartTranslationOptions *transOpt = *iter;
|
| 154 |
+
float score = transOpt->GetEstimateOfBestScore();
|
| 155 |
+
scoreThreshold = (score > scoreThreshold) ? score : scoreThreshold;
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
scoreThreshold += threshold; // StaticData::Instance().GetTranslationOptionThreshold();
|
| 159 |
+
|
| 160 |
+
CollType::iterator bound = std::partition(m_collection.begin(),
|
| 161 |
+
m_collection.begin()+m_size,
|
| 162 |
+
ScoreThresholdPred(scoreThreshold));
|
| 163 |
+
|
| 164 |
+
m_size = std::distance(m_collection.begin(), bound);
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell) const
|
| 168 |
+
{
|
| 169 |
+
const HypoList *stack = chartCell->GetStack().cube;
|
| 170 |
+
assert(stack);
|
| 171 |
+
assert(!stack->empty());
|
| 172 |
+
const ChartHypothesis &bestHypo = **(stack->begin());
|
| 173 |
+
return bestHypo.GetFutureScore();
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
void ChartTranslationOptionList::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
|
| 177 |
+
{
|
| 178 |
+
// NEVER iterate over ALL of the collection. Just over the first m_size
|
| 179 |
+
CollType::iterator iter;
|
| 180 |
+
for (iter = m_collection.begin(); iter != m_collection.begin() + m_size; ++iter) {
|
| 181 |
+
ChartTranslationOptions &transOpts = **iter;
|
| 182 |
+
transOpts.EvaluateWithSourceContext(input, inputPath);
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
// get rid of empty trans opts
|
| 186 |
+
size_t numDiscard = 0;
|
| 187 |
+
for (size_t i = 0; i < m_size; ++i) {
|
| 188 |
+
ChartTranslationOptions *transOpts = m_collection[i];
|
| 189 |
+
if (transOpts->GetSize() == 0) {
|
| 190 |
+
//delete transOpts;
|
| 191 |
+
++numDiscard;
|
| 192 |
+
} else if (numDiscard) {
|
| 193 |
+
SwapTranslationOptions(i - numDiscard, i);
|
| 194 |
+
//m_collection[] = transOpts;
|
| 195 |
+
}
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
size_t newSize = m_size - numDiscard;
|
| 199 |
+
m_size = newSize;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
void ChartTranslationOptionList::SwapTranslationOptions(size_t a, size_t b)
|
| 203 |
+
{
|
| 204 |
+
ChartTranslationOptions *transOptsA = m_collection[a];
|
| 205 |
+
ChartTranslationOptions *transOptsB = m_collection[b];
|
| 206 |
+
m_collection[a] = transOptsB;
|
| 207 |
+
m_collection[b] = transOptsA;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptionList &obj)
|
| 211 |
+
{
|
| 212 |
+
for (size_t i = 0; i < obj.m_collection.size(); ++i) {
|
| 213 |
+
const ChartTranslationOptions &transOpts = *obj.m_collection[i];
|
| 214 |
+
out << transOpts << endl;
|
| 215 |
+
}
|
| 216 |
+
return out;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
}
|
mosesdecoder/moses/ChartTranslationOptions.h
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
 Moses - factored phrase-based language decoder
 Copyright (C) 2010 Hieu Hoang

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#pragma once

#include "StackVec.h"
#include "TargetPhrase.h"
#include "TargetPhraseCollection.h"
#include "Range.h"

#include <vector>
#include <boost/shared_ptr.hpp>
#include "ChartTranslationOption.h"

namespace Moses
{
class ChartTranslationOption;
class InputPath;
class InputType;

/** Similar to a DottedRule, but contains a direct reference to a list
 * of translations and provides an estimate of the best score,
 * for a specific range in the input sentence.
 */
class ChartTranslationOptions
{
  friend std::ostream& operator<<(std::ostream&, const ChartTranslationOptions&);

public:
  typedef std::vector<boost::shared_ptr<ChartTranslationOption> > CollType;

  /** Constructor
      \param targetPhraseColl the target phrases (translations) available
             for this source span
      \param stackVec the chart cells / hypothesis stacks underlying the
             rule's non-terminals (see StackVec) -- presumably one entry
             per non-terminal; confirm against ChartParser callers
      \param range the range in the source sentence this translation option covers
      \param score precomputed estimate of the best achievable score, as
             produced by CalcEstimateOfBestScore()
   */
  ChartTranslationOptions(const TargetPhraseCollection &targetPhraseColl,
                          const StackVec &stackVec,
                          const Range &range,
                          float score);
  ~ChartTranslationOptions();

  /** Estimate of the best achievable score: the top target phrase's
   *  estimated score plus the scores of the best child hypotheses. */
  static float CalcEstimateOfBestScore(const TargetPhraseCollection &,
                                       const StackVec &);

  //! number of translation options held for this span
  size_t GetSize() const {
    return m_collection.size();
  }

  //! hypothesis stacks underlying the rule's non-terminals
  const StackVec &GetStackVec() const {
    return m_stackVec;
  }

  //! all contained options (one per target phrase) for this span
  const CollType &GetTargetPhrases() const {
    return m_collection;
  }

  //! the range in the source sentence this translation option covers
  const Range &GetSourceWordsRange() const {
    return *m_wordsRange;
  }

  /** return an estimate of the best score possible with this translation option.
   * the estimate is the sum of the top target phrase's estimated score plus the
   * scores of the best child hypotheses.
   */
  inline float GetEstimateOfBestScore() const {
    return m_estimateOfBestScore;
  }

  //! evaluate the contained options with source-context feature functions
  void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);

  //! attach the input path (matched source phrase) to the contained options
  void SetInputPath(const InputPath *inputPath);

  //! populate the options' source rules from the attached input path
  void CreateSourceRuleFromInputPath();

private:

  StackVec m_stackVec; //!< hypothesis stacks for the rule's non-terminals
  CollType m_collection; //!< one ChartTranslationOption per target phrase

  const Range *m_wordsRange; //!< source span covered (not owned)
  float m_estimateOfBestScore; //!< cached result of CalcEstimateOfBestScore()
};

}
|
mosesdecoder/moses/ConfusionNet.cpp
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
| 2 |
+
// $Id$
|
| 3 |
+
|
| 4 |
+
#include "ConfusionNet.h"
|
| 5 |
+
#include <sstream>
|
| 6 |
+
|
| 7 |
+
#include "FactorCollection.h"
|
| 8 |
+
#include "Util.h"
|
| 9 |
+
#include "TranslationOptionCollectionConfusionNet.h"
|
| 10 |
+
#include "StaticData.h"
|
| 11 |
+
#include "Sentence.h"
|
| 12 |
+
#include "moses/FF/InputFeature.h"
|
| 13 |
+
#include "util/exception.hh"
|
| 14 |
+
#include "moses/TranslationTask.h"
|
| 15 |
+
namespace Moses
|
| 16 |
+
{
|
| 17 |
+
struct CNStats {
|
| 18 |
+
size_t created,destr,read,colls,words;
|
| 19 |
+
|
| 20 |
+
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
|
| 21 |
+
~CNStats() {
|
| 22 |
+
print(std::cerr);
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
void createOne() {
|
| 26 |
+
++created;
|
| 27 |
+
}
|
| 28 |
+
void destroyOne() {
|
| 29 |
+
++destr;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
void collect(const ConfusionNet& cn) {
|
| 33 |
+
++read;
|
| 34 |
+
colls+=cn.GetSize();
|
| 35 |
+
for(size_t i=0; i<cn.GetSize(); ++i)
|
| 36 |
+
words+=cn[i].size();
|
| 37 |
+
}
|
| 38 |
+
void print(std::ostream& out) const {
|
| 39 |
+
if(created>0) {
|
| 40 |
+
out<<"confusion net statistics:\n"
|
| 41 |
+
" created:\t"<<created<<"\n"
|
| 42 |
+
" destroyed:\t"<<destr<<"\n"
|
| 43 |
+
" succ. read:\t"<<read<<"\n"
|
| 44 |
+
" columns:\t"<<colls<<"\n"
|
| 45 |
+
" words:\t"<<words<<"\n"
|
| 46 |
+
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
|
| 47 |
+
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
|
| 48 |
+
"\n\n";
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
CNStats stats;
|
| 54 |
+
|
| 55 |
+
size_t
|
| 56 |
+
ConfusionNet::
|
| 57 |
+
GetColumnIncrement(size_t i, size_t j) const
|
| 58 |
+
{
|
| 59 |
+
(void) i;
|
| 60 |
+
(void) j;
|
| 61 |
+
return 1;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
/** Construct an empty confusion net.  For syntax-based search algorithms the
 *  default input non-terminal is registered; the (mandatory) InputFeature
 *  must already exist, otherwise we throw. */
ConfusionNet::
ConfusionNet(AllOptions::ptr const& opts)
  : InputType(opts)
{
  stats.createOne();

  if (is_syntax(opts->search.algo))
    m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal);

  UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL,
                 "Input feature must be specified");
}
|
| 74 |
+
|
| 75 |
+
/** Destructor: only updates the global usage statistics. */
ConfusionNet::
~ConfusionNet()
{
  stats.destroyOne();
}
|
| 80 |
+
|
| 81 |
+
/** Build a degenerate (single-path) confusion net from a plain sentence:
 *  one column per source word, each column holding exactly that word with
 *  an empty score pair. */
ConfusionNet::
ConfusionNet(Sentence const& s)
  : InputType(s.options())
{
  data.resize(s.GetSize());
  for (size_t pos = 0; pos < s.GetSize(); ++pos) {
    ScorePair emptyScores;
    data[pos].push_back(std::make_pair(s.GetWord(pos), emptyScores));
  }
}
|
| 91 |
+
|
| 92 |
+
bool
|
| 93 |
+
ConfusionNet::
|
| 94 |
+
ReadF(std::istream& in, int format)
|
| 95 |
+
{
|
| 96 |
+
VERBOSE(2, "read confusion net with format "<<format<<"\n");
|
| 97 |
+
switch(format) {
|
| 98 |
+
case 0:
|
| 99 |
+
return ReadFormat0(in);
|
| 100 |
+
case 1:
|
| 101 |
+
return ReadFormat1(in);
|
| 102 |
+
default:
|
| 103 |
+
std::cerr << "ERROR: unknown format '"<<format
|
| 104 |
+
<<"' in ConfusionNet::Read";
|
| 105 |
+
}
|
| 106 |
+
return false;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
int
|
| 110 |
+
ConfusionNet::
|
| 111 |
+
Read(std::istream& in)
|
| 112 |
+
{
|
| 113 |
+
int rv=ReadF(in,0);
|
| 114 |
+
if(rv) stats.collect(*this);
|
| 115 |
+
return rv;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
bool
|
| 119 |
+
ConfusionNet::
|
| 120 |
+
ReadFormat0(std::istream& in)
|
| 121 |
+
{
|
| 122 |
+
Clear();
|
| 123 |
+
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
| 124 |
+
|
| 125 |
+
const InputFeature *inputFeature = InputFeature::InstancePtr();
|
| 126 |
+
size_t numInputScores = inputFeature->GetNumInputScores();
|
| 127 |
+
size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
|
| 128 |
+
|
| 129 |
+
size_t totalCount = numInputScores + numRealWordCount;
|
| 130 |
+
bool addRealWordCount = (numRealWordCount > 0);
|
| 131 |
+
|
| 132 |
+
std::string line;
|
| 133 |
+
while(getline(in,line)) {
|
| 134 |
+
std::istringstream is(line);
|
| 135 |
+
std::string word;
|
| 136 |
+
|
| 137 |
+
Column col;
|
| 138 |
+
while(is>>word) {
|
| 139 |
+
Word w;
|
| 140 |
+
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
| 141 |
+
std::vector<float> probs(totalCount, 0.0);
|
| 142 |
+
for(size_t i=0; i < numInputScores; i++) {
|
| 143 |
+
double prob;
|
| 144 |
+
if (!(is>>prob)) {
|
| 145 |
+
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, "
|
| 146 |
+
<< "or wrong number of scores\n");
|
| 147 |
+
return false;
|
| 148 |
+
}
|
| 149 |
+
if(prob<0.0) {
|
| 150 |
+
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
|
| 151 |
+
prob=0.0;
|
| 152 |
+
} else if (prob>1.0) {
|
| 153 |
+
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
|
| 154 |
+
prob=1.0;
|
| 155 |
+
}
|
| 156 |
+
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
|
| 157 |
+
|
| 158 |
+
}
|
| 159 |
+
// store 'real' word count in last feature if we have one more
|
| 160 |
+
// weight than we do arc scores and not epsilon
|
| 161 |
+
if (addRealWordCount && word!=EPSILON && word!="")
|
| 162 |
+
probs.back() = -1.0;
|
| 163 |
+
|
| 164 |
+
ScorePair scorePair(probs);
|
| 165 |
+
|
| 166 |
+
col.push_back(std::make_pair(w,scorePair));
|
| 167 |
+
}
|
| 168 |
+
if(col.size()) {
|
| 169 |
+
data.push_back(col);
|
| 170 |
+
ShrinkToFit(data.back());
|
| 171 |
+
} else break;
|
| 172 |
+
}
|
| 173 |
+
return !data.empty();
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
bool
|
| 177 |
+
ConfusionNet::
|
| 178 |
+
ReadFormat1(std::istream& in)
|
| 179 |
+
{
|
| 180 |
+
Clear();
|
| 181 |
+
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
| 182 |
+
std::string line;
|
| 183 |
+
if(!getline(in,line)) return 0;
|
| 184 |
+
size_t s;
|
| 185 |
+
if(getline(in,line)) s=atoi(line.c_str());
|
| 186 |
+
else return 0;
|
| 187 |
+
data.resize(s);
|
| 188 |
+
for(size_t i=0; i<data.size(); ++i) {
|
| 189 |
+
if(!getline(in,line)) return 0;
|
| 190 |
+
std::istringstream is(line);
|
| 191 |
+
if(!(is>>s)) return 0;
|
| 192 |
+
std::string word;
|
| 193 |
+
double prob;
|
| 194 |
+
data[i].resize(s);
|
| 195 |
+
for(size_t j=0; j<s; ++j)
|
| 196 |
+
if(is>>word>>prob) {
|
| 197 |
+
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
|
| 198 |
+
data[i][j].second.denseScores = std::vector<float> (1);
|
| 199 |
+
data[i][j].second.denseScores.push_back((float) log(prob));
|
| 200 |
+
if(data[i][j].second.denseScores[0]<0) {
|
| 201 |
+
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
|
| 202 |
+
data[i][j].second.denseScores[0]=0.0;
|
| 203 |
+
}
|
| 204 |
+
// String2Word(word,data[i][j].first,factorOrder);
|
| 205 |
+
Word& w = data[i][j].first;
|
| 206 |
+
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
| 207 |
+
} else return 0;
|
| 208 |
+
}
|
| 209 |
+
return !data.empty();
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
void ConfusionNet::Print(std::ostream& out) const
|
| 213 |
+
{
|
| 214 |
+
out<<"conf net: "<<data.size()<<"\n";
|
| 215 |
+
for(size_t i=0; i<data.size(); ++i) {
|
| 216 |
+
out<<i<<" -- ";
|
| 217 |
+
for(size_t j=0; j<data[i].size(); ++j) {
|
| 218 |
+
out<<"("<<data[i][j].first.ToString()<<", ";
|
| 219 |
+
|
| 220 |
+
// dense
|
| 221 |
+
std::vector<float>::const_iterator iterDense;
|
| 222 |
+
for(iterDense = data[i][j].second.denseScores.begin();
|
| 223 |
+
iterDense < data[i][j].second.denseScores.end();
|
| 224 |
+
++iterDense) {
|
| 225 |
+
out<<", "<<*iterDense;
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
// sparse
|
| 229 |
+
std::map<StringPiece, float>::const_iterator iterSparse;
|
| 230 |
+
for(iterSparse = data[i][j].second.sparseScores.begin();
|
| 231 |
+
iterSparse != data[i][j].second.sparseScores.end();
|
| 232 |
+
++iterSparse) {
|
| 233 |
+
out << ", " << iterSparse->first << "=" << iterSparse->second;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
out<<") ";
|
| 237 |
+
}
|
| 238 |
+
out<<"\n";
|
| 239 |
+
}
|
| 240 |
+
out<<"\n\n";
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
#ifdef _WIN32
|
| 244 |
+
#pragma warning(disable:4716)
|
| 245 |
+
#endif
|
| 246 |
+
Phrase
|
| 247 |
+
ConfusionNet::
|
| 248 |
+
GetSubString(const Range&) const
|
| 249 |
+
{
|
| 250 |
+
UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
|
| 251 |
+
//return Phrase(Input);
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
std::string
|
| 255 |
+
ConfusionNet::
|
| 256 |
+
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
|
| 257 |
+
{
|
| 258 |
+
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
|
| 259 |
+
return "";
|
| 260 |
+
}
|
| 261 |
+
#ifdef _WIN32
|
| 262 |
+
#pragma warning(disable:4716)
|
| 263 |
+
#endif
|
| 264 |
+
const Word& ConfusionNet::GetWord(size_t) const
|
| 265 |
+
{
|
| 266 |
+
UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
|
| 267 |
+
}
|
| 268 |
+
#ifdef _WIN32
|
| 269 |
+
#pragma warning(default:4716)
|
| 270 |
+
#endif
|
| 271 |
+
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
|
| 272 |
+
{
|
| 273 |
+
cn.Print(out);
|
| 274 |
+
return out;
|
| 275 |
+
}
|
| 276 |
+
|
| 277 |
+
/** Factory for the matching translation-option collection type.
 *  The caller takes ownership of the returned object. */
TranslationOptionCollection*
ConfusionNet::
CreateTranslationOptionCollection(ttasksptr const& ttask) const
{
  TranslationOptionCollection *coll
  = new TranslationOptionCollectionConfusionNet(ttask, *this);
  assert(coll);
  return coll;
}
|
| 291 |
+
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
|
mosesdecoder/moses/ContextScope.h
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// A class to store "local" information (such as task-specific caches).
// The idea is for each translation task to have a scope, which stores
// shared pointers to task-specific objects such as caches and priors.
// Since these objects are referenced via shared pointers, scopes can
// share information.
#pragma once

#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#include <boost/thread/locks.hpp>
#include <boost/foreach.hpp>
#endif

// for some reason, the xmlrpc_c headers must be included AFTER the
// boost thread-related ones ...
#include "xmlrpc-c.h"

#include <map>
#include <boost/shared_ptr.hpp>
#include "TypeDef.h"
#include "Util.h"

namespace Moses
{
class ContextScope
{
protected:
  // key = address of some component; value = arbitrary task-local object
  typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
  typedef scratchpad_t::iterator iter_t;
  typedef scratchpad_t::value_type entry_t;
  typedef scratchpad_t::const_iterator const_iter_t;
  scratchpad_t m_scratchpad;
#ifdef WITH_THREADS
  mutable boost::shared_mutex m_lock;
#endif
  SPTR<std::map<std::string,float> const> m_context_weights;
public:
  typedef boost::shared_ptr<ContextScope> ptr;

  /// Store \a val under \a key, replacing any previous entry.
  template<typename T>
  boost::shared_ptr<void> const&
  set(void const* const key, boost::shared_ptr<T> const& val) {
#ifdef WITH_THREADS
    boost::unique_lock<boost::shared_mutex> lock(m_lock);
#endif
    return (m_scratchpad[key] = val);
  }

  /// Retrieve the entry stored under \a key, cast to T.  If the entry is
  /// absent (or stored as NULL) and \a CreateNewIfNecessary is true, a
  /// default-constructed T is created, stored, and returned.
  template<typename T>
  boost::shared_ptr<T> const
  get(void const* key, bool CreateNewIfNecessary=false) {
#ifdef WITH_THREADS
    using boost::shared_mutex;
    using boost::upgrade_lock;
    upgrade_lock<shared_mutex> lock(m_lock);
#endif
    iter_t m = m_scratchpad.find(key);
    boost::shared_ptr< T > ret;
    if (m != m_scratchpad.end()) {
      if (m->second == NULL && CreateNewIfNecessary) {
#ifdef WITH_THREADS
        boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
#endif
        m->second.reset(new T);
      }
      ret = boost::static_pointer_cast< T >(m->second);
      return ret;
    }
    if (!CreateNewIfNecessary) return ret;
#ifdef WITH_THREADS
    boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
#endif
    ret.reset(new T);
    m_scratchpad[key] = ret;
    return ret;
  }

  ContextScope() { }

  /// Copy: duplicates the scratchpad map; the entries themselves remain
  /// shared between the two scopes (shared_ptr semantics).
  ContextScope(ContextScope const& other) {
#ifdef WITH_THREADS
    boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
    boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
#endif
    m_scratchpad = other.m_scratchpad;
  }

  SPTR<std::map<std::string,float> const>
  GetContextWeights() {
    return m_context_weights;
  }

  /// Parse "key,weight:key,weight:..." and install the weights.
  /// Only the first call has any effect; later calls return false.
  bool
  SetContextWeights(std::string const& spec) {
    if (m_context_weights) return false;
    // BUGFIX: this lock was previously taken unconditionally, which cannot
    // compile when WITH_THREADS is undefined (m_lock does not exist then);
    // every other member guards the lock with #ifdef WITH_THREADS.
#ifdef WITH_THREADS
    boost::unique_lock<boost::shared_mutex> lock(m_lock);
#endif
    SPTR<std::map<std::string,float> > M(new std::map<std::string, float>);

    // TO DO; This needs to be done with StringPiece.find, not Tokenize
    // PRIORITY: low
    std::vector<std::string> tokens = Tokenize(spec,":");
    for (std::vector<std::string>::iterator it = tokens.begin();
         it != tokens.end(); it++) {
      std::vector<std::string> key_and_value = Tokenize(*it, ",");
      // robustness: skip malformed entries instead of indexing out of range
      if (key_and_value.size() < 2) continue;
      (*M)[key_and_value[0]] = atof(key_and_value[1].c_str());
    }
    m_context_weights = M;
    return true;
  }

  /// Install externally-built weights; first call wins, later calls no-op.
  bool
  SetContextWeights(SPTR<std::map<std::string,float> const> const& w) {
    if (m_context_weights) return false;
#ifdef WITH_THREADS
    boost::unique_lock<boost::shared_mutex> lock(m_lock);
#endif
    m_context_weights = w;
    return true;
  }

};

};
|
mosesdecoder/moses/DecodeGraph.cpp
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2006 University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#include "DecodeGraph.h"
|
| 24 |
+
#include "DecodeStep.h"
|
| 25 |
+
#include "TypeDef.h"
|
| 26 |
+
#include "Util.h"
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
/** Destructor: the graph owns its DecodeStep objects, so delete them all. */
DecodeGraph::~DecodeGraph()
{
  RemoveAllInColl(m_steps);
}
|
| 34 |
+
|
| 35 |
+
//! Add another decode step to the graph
|
| 36 |
+
void DecodeGraph::Add(DecodeStep *decodeStep)
|
| 37 |
+
{
|
| 38 |
+
m_steps.push_back(decodeStep);
|
| 39 |
+
decodeStep->SetContainer(this);
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
}
|
| 43 |
+
|
mosesdecoder/moses/DecodeStep.cpp
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "DecodeStep.h"
|
| 23 |
+
#include "GenerationDictionary.h"
|
| 24 |
+
#include "StaticData.h"
|
| 25 |
+
#include "moses/TranslationModel/PhraseDictionary.h"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
/** Set up a decode step: determine which output factors are newly produced
 *  by this step, which conflict with factors already produced by earlier
 *  steps, and which feature functions become applicable afterwards. */
DecodeStep::DecodeStep(DecodeFeature *decodeFeature,
                       const DecodeStep* prev,
                       const std::vector<FeatureFunction*> &features)
  : m_decodeFeature(decodeFeature)
{
  // factors produced by all preceding steps
  FactorMask prevOutputFactors;
  if (prev) prevOutputFactors = prev->m_outputFactors;
  m_outputFactors = prevOutputFactors;

  // factors this step would produce that already exist -> conflicts
  FactorMask conflictMask = (m_outputFactors & decodeFeature->GetOutputFactorMask());
  m_outputFactors |= decodeFeature->GetOutputFactorMask();

  // factors genuinely new in this step (xor with the previous mask)
  FactorMask newOutputFactorMask = m_outputFactors ^ prevOutputFactors;

  m_newOutputFactors.resize(newOutputFactorMask.count());
  m_conflictFactors.resize(conflictMask.count());
  size_t newIdx = 0, conflictIdx = 0;
  for (size_t factor = 0; factor < MAX_NUM_FACTORS; ++factor) {
    if (newOutputFactorMask[factor]) m_newOutputFactors[newIdx++] = factor;
    if (conflictMask[factor])        m_conflictFactors[conflictIdx++] = factor;
  }
  VERBOSE(2,"DecodeStep():\n\toutputFactors=" << m_outputFactors
          << "\n\tconflictFactors=" << conflictMask
          << "\n\tnewOutputFactors=" << newOutputFactorMask << std::endl);

  // split the feature functions into those usable after this step and the rest
  for (size_t i = 0; i < features.size(); ++i) {
    FeatureFunction *ff = features[i];
    if (ff->IsUseable(m_outputFactors))
      m_featuresToApply.push_back(ff);
    else
      m_featuresRemaining.push_back(ff);
  }

  decodeFeature->SetContainer(this);
}
|
| 63 |
+
|
| 64 |
+
// Trivial destructor; m_decodeFeature is apparently not deleted here --
// presumably owned elsewhere (TODO confirm ownership).
DecodeStep::~DecodeStep() {}
|
| 65 |
+
|
| 66 |
+
/** returns phrase feature (dictionary) for translation step */
|
| 67 |
+
const PhraseDictionary* DecodeStep::GetPhraseDictionaryFeature() const
|
| 68 |
+
{
|
| 69 |
+
return dynamic_cast<const PhraseDictionary*>(m_decodeFeature);
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/** returns generation feature (dictionary) for generation step */
|
| 73 |
+
const GenerationDictionary* DecodeStep::GetGenerationDictionaryFeature() const
|
| 74 |
+
{
|
| 75 |
+
return dynamic_cast<const GenerationDictionary*>(m_decodeFeature);
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
void DecodeStep::RemoveFeature(const FeatureFunction *ff)
|
| 79 |
+
{
|
| 80 |
+
for (size_t i = 0; i < m_featuresToApply.size(); ++i) {
|
| 81 |
+
if (ff == m_featuresToApply[i]) {
|
| 82 |
+
m_featuresToApply.erase(m_featuresToApply.begin() + i);
|
| 83 |
+
return;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
|
mosesdecoder/moses/DecodeStepGeneration.h
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#ifndef moses_DecodeStepGeneration_h
#define moses_DecodeStepGeneration_h

#include "DecodeStep.h"

namespace Moses
{

class GenerationDictionary;
class Phrase;
class ScoreComponentCollection;

//! subclass of DecodeStep for generation step
class DecodeStepGeneration : public DecodeStep
{
public:
  /** \param dict the generation dictionary this step draws from
   *  \param prev the preceding step in the decode graph (NULL for the first)
   *  \param features all feature functions; the base class partitions them
   *         into those applicable after this step and the remainder */
  DecodeStepGeneration(GenerationDictionary* dict,
                       const DecodeStep* prev,
                       const std::vector<FeatureFunction*> &features);

  /** Expand a partial translation option by applying this generation step.
   *  \param inputPartialTranslOpt the option produced by earlier steps
   *  \param decodeStep the current step (factor bookkeeping)
   *  \param outputPartialTranslOptColl receives the expanded options
   *  \param toc the owning translation-option collection
   *  \param adhereTableLimit whether to respect the dictionary's table limit
   */
  void Process(const TranslationOption &inputPartialTranslOpt
               , const DecodeStep &decodeStep
               , PartialTranslOptColl &outputPartialTranslOptColl
               , TranslationOptionCollection *toc
               , bool adhereTableLimit) const;

private:
};


}
#endif
|
mosesdecoder/moses/DecodeStepTranslation.cpp
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "DecodeStepTranslation.h"
|
| 23 |
+
#include "TranslationOption.h"
|
| 24 |
+
#include "TranslationOptionCollection.h"
|
| 25 |
+
#include "PartialTranslOptColl.h"
|
| 26 |
+
#include "FactorCollection.h"
|
| 27 |
+
#include "util/exception.hh"
|
| 28 |
+
|
| 29 |
+
using namespace std;
|
| 30 |
+
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* pdf,
|
| 34 |
+
const DecodeStep* prev,
|
| 35 |
+
const std::vector<FeatureFunction*> &features)
|
| 36 |
+
: DecodeStep(pdf, prev, features)
|
| 37 |
+
{
|
| 38 |
+
// don't apply feature functions that are from current phrase table.It should already have been
|
| 39 |
+
// dont by the phrase table.
|
| 40 |
+
const std::vector<FeatureFunction*> &pdfFeatures = pdf->GetFeaturesToApply();
|
| 41 |
+
for (size_t i = 0; i < pdfFeatures.size(); ++i) {
|
| 42 |
+
FeatureFunction *ff = pdfFeatures[i];
|
| 43 |
+
RemoveFeature(ff);
|
| 44 |
+
}
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
|
| 48 |
+
, const DecodeStep &decodeStep
|
| 49 |
+
, PartialTranslOptColl &outputPartialTranslOptColl
|
| 50 |
+
, TranslationOptionCollection *toc
|
| 51 |
+
, bool adhereTableLimit
|
| 52 |
+
, TargetPhraseCollection::shared_ptr phraseColl) const
|
| 53 |
+
{
|
| 54 |
+
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
|
| 55 |
+
// word deletion
|
| 56 |
+
outputPartialTranslOptColl.Add(new TranslationOption(inputPartialTranslOpt));
|
| 57 |
+
return;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
// normal trans step
|
| 61 |
+
const Range &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
|
| 62 |
+
const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
|
| 63 |
+
const PhraseDictionary* phraseDictionary =
|
| 64 |
+
decodeStep.GetPhraseDictionaryFeature();
|
| 65 |
+
const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
|
| 66 |
+
const size_t currSize = inPhrase.GetSize();
|
| 67 |
+
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
| 68 |
+
|
| 69 |
+
if (phraseColl != NULL) {
|
| 70 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 71 |
+
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
|
| 72 |
+
|
| 73 |
+
for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase) {
|
| 74 |
+
const TargetPhrase& targetPhrase = **iterTargetPhrase;
|
| 75 |
+
// const ScoreComponentCollection &transScores = targetPhrase.GetScoreBreakdown();
|
| 76 |
+
// skip if the
|
| 77 |
+
if (targetPhrase.GetSize() != currSize) continue;
|
| 78 |
+
|
| 79 |
+
TargetPhrase outPhrase(inPhrase);
|
| 80 |
+
|
| 81 |
+
if (IsFilteringStep()) {
|
| 82 |
+
if (!inputPartialTranslOpt.IsCompatible(targetPhrase, m_conflictFactors))
|
| 83 |
+
continue;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
outPhrase.Merge(targetPhrase, m_newOutputFactors);
|
| 87 |
+
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
|
| 88 |
+
|
| 89 |
+
TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
|
| 90 |
+
assert(newTransOpt != NULL);
|
| 91 |
+
|
| 92 |
+
newTransOpt->SetInputPath(inputPath);
|
| 93 |
+
|
| 94 |
+
outputPartialTranslOptColl.Add(newTransOpt );
|
| 95 |
+
|
| 96 |
+
}
|
| 97 |
+
} else if (sourceWordsRange.GetNumWordsCovered() == 1) {
|
| 98 |
+
// unknown handler
|
| 99 |
+
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
|
| 100 |
+
}
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
void
|
| 104 |
+
DecodeStepTranslation::
|
| 105 |
+
ProcessInitialTranslation(InputType const& source,
|
| 106 |
+
PartialTranslOptColl &outputPartialTranslOptColl,
|
| 107 |
+
size_t startPos, size_t endPos,
|
| 108 |
+
bool adhereTableLimit,
|
| 109 |
+
InputPath const& inputPath,
|
| 110 |
+
TargetPhraseCollection::shared_ptr phraseColl) const
|
| 111 |
+
{
|
| 112 |
+
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
| 113 |
+
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
| 114 |
+
|
| 115 |
+
const Range range(startPos, endPos);
|
| 116 |
+
|
| 117 |
+
if (phraseColl != NULL) {
|
| 118 |
+
IFVERBOSE(3) {
|
| 119 |
+
if(source.GetType() == SentenceInput)
|
| 120 |
+
TRACE_ERR("[" << source.GetSubString(range) << "; "
|
| 121 |
+
<< startPos << "-" << endPos << "]\n");
|
| 122 |
+
else
|
| 123 |
+
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 127 |
+
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
|
| 128 |
+
|
| 129 |
+
for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase) {
|
| 130 |
+
const TargetPhrase &targetPhrase = **iterTargetPhrase;
|
| 131 |
+
TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
|
| 132 |
+
|
| 133 |
+
transOpt->SetInputPath(inputPath);
|
| 134 |
+
|
| 135 |
+
outputPartialTranslOptColl.Add (transOpt);
|
| 136 |
+
|
| 137 |
+
VERBOSE(3,"\t" << targetPhrase << "\n");
|
| 138 |
+
}
|
| 139 |
+
VERBOSE(3,std::endl);
|
| 140 |
+
}
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
void
|
| 144 |
+
DecodeStepTranslation::
|
| 145 |
+
ProcessInitialTransLEGACY(InputType const& source,
|
| 146 |
+
PartialTranslOptColl &outputPartialTranslOptColl,
|
| 147 |
+
size_t startPos, size_t endPos,
|
| 148 |
+
bool adhereTableLimit,
|
| 149 |
+
InputPathList const& inputPathList) const
|
| 150 |
+
{
|
| 151 |
+
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
| 152 |
+
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
| 153 |
+
|
| 154 |
+
const Range range(startPos, endPos);
|
| 155 |
+
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
| 156 |
+
= phraseDictionary->GetTargetPhraseCollectionLEGACY(source,range);
|
| 157 |
+
|
| 158 |
+
if (phraseColl != NULL) {
|
| 159 |
+
IFVERBOSE(3) {
|
| 160 |
+
if(source.GetType() == SentenceInput)
|
| 161 |
+
TRACE_ERR("[" << source.GetSubString(range) << "; "
|
| 162 |
+
<< startPos << "-" << endPos << "]\n");
|
| 163 |
+
else
|
| 164 |
+
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
const std::vector<Phrase> &sourcePhrases = phraseColl->GetSourcePhrases();
|
| 168 |
+
|
| 169 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 170 |
+
std::vector<Phrase>::const_iterator iterSourcePhrase;
|
| 171 |
+
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
|
| 172 |
+
|
| 173 |
+
for (iterTargetPhrase = phraseColl->begin(), iterSourcePhrase = sourcePhrases.begin()
|
| 174 |
+
; iterTargetPhrase != iterEnd
|
| 175 |
+
; ++iterTargetPhrase, ++iterSourcePhrase) {
|
| 176 |
+
assert(iterSourcePhrase != sourcePhrases.end());
|
| 177 |
+
|
| 178 |
+
const TargetPhrase &targetPhrase = **iterTargetPhrase;
|
| 179 |
+
const Phrase &sourcePhrase = *iterSourcePhrase;
|
| 180 |
+
|
| 181 |
+
const InputPath &inputPath = GetInputPathLEGACY(targetPhrase, sourcePhrase, inputPathList);
|
| 182 |
+
|
| 183 |
+
TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
|
| 184 |
+
transOpt->SetInputPath(inputPath);
|
| 185 |
+
|
| 186 |
+
outputPartialTranslOptColl.Add (transOpt);
|
| 187 |
+
|
| 188 |
+
VERBOSE(3,"\t" << targetPhrase << "\n");
|
| 189 |
+
}
|
| 190 |
+
VERBOSE(3,std::endl);
|
| 191 |
+
}
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
|
| 195 |
+
const TargetPhrase targetPhrase,
|
| 196 |
+
const Phrase sourcePhrase,
|
| 197 |
+
const InputPathList &inputPathList) const
|
| 198 |
+
{
|
| 199 |
+
const Word &wordFromPt = sourcePhrase.GetWord(0);
|
| 200 |
+
|
| 201 |
+
InputPathList::const_iterator iter;
|
| 202 |
+
for (iter = inputPathList.begin(); iter != inputPathList.end(); ++iter) {
|
| 203 |
+
const InputPath &inputPath = **iter;
|
| 204 |
+
const Phrase &phraseFromIP = inputPath.GetPhrase();
|
| 205 |
+
|
| 206 |
+
const Word *wordIP = NULL;
|
| 207 |
+
for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
|
| 208 |
+
const Word &tempWord = phraseFromIP.GetWord(i);
|
| 209 |
+
if (!tempWord.IsEpsilon()) {
|
| 210 |
+
wordIP = &tempWord;
|
| 211 |
+
break;
|
| 212 |
+
}
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
// const Range &range = inputPath.GetWordsRange();
|
| 216 |
+
|
| 217 |
+
if (wordIP && *wordIP == wordFromPt) {
|
| 218 |
+
return inputPath;
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
UTIL_THROW(util::Exception, "Input path not found");
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
void
|
| 226 |
+
DecodeStepTranslation::
|
| 227 |
+
ProcessLEGACY(TranslationOption const& in,
|
| 228 |
+
DecodeStep const& decodeStep,
|
| 229 |
+
PartialTranslOptColl &out,
|
| 230 |
+
TranslationOptionCollection *toc,
|
| 231 |
+
bool adhereTableLimit) const
|
| 232 |
+
{
|
| 233 |
+
if (in.GetTargetPhrase().GetSize() == 0) {
|
| 234 |
+
// word deletion
|
| 235 |
+
out.Add(new TranslationOption(in));
|
| 236 |
+
return;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
// normal trans step
|
| 240 |
+
Range const& srcRange = in.GetSourceWordsRange();
|
| 241 |
+
InputPath const& inputPath = in.GetInputPath();
|
| 242 |
+
PhraseDictionary const* pdict = decodeStep.GetPhraseDictionaryFeature();
|
| 243 |
+
TargetPhrase const& inPhrase = in.GetTargetPhrase();
|
| 244 |
+
size_t const currSize = inPhrase.GetSize();
|
| 245 |
+
size_t const tableLimit = pdict->GetTableLimit();
|
| 246 |
+
|
| 247 |
+
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
| 248 |
+
= pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
|
| 249 |
+
|
| 250 |
+
if (phraseColl != NULL) {
|
| 251 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 252 |
+
iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
|
| 253 |
+
? phraseColl->begin() + tableLimit : phraseColl->end());
|
| 254 |
+
|
| 255 |
+
for (iterTargetPhrase = phraseColl->begin();
|
| 256 |
+
iterTargetPhrase != iterEnd;
|
| 257 |
+
++iterTargetPhrase) {
|
| 258 |
+
TargetPhrase const& targetPhrase = **iterTargetPhrase;
|
| 259 |
+
if (targetPhrase.GetSize() != currSize ||
|
| 260 |
+
(IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
|
| 261 |
+
continue;
|
| 262 |
+
|
| 263 |
+
TargetPhrase outPhrase(inPhrase);
|
| 264 |
+
outPhrase.Merge(targetPhrase, m_newOutputFactors);
|
| 265 |
+
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
|
| 266 |
+
|
| 267 |
+
TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
|
| 268 |
+
assert(newTransOpt != NULL);
|
| 269 |
+
|
| 270 |
+
newTransOpt->SetInputPath(inputPath);
|
| 271 |
+
|
| 272 |
+
out.Add(newTransOpt);
|
| 273 |
+
|
| 274 |
+
}
|
| 275 |
+
}
|
| 276 |
+
}
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
|
mosesdecoder/moses/DecodeStepTranslation.h
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_DecodeStepTranslation_h
|
| 23 |
+
#define moses_DecodeStepTranslation_h
|
| 24 |
+
|
| 25 |
+
#include "DecodeStep.h"
|
| 26 |
+
#include "moses/TranslationModel/PhraseDictionary.h"
|
| 27 |
+
#include "InputPath.h"
|
| 28 |
+
|
| 29 |
+
namespace Moses
|
| 30 |
+
{
|
| 31 |
+
|
| 32 |
+
class PhraseDictionary;
|
| 33 |
+
class TargetPhrase;
|
| 34 |
+
class InputPath;
|
| 35 |
+
|
| 36 |
+
//! subclass of DecodeStep for translation step
|
| 37 |
+
class DecodeStepTranslation : public DecodeStep
|
| 38 |
+
{
|
| 39 |
+
public:
|
| 40 |
+
DecodeStepTranslation(); //! not implemented
|
| 41 |
+
DecodeStepTranslation(PhraseDictionary* phraseFeature,
|
| 42 |
+
const DecodeStep* prev,
|
| 43 |
+
const std::vector<FeatureFunction*> &features);
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
virtual void Process(const TranslationOption &inputPartialTranslOpt
|
| 47 |
+
, const DecodeStep &decodeStep
|
| 48 |
+
, PartialTranslOptColl &outputPartialTranslOptColl
|
| 49 |
+
, TranslationOptionCollection *toc
|
| 50 |
+
, bool adhereTableLimit
|
| 51 |
+
, TargetPhraseCollection::shared_ptr phraseColl) const;
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
/*! initialize list of partial translation options by applying the first translation step
|
| 55 |
+
* Ideally, this function should be in DecodeStepTranslation class
|
| 56 |
+
*/
|
| 57 |
+
void ProcessInitialTranslation(const InputType &source
|
| 58 |
+
, PartialTranslOptColl &outputPartialTranslOptColl
|
| 59 |
+
, size_t startPos, size_t endPos, bool adhereTableLimit
|
| 60 |
+
, const InputPath &inputPath
|
| 61 |
+
, TargetPhraseCollection::shared_ptr phraseColl) const;
|
| 62 |
+
|
| 63 |
+
// legacy
|
| 64 |
+
void
|
| 65 |
+
ProcessInitialTransLEGACY(InputType const& source,
|
| 66 |
+
PartialTranslOptColl &outputPartialTranslOptColl,
|
| 67 |
+
size_t startPos, size_t endPos,
|
| 68 |
+
bool adhereTableLimit,
|
| 69 |
+
InputPathList const& inputPathList) const;
|
| 70 |
+
|
| 71 |
+
void ProcessLEGACY(const TranslationOption &inputPartialTranslOpt
|
| 72 |
+
, const DecodeStep &decodeStep
|
| 73 |
+
, PartialTranslOptColl &outputPartialTranslOptColl
|
| 74 |
+
, TranslationOptionCollection *toc
|
| 75 |
+
, bool adhereTableLimit) const;
|
| 76 |
+
|
| 77 |
+
private:
|
| 78 |
+
// I'm not sure whether this actually works or not for binary phrase table.
|
| 79 |
+
// The source phrase only appears to contain the 1st word, therefore, this function
|
| 80 |
+
// only compares the 1st word
|
| 81 |
+
const InputPath &GetInputPathLEGACY(const TargetPhrase targetPhrase,
|
| 82 |
+
const Phrase sourcePhrase,
|
| 83 |
+
const InputPathList &inputPathList) const;
|
| 84 |
+
|
| 85 |
+
};
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
}
|
| 89 |
+
#endif
|
mosesdecoder/moses/Factor.cpp
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "Factor.h"
|
| 23 |
+
|
| 24 |
+
#include <boost/functional/hash.hpp>
|
| 25 |
+
|
| 26 |
+
using namespace std;
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
|
| 31 |
+
TO_STRING_BODY(Factor)
|
| 32 |
+
|
| 33 |
+
// friend
|
| 34 |
+
ostream& operator<<(ostream& out, const Factor& factor)
|
| 35 |
+
{
|
| 36 |
+
out << factor.GetString();
|
| 37 |
+
return out;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
size_t hash_value(const Factor& f)
|
| 41 |
+
{
|
| 42 |
+
boost::hash<size_t> hasher;
|
| 43 |
+
return hasher(f.GetId());
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
|
mosesdecoder/moses/ForestInput.h
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- c++ -*-
|
| 2 |
+
#ifndef moses_ForestInput_h
|
| 3 |
+
#define moses_ForestInput_h
|
| 4 |
+
|
| 5 |
+
#include <string>
|
| 6 |
+
#include <vector>
|
| 7 |
+
|
| 8 |
+
#include <boost/shared_ptr.hpp>
|
| 9 |
+
|
| 10 |
+
#include <util/string_piece.hh>
|
| 11 |
+
|
| 12 |
+
#include "moses/Syntax/F2S/Forest.h"
|
| 13 |
+
|
| 14 |
+
#include "Sentence.h"
|
| 15 |
+
|
| 16 |
+
namespace Moses
|
| 17 |
+
{
|
| 18 |
+
class TranslationTask;
|
| 19 |
+
class ForestInput : public Sentence
|
| 20 |
+
{
|
| 21 |
+
public:
|
| 22 |
+
friend std::ostream &operator<<(std::ostream&, const ForestInput &);
|
| 23 |
+
|
| 24 |
+
ForestInput(AllOptions::ptr const& opts) : Sentence(opts), m_rootVertex(NULL) {}
|
| 25 |
+
|
| 26 |
+
InputTypeEnum GetType() const {
|
| 27 |
+
return ForestInputType;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
//! populate this InputType with data from in stream
|
| 31 |
+
virtual int
|
| 32 |
+
Read(std::istream& in);
|
| 33 |
+
|
| 34 |
+
//! Output debugging info to stream out
|
| 35 |
+
virtual void Print(std::ostream&) const;
|
| 36 |
+
|
| 37 |
+
//! create trans options specific to this InputType
|
| 38 |
+
virtual TranslationOptionCollection*
|
| 39 |
+
CreateTranslationOptionCollection() const;
|
| 40 |
+
|
| 41 |
+
boost::shared_ptr<const Syntax::F2S::Forest> GetForest() const {
|
| 42 |
+
return m_forest;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
const Syntax::F2S::Forest::Vertex *GetRootVertex() const {
|
| 46 |
+
return m_rootVertex;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
private:
|
| 50 |
+
typedef Syntax::F2S::Forest Forest;
|
| 51 |
+
|
| 52 |
+
struct VertexSetHash {
|
| 53 |
+
std::size_t operator()(const Forest::Vertex *v) const {
|
| 54 |
+
std::size_t seed = 0;
|
| 55 |
+
boost::hash_combine(seed, v->pvertex.symbol);
|
| 56 |
+
boost::hash_combine(seed, v->pvertex.span.GetStartPos());
|
| 57 |
+
boost::hash_combine(seed, v->pvertex.span.GetEndPos());
|
| 58 |
+
return seed;
|
| 59 |
+
}
|
| 60 |
+
};
|
| 61 |
+
|
| 62 |
+
struct VertexSetPred {
|
| 63 |
+
bool operator()(const Forest::Vertex *v, const Forest::Vertex *w) const {
|
| 64 |
+
return v->pvertex == w->pvertex;
|
| 65 |
+
}
|
| 66 |
+
};
|
| 67 |
+
|
| 68 |
+
typedef boost::unordered_set<Forest::Vertex *, VertexSetHash,
|
| 69 |
+
VertexSetPred> VertexSet;
|
| 70 |
+
|
| 71 |
+
Forest::Vertex *AddOrDeleteVertex(Forest::Vertex *);
|
| 72 |
+
|
| 73 |
+
std::size_t FindMaxEnd(const Forest &);
|
| 74 |
+
|
| 75 |
+
void FindTopVertices(Forest &, std::vector<Forest::Vertex *> &);
|
| 76 |
+
|
| 77 |
+
void ParseHyperedgeLine(const std::string &);
|
| 78 |
+
|
| 79 |
+
Forest::Vertex *ParseVertex(const StringPiece &);
|
| 80 |
+
|
| 81 |
+
boost::shared_ptr<Forest> m_forest;
|
| 82 |
+
Forest::Vertex *m_rootVertex;
|
| 83 |
+
VertexSet m_vertexSet;
|
| 84 |
+
};
|
| 85 |
+
|
| 86 |
+
} // namespace Moses
|
| 87 |
+
|
| 88 |
+
#endif
|
mosesdecoder/moses/GenerationDictionary.h
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_GenerationDictionary_h
|
| 23 |
+
#define moses_GenerationDictionary_h
|
| 24 |
+
|
| 25 |
+
#include <list>
|
| 26 |
+
#include <stdexcept>
|
| 27 |
+
#include <vector>
|
| 28 |
+
#include <boost/unordered_map.hpp>
|
| 29 |
+
#include "ScoreComponentCollection.h"
|
| 30 |
+
#include "Phrase.h"
|
| 31 |
+
#include "TypeDef.h"
|
| 32 |
+
#include "moses/FF/DecodeFeature.h"
|
| 33 |
+
|
| 34 |
+
namespace Moses
|
| 35 |
+
{
|
| 36 |
+
|
| 37 |
+
class FactorCollection;
|
| 38 |
+
|
| 39 |
+
typedef boost::unordered_map < Word , ScoreComponentCollection > OutputWordCollection;
|
| 40 |
+
// 1st = output phrase
|
| 41 |
+
// 2nd = log probability (score)
|
| 42 |
+
|
| 43 |
+
/** Implementation of a generation table in a trie.
|
| 44 |
+
*/
|
| 45 |
+
class GenerationDictionary : public DecodeFeature
|
| 46 |
+
{
|
| 47 |
+
typedef boost::unordered_map<const Word* , OutputWordCollection, UnorderedComparer<Word>, UnorderedComparer<Word> > Collection;
|
| 48 |
+
protected:
|
| 49 |
+
static std::vector<GenerationDictionary*> s_staticColl;
|
| 50 |
+
|
| 51 |
+
Collection m_collection;
|
| 52 |
+
// 1st = source
|
| 53 |
+
// 2nd = target
|
| 54 |
+
std::string m_filePath;
|
| 55 |
+
|
| 56 |
+
public:
|
| 57 |
+
static const std::vector<GenerationDictionary*>& GetColl() {
|
| 58 |
+
return s_staticColl;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
GenerationDictionary(const std::string &line);
|
| 62 |
+
virtual ~GenerationDictionary();
|
| 63 |
+
|
| 64 |
+
//! load data file
|
| 65 |
+
void Load(AllOptions::ptr const& opts);
|
| 66 |
+
|
| 67 |
+
/** number of unique input entries in the generation table.
|
| 68 |
+
* NOT the number of lines in the generation table
|
| 69 |
+
*/
|
| 70 |
+
size_t GetSize() const {
|
| 71 |
+
return m_collection.size();
|
| 72 |
+
}
|
| 73 |
+
/** returns a bag of output words, OutputWordCollection, for a particular input word.
|
| 74 |
+
* Or NULL if the input word isn't found. The search function used is the WordComparer functor
|
| 75 |
+
*/
|
| 76 |
+
const OutputWordCollection *FindWord(const Word &word) const;
|
| 77 |
+
void SetParameter(const std::string& key, const std::string& value);
|
| 78 |
+
|
| 79 |
+
};
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
}
|
| 83 |
+
#endif
|
mosesdecoder/moses/HypothesisStackCubePruning.cpp
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <set>
|
| 24 |
+
#include <queue>
|
| 25 |
+
#include "HypothesisStackCubePruning.h"
|
| 26 |
+
#include "TypeDef.h"
|
| 27 |
+
#include "Util.h"
|
| 28 |
+
#include "StaticData.h"
|
| 29 |
+
#include "Manager.h"
|
| 30 |
+
#include "util/exception.hh"
|
| 31 |
+
|
| 32 |
+
using namespace std;
|
| 33 |
+
|
| 34 |
+
namespace Moses
|
| 35 |
+
{
|
| 36 |
+
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
|
| 37 |
+
HypothesisStack(manager)
|
| 38 |
+
{
|
| 39 |
+
m_nBestIsEnabled = manager.options()->nbest.enabled;
|
| 40 |
+
m_bestScore = -std::numeric_limits<float>::infinity();
|
| 41 |
+
m_worstScore = -std::numeric_limits<float>::infinity();
|
| 42 |
+
m_deterministic = manager.options()->cube.deterministic_search;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
/** remove all hypotheses from the collection */
|
| 46 |
+
void HypothesisStackCubePruning::RemoveAll()
|
| 47 |
+
{
|
| 48 |
+
// delete all bitmap accessors;
|
| 49 |
+
_BMType::iterator iter;
|
| 50 |
+
for (iter = m_bitmapAccessor.begin(); iter != m_bitmapAccessor.end(); ++iter) {
|
| 51 |
+
delete iter->second;
|
| 52 |
+
}
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
pair<HypothesisStackCubePruning::iterator, bool> HypothesisStackCubePruning::Add(Hypothesis *hypo)
|
| 56 |
+
{
|
| 57 |
+
std::pair<iterator, bool> ret = m_hypos.insert(hypo);
|
| 58 |
+
|
| 59 |
+
if (ret.second) {
|
| 60 |
+
// equiv hypo doesn't exists
|
| 61 |
+
VERBOSE(3,"added hyp to stack");
|
| 62 |
+
|
| 63 |
+
// Update best score, if this hypothesis is new best
|
| 64 |
+
if (hypo->GetFutureScore() > m_bestScore) {
|
| 65 |
+
VERBOSE(3,", best on stack");
|
| 66 |
+
m_bestScore = hypo->GetFutureScore();
|
| 67 |
+
// this may also affect the worst score
|
| 68 |
+
if ( m_bestScore + m_beamWidth > m_worstScore )
|
| 69 |
+
m_worstScore = m_bestScore + m_beamWidth;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
// Prune only if stack is twice as big as needed (lazy pruning)
|
| 73 |
+
VERBOSE(3,", now size " << m_hypos.size());
|
| 74 |
+
if (m_hypos.size() > 2*m_maxHypoStackSize-1) {
|
| 75 |
+
PruneToSize(m_maxHypoStackSize);
|
| 76 |
+
} else {
|
| 77 |
+
VERBOSE(3,std::endl);
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
return ret;
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
/** Add a hypothesis to the stack, applying beam pruning and recombination.
 * Takes ownership of `hypo`: it is either stored, merged into an existing
 * equivalent hypothesis, or deleted.
 * Returns true only when the hypothesis was newly added to the collection;
 * returns false when it was discarded OR recombined (even if it won the
 * recombination and replaced the previous hypothesis).
 */
bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo)
{
  // -inf score marks a hypothesis ruled out by a constraint; drop it outright
  if (hypo->GetFutureScore() == - std::numeric_limits<float>::infinity()) {
    m_manager.GetSentenceStats().AddDiscarded();
    VERBOSE(3,"discarded, constraint" << std::endl);
    delete hypo;
    return false;
  }

  if (hypo->GetFutureScore() < m_worstScore) {
    // too bad for stack. don't bother adding hypo into collection
    m_manager.GetSentenceStats().AddDiscarded();
    VERBOSE(3,"discarded, too bad for stack" << std::endl);
    delete hypo;
    return false;
  }

  // over threshold, try to add to collection
  std::pair<iterator, bool> addRet = Add(hypo);
  if (addRet.second) {
    // nothing found. add to collection
    return true;
  }

  // equiv hypo exists, recombine with other hypo
  iterator &iterExisting = addRet.first;
  assert(iterExisting != m_hypos.end());
  Hypothesis *hypoExisting = *iterExisting;

  m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);

  // found existing hypo with same target ending.
  // keep the best 1
  if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
    // incoming hypo is better than the one we have
    VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
    if (m_nBestIsEnabled) {
      // keep the loser as an arc of the winner so n-best extraction can
      // still reach it; Detach removes it from the set without deleting
      hypo->AddArc(hypoExisting);
      Detach(iterExisting);
    } else {
      // no n-best list needed: the loser can be destroyed outright
      Remove(iterExisting);
    }

    // re-insert the winner; the equivalent entry was just removed, so this
    // must succeed — anything else indicates corrupted stack state
    bool added = Add(hypo).second;
    if (!added) {
      iterExisting = m_hypos.find(hypo);
      UTIL_THROW(util::Exception, "Should have added hypothesis " << **iterExisting);
    }
    return false;
  } else {
    // already storing the best hypo. discard current hypo
    VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
    if (m_nBestIsEnabled) {
      // keep the incoming loser as an arc of the stored winner
      hypoExisting->AddArc(hypo);
    } else {
      delete hypo;
    }
    return false;
  }
}
|
| 144 |
+
|
| 145 |
+
/** Seed the stack with the empty (initial) hypothesis and register a
 * BitmapContainer for its (all-zero) coverage bitmap. The first stack can
 * hold only this hypothesis, so insertion must always succeed. */
void HypothesisStackCubePruning::AddInitial(Hypothesis *hypo)
{
  const bool added = Add(hypo).second;
  UTIL_THROW_IF2(!added,
                 "Should have added hypothesis " << *hypo);

  AddBitmapContainer(hypo->GetWordsBitmap(), *this);
}
|
| 154 |
+
|
| 155 |
+
/** Histogram-prune the stack down to (roughly) newSize hypotheses.
 * Finds the score of the newSize-th best hypothesis that survives the beam
 * (m_bestScore + m_beamWidth) and deletes everything strictly below it.
 * Hypotheses already below the beam are not counted, so fewer than newSize
 * items may remain. newSize == 0 means "no limit" and is a no-op.
 * Afterwards m_worstScore is raised to the chosen threshold so future
 * sub-threshold hypotheses are rejected up front in AddPrune().
 */
void HypothesisStackCubePruning::PruneToSize(size_t newSize)
{
  if ( newSize == 0) return; // no limit

  if (m_hypos.size() > newSize) { // ok, if not over the limit
    priority_queue<float> bestScores;

    // push all scores to a heap
    // (but never push scores below m_bestScore+m_beamWidth)
    iterator iter = m_hypos.begin();
    float score = 0;
    while (iter != m_hypos.end()) {
      Hypothesis *hypo = *iter;
      score = hypo->GetFutureScore();
      if (score > m_bestScore+m_beamWidth) {
        bestScores.push(score);
      }
      ++iter;
    }

    // pop the top newSize scores (and ignore them, these are the scores of hyps that will remain)
    // ensure to never pop beyond heap size
    size_t minNewSizeHeapSize = newSize > bestScores.size() ? bestScores.size() : newSize;
    // loop starts at 1, so after it bestScores.top() is the newSize-th best score
    for (size_t i = 1 ; i < minNewSizeHeapSize ; i++)
      bestScores.pop();

    // and remember the threshold
    // NOTE(review): top() on an empty heap is UB — this appears to assume the
    // beam width is negative so the best hypothesis itself is always pushed;
    // confirm m_beamWidth < 0 is guaranteed by the caller/configuration.
    float scoreThreshold = bestScores.top();

    // delete all hypos under score threshold
    iter = m_hypos.begin();
    while (iter != m_hypos.end()) {
      Hypothesis *hypo = *iter;
      float score = hypo->GetFutureScore();
      if (score < scoreThreshold) {
        // copy-then-advance: Remove() invalidates the removed iterator
        iterator iterRemove = iter++;
        Remove(iterRemove);
        m_manager.GetSentenceStats().AddPruning();
      } else {
        ++iter;
      }
    }
    VERBOSE(3,", pruned to size " << size() << endl);

    IFVERBOSE(3) {
      TRACE_ERR("stack now contains: ");
      for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++) {
        Hypothesis *hypo = *iter;
        TRACE_ERR( hypo->GetId() << " (" << hypo->GetFutureScore() << ") ");
      }
      TRACE_ERR( endl);
    }

    // set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack
    m_worstScore = scoreThreshold;
  }
}
|
| 212 |
+
|
| 213 |
+
/** Linear scan for the hypothesis with the highest future score.
 * Returns NULL when the stack is empty. */
const Hypothesis *HypothesisStackCubePruning::GetBestHypothesis() const
{
  if (m_hypos.empty()) {
    return NULL;
  }

  const_iterator iter = m_hypos.begin();
  Hypothesis *best = *iter;
  for (++iter; iter != m_hypos.end(); ++iter) {
    Hypothesis *candidate = *iter;
    if (candidate->GetFutureScore() > best->GetFutureScore())
      best = candidate;
  }
  return best;
}
|
| 227 |
+
|
| 228 |
+
/** Return all hypotheses sorted by descending total score
 * (used when building the n-best list). */
vector<const Hypothesis*> HypothesisStackCubePruning::GetSortedList() const
{
  vector<const Hypothesis*> sorted(m_hypos.begin(), m_hypos.end());
  sort(sorted.begin(), sorted.end(), CompareHypothesisTotalScore());
  return sorted;
}
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
void HypothesisStackCubePruning::CleanupArcList()
|
| 240 |
+
{
|
| 241 |
+
// only necessary if n-best calculations are enabled
|
| 242 |
+
if (!m_nBestIsEnabled) return;
|
| 243 |
+
|
| 244 |
+
iterator iter;
|
| 245 |
+
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
|
| 246 |
+
Hypothesis *mainHypo = *iter;
|
| 247 |
+
mainHypo->CleanupArcList(this->m_manager.options()->nbest.nbest_size, this->m_manager.options()->NBestDistinct());
|
| 248 |
+
}
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
void HypothesisStackCubePruning::SetBitmapAccessor(const Bitmap &newBitmap
|
| 252 |
+
, HypothesisStackCubePruning &stack
|
| 253 |
+
, const Range &/*range*/
|
| 254 |
+
, BitmapContainer &bitmapContainer
|
| 255 |
+
, const SquareMatrix &estimatedScores
|
| 256 |
+
, const TranslationOptionList &transOptList)
|
| 257 |
+
{
|
| 258 |
+
BitmapContainer *bmContainer = AddBitmapContainer(newBitmap, stack);
|
| 259 |
+
BackwardsEdge *edge = new BackwardsEdge(bitmapContainer
|
| 260 |
+
, *bmContainer
|
| 261 |
+
, transOptList
|
| 262 |
+
, estimatedScores
|
| 263 |
+
, m_manager.GetSource()
|
| 264 |
+
, m_deterministic);
|
| 265 |
+
bmContainer->AddBackwardsEdge(edge);
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
TO_STRING_BODY(HypothesisStackCubePruning);
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
// friend
|
| 273 |
+
std::ostream& operator<<(std::ostream& out, const HypothesisStackCubePruning& hypoColl)
|
| 274 |
+
{
|
| 275 |
+
HypothesisStackCubePruning::const_iterator iter;
|
| 276 |
+
|
| 277 |
+
for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter) {
|
| 278 |
+
const Hypothesis &hypo = **iter;
|
| 279 |
+
out << hypo << endl;
|
| 280 |
+
|
| 281 |
+
}
|
| 282 |
+
return out;
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
void
|
| 286 |
+
HypothesisStackCubePruning::AddHypothesesToBitmapContainers()
|
| 287 |
+
{
|
| 288 |
+
HypothesisStackCubePruning::const_iterator iter;
|
| 289 |
+
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
|
| 290 |
+
Hypothesis *h = *iter;
|
| 291 |
+
const Bitmap &bitmap = h->GetWordsBitmap();
|
| 292 |
+
BitmapContainer *container = m_bitmapAccessor[&bitmap];
|
| 293 |
+
container->AddHypothesis(h);
|
| 294 |
+
}
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
/** Get the BitmapContainer registered for this coverage bitmap, creating and
 * registering a new one on first sight. Pointer keys are valid because all
 * bitmaps come from the bitmap factory (see header comment). */
BitmapContainer *HypothesisStackCubePruning::AddBitmapContainer(const Bitmap &bitmap, HypothesisStackCubePruning &stack)
{
  _BMType::iterator found = m_bitmapAccessor.find(&bitmap);
  if (found != m_bitmapAccessor.end()) {
    return found->second;
  }

  BitmapContainer *fresh = new BitmapContainer(bitmap, stack, m_deterministic);
  m_bitmapAccessor[&bitmap] = fresh;
  return fresh;
}
|
| 311 |
+
|
| 312 |
+
}
|
| 313 |
+
|
mosesdecoder/moses/HypothesisStackCubePruning.h
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_HypothesisStackCubePruning_h
|
| 23 |
+
#define moses_HypothesisStackCubePruning_h
|
| 24 |
+
|
| 25 |
+
#include <limits>
|
| 26 |
+
#include <set>
|
| 27 |
+
#include <boost/unordered_map.hpp>
|
| 28 |
+
#include "Hypothesis.h"
|
| 29 |
+
#include "BitmapContainer.h"
|
| 30 |
+
#include "HypothesisStack.h"
|
| 31 |
+
#include "Util.h"
|
| 32 |
+
|
| 33 |
+
namespace Moses
|
| 34 |
+
{
|
| 35 |
+
|
| 36 |
+
class BitmapContainer;
class TranslationOptionList;
class Manager;

//typedef boost::unordered_map<Bitmap, BitmapContainer*, UnorderedComparer<Bitmap>, UnorderedComparer<Bitmap> > _BMType;
typedef boost::unordered_map<const Bitmap*, BitmapContainer*> _BMType;
// can compare Bitmap* 'cos all bitmaps are created from bitmaps factory class. MUST ensure this is the case

/** A stack for phrase-based decoding with cube-pruning.
 * Besides the usual hypothesis collection (inherited from HypothesisStack),
 * it owns one BitmapContainer per distinct coverage bitmap, which groups
 * hypotheses for cube pruning's backwards-edge expansion. */
class HypothesisStackCubePruning : public HypothesisStack
{
public:
  friend std::ostream& operator<<(std::ostream&, const HypothesisStackCubePruning&);

protected:
  _BMType m_bitmapAccessor; /**< coverage bitmap -> owned BitmapContainer */

  float m_bestScore; /**< score of the best hypothesis in collection */
  float m_worstScore; /**< score of the worst hypothesis in collection */
  float m_beamWidth; /**< minimum score offset due to threshold pruning */
  size_t m_maxHypoStackSize; /**< maximum number of hypotheses allowed in this stack */
  bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
  bool m_deterministic; /**< flag to determine whether to sort hypotheses deterministically */

  /** add hypothesis to stack. Prune if necessary.
   * Returns false if equiv hypo exists in collection, otherwise returns true
   */
  std::pair<HypothesisStackCubePruning::iterator, bool> Add(Hypothesis *hypothesis);

  /** destroy all BitmapContainers owned by this stack */
  void RemoveAll();

  /** get-or-create the BitmapContainer registered for this bitmap */
  BitmapContainer *AddBitmapContainer(const Bitmap &bitmap, HypothesisStackCubePruning &stack);

public:
  HypothesisStackCubePruning(Manager& manager);
  ~HypothesisStackCubePruning() {
    RemoveAll();
    m_bitmapAccessor.clear();
  }

  /** adds the hypo, but only if within thresholds (beamThr, stackSize).
   * This function will recombine hypotheses silently! There is no record
   * (could affect n-best list generation...TODO)
   * Call stack for adding hypothesis is
  	AddPrune()
  		Add()
  			AddNoPrune()
  */
  bool AddPrune(Hypothesis *hypothesis);

  /** seed the stack with the initial (empty-coverage) hypothesis */
  void AddInitial(Hypothesis *hypo);

  /** set maximum number of hypotheses in the collection
   * \param maxHypoStackSize maximum number (typical number: 100)
   */
  inline void SetMaxHypoStackSize(size_t maxHypoStackSize) {
    m_maxHypoStackSize = maxHypoStackSize;
  }

  inline size_t GetMaxHypoStackSize() const {
    return m_maxHypoStackSize;
  }

  /** set beam threshold, hypotheses in the stack must not be worse than
   * this factor times the best score to be allowed in the stack
   * \param beamWidth minimum factor (typical number: 0.03)
   */
  inline void SetBeamWidth(float beamWidth) {
    m_beamWidth = beamWidth;
  }

  /** return score of the best hypothesis in the stack */
  inline float GetBestScore() const {
    return m_bestScore;
  }

  /** return worst score allowed for the stack */
  inline float GetWorstScore() const {
    return m_worstScore;
  }

  /** route each hypothesis into the container matching its coverage bitmap */
  void AddHypothesesToBitmapContainers();

  const _BMType& GetBitmapAccessor() const {
    return m_bitmapAccessor;
  }

  /** register a container for newBitmap and link it to its predecessor
   * container via a BackwardsEdge carrying the translation options */
  void SetBitmapAccessor(const Bitmap &newBitmap
                         , HypothesisStackCubePruning &stack
                         , const Range &range
                         , BitmapContainer &bitmapContainer
                         , const SquareMatrix &estimatedScores
                         , const TranslationOptionList &transOptList);

  /** pruning, if too large.
   * Pruning algorithm: find a threshold and delete all hypotheses below it.
   * The threshold is chosen so that exactly newSize top items remain on the
   * stack; in fact, in situations where some of the hypotheses fell below
   * m_beamWidth, the stack will contain fewer items.
   * \param newSize maximum size */
  void PruneToSize(size_t newSize);

  //! return the hypothesis with best score. Used to get the translated at end of decoding
  const Hypothesis *GetBestHypothesis() const;
  //! return all hypothesis, sorted by descending score. Used in creation of N best list
  std::vector<const Hypothesis*> GetSortedList() const;

  /** make all arcs in point to the equiv hypothesis that contains them.
   * Ie update doubly linked list be hypo & arcs
   */
  void CleanupArcList();

  TO_STRING();
};
|
| 151 |
+
|
| 152 |
+
}
|
| 153 |
+
#endif
|
mosesdecoder/moses/Incremental.h
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- c++ -*-
|
| 2 |
+
#pragma once
|
| 3 |
+
|
| 4 |
+
#include "lm/word_index.hh"
|
| 5 |
+
#include "search/applied.hh"
|
| 6 |
+
#include "search/nbest.hh"
|
| 7 |
+
|
| 8 |
+
#include "moses/ChartCellCollection.h"
|
| 9 |
+
#include "moses/ChartParser.h"
|
| 10 |
+
|
| 11 |
+
#include "BaseManager.h"
|
| 12 |
+
|
| 13 |
+
#include <vector>
|
| 14 |
+
#include <string>
|
| 15 |
+
|
| 16 |
+
namespace Moses
|
| 17 |
+
{
|
| 18 |
+
class ScoreComponentCollection;
|
| 19 |
+
class InputType;
|
| 20 |
+
class LanguageModel;
|
| 21 |
+
|
| 22 |
+
namespace Incremental
|
| 23 |
+
{
|
| 24 |
+
|
| 25 |
+
/** Decoding manager for incremental (KenLM "search"-based) chart decoding.
 * Drives a ChartParser over the standard chart cells but delegates search and
 * language-model scoring to the kenlm search layer via LMCallback. Many of
 * the BaseManager output hooks are intentionally no-ops here (lattice,
 * alignment, word/search graph output are not supported by this decoder). */
class Manager : public BaseManager
{
public:
  Manager(ttasksptr const& ttask);

  ~Manager();

  // Called by the LM wrapper once per model; runs the search with `model`
  // over the chart, using `words` as the LM vocabulary mapping.
  template <class Model> void LMCallback(const Model &model, const std::vector<lm::WordIndex> &words);

  void Decode();

  const std::vector<search::Applied> &GetNBest() const;

  // Call to get the same value as ProcessSentence returned.
  const std::vector<search::Applied> &Completed() const {
    return *completed_nbest_;
  }

  // output
  void OutputBest(OutputCollector *collector) const;
  void OutputNBest(OutputCollector *collector) const;
  void OutputDetailedTranslationReport(OutputCollector *collector) const;
  void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
  // no-op: lattice samples are not produced by the incremental decoder
  void OutputLatticeSamples(OutputCollector *collector) const {
  }
  // no-op: alignment output is not supported here
  void OutputAlignment(OutputCollector *collector) const {
  }
  void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
  // no-op: word graph output is not supported here
  void OutputWordGraph(OutputCollector *collector) const {
  }
  // no-op: search graph output is not supported here
  void OutputSearchGraph(OutputCollector *collector) const {
  }
  void OutputSearchGraphSLF() const {
  }

  // no-op: hypergraph output is not supported here
  void
  OutputSearchGraphAsHypergraph
  ( std::string const& fname, size_t const precision ) const
  { }


private:
  // Shared implementation behind LMCallback: run the search and collect
  // results into `out` (either SingleBest or NBest).
  template <class Model, class Best> search::History PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out);

  ChartCellCollectionBase cells_;
  ChartParser parser_;

  // Only one of single_best_ or n_best_ will be used, but it was easier to do this than a template.
  search::SingleBest single_best_;
  // ProcessSentence returns a reference to a vector. ProcessSentence
  // doesn't have one, so this is populated and returned.
  std::vector<search::Applied> backing_for_single_;

  search::NBest n_best_;

  // Points at either backing_for_single_ or the n-best vector, depending on
  // which search mode ran.
  const std::vector<search::Applied> *completed_nbest_;

  // outputs
  void OutputDetailedTranslationReport(
    OutputCollector *collector,
    const search::Applied *applied,
    const Sentence &sentence,
    long translationId) const;
  void OutputTranslationOptions(std::ostream &out,
                                ApplicationContext &applicationContext,
                                const search::Applied *applied,
                                const Sentence &sentence,
                                long translationId) const;
  void OutputTranslationOption(std::ostream &out,
                               ApplicationContext &applicationContext,
                               const search::Applied *applied,
                               const Sentence &sentence,
                               long translationId) const;
  void ReconstructApplicationContext(const search::Applied *applied,
                                     const Sentence &sentence,
                                     ApplicationContext &context) const;
  void OutputTreeFragmentsTranslationOptions(std::ostream &out,
      ApplicationContext &applicationContext,
      const search::Applied *applied,
      const Sentence &sentence,
      long translationId) const;
  void OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const;
  void OutputBestNone(OutputCollector *collector, long translationId) const;

  // no-op: unknown-word output is not produced by this decoder
  void OutputUnknowns(OutputCollector *collector) const {
  }
  // no-op: no decoder statistics are collected here
  void CalcDecoderStatistics() const {
  }

};
|
| 115 |
+
|
| 116 |
+
// Just get the phrase.
|
| 117 |
+
void ToPhrase(const search::Applied final, Phrase &out);
|
| 118 |
+
// Get the phrase and the features.
|
| 119 |
+
void PhraseAndFeatures(const search::Applied final, Phrase &phrase, ScoreComponentCollection &features);
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
} // namespace Incremental
|
| 123 |
+
} // namespace Moses
|
| 124 |
+
|
mosesdecoder/moses/Jamfile
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- jam -*-
# Build script for the core moses decoder library and its unit tests.

# --- compile-time factor limit -------------------------------------------
# MAX_NUM_FACTORS is baked into the binaries; record the chosen value in
# bin/factor.log so changing it forces a rebuild.
max-factors = [ option.get "max-factors" : 4 : 4 ] ;
path-constant FACTOR-LOG : bin/factor.log ;
update-if-changed $(FACTOR-LOG) $(max-factors) ;
max-factors = <define>MAX_NUM_FACTORS=$(max-factors) <dependency>$(FACTOR-LOG) ;

# --- optional third-party integrations -----------------------------------
with-dlib = [ option.get "with-dlib" ] ;
if $(with-dlib) {
  dlib = <define>WITH_DLIB <include>$(with-dlib) ;
} else {
  dlib = ;
}

with-oxlm = [ option.get "with-oxlm" ] ;
if $(with-oxlm) {
  oxlm = <cxxflags>-std=c++0x <define>LM_OXLM <include>$(with-oxlm)/src <include>$(with-oxlm)/third_party/eigen ;
} else {
  oxlm = ;
}

local classifier = ;
if [ option.get "with-vw" ] {
  classifier += ..//vw//classifier ;
}

alias headers : ../util//kenutil $(classifier) : : : $(max-factors) $(dlib) $(oxlm) ;
alias ThreadPool : ThreadPool.cpp ;
alias Util : Util.cpp Timer.cpp ;

# Syntactic LM support is opt-in; it pulls in the synlm headers and libm.
if [ option.get "with-synlm" : no : yes ] = yes
{
  lib m ;
  obj SyntacticLanguageModel.o : SyntacticLanguageModel.cpp headers : <include>$(TOP)/synlm/hhmm/rvtl/include <include>$(TOP)/synlm/hhmm/wsjparse/include ;
  alias synlm : SyntacticLanguageModel.o m : : : <define>HAVE_SYNLM ;
} else {
  alias synlm ;
}

#This is a kludge to force rebuilding if different --with options are passed.
#Could have used features like <srilm>on but getting these to apply only to
#linking was ugly and it still didn't trigger an install (since the install
#path doesn't encode features).  It stores a file lm.log with the previous
#options and forces a rebuild if the current options differ.
local current = ;
for local i in srilm irstlm randlm {
  local optval = [ option.get "with-$(i)" ] ;
  if $(optval) {
    current += "--with-$(i)=$(optval)" ;
  }
}
current = $(current:J=" ") ;
current ?= "" ;
path-constant LM-LOG : bin/lm.log ;
update-if-changed $(LM-LOG) $(current) ;

obj FF_Factory.o : FF/Factory.cpp LM//macros headers ../lm//kenlm mmlib : <dependency>$(LM-LOG) ;

# check if we have xmlrpc-c's abyss server available
# if yes, include server capabilities in the moses executable
# include $(TOP)/jam-files/server.jam ;

if [ xmlrpc ]
{
  echo "BUILDING MOSES SERVER!" ;
  alias mserver : [ glob server/*.cpp ] ;
}
else
{
  echo "NOT BUILDING MOSES SERVER!" ;
  alias mserver ;
}

# Memory-mapped suffix-array phrase table support (UG/mmsapt).
if [ option.get "with-mm" : no : yes ] = yes
{
  alias mmlib :
    $(TOP)/moses/TranslationModel/UG//mmsapt
    $(TOP)/moses/TranslationModel/UG/generic//generic
    $(TOP)/moses/TranslationModel/UG/mm//mm
    ;
} else {
  alias mmlib ;
}

local with-vw = [ option.get "with-vw" ] ;
if $(with-vw) {
  alias vwfiles : [ glob FF/VW/*.cpp ] ;
} else {
  alias vwfiles ;
}

# --- the main library ------------------------------------------------------
# Globs in all decoder sources; the "exceptions" list excludes files that are
# built separately (ThreadPool, FF_Factory.o, synlm) or are test-only.
lib moses :
  [ glob
    *.cpp
    parameters/*.cpp
    Syntax/*.cpp
    Syntax/F2S/*.cpp
    Syntax/S2T/*.cpp
    Syntax/S2T/Parsers/*.cpp
    Syntax/S2T/Parsers/RecursiveCYKPlusParser/*.cpp
    Syntax/S2T/Parsers/Scope3Parser/*.cpp
    Syntax/T2S/*.cpp
    TranslationModel/*.cpp
    TranslationModel/fuzzy-match/*.cpp
    TranslationModel/DynSAInclude/*.cpp
    TranslationModel/RuleTable/*.cpp
    TranslationModel/Scope3Parser/*.cpp
    TranslationModel/CYKPlusParser/*.cpp
    ../phrase-extract/PhraseOrientation.cpp
    FF/*.cpp
    FF/bilingual-lm/*.cpp
    FF/OSM-Feature/*.cpp
    FF/Dsg-Feature/*.cpp
    FF/LexicalReordering/*.cpp
    PP/*.cpp
    : #exceptions
    ThreadPool.cpp
    SyntacticLanguageModel.cpp
    *Test.cpp Mock*.cpp FF/*Test.cpp
    FF/Factory.cpp
  ]
  vwfiles synlm mmlib mserver headers
  FF_Factory.o
  LM//LM
  TranslationModel/CompactPT//CompactPT
  ThreadPool
  ..//search
  ../util/double-conversion//double-conversion
  ../probingpt//probingpt
  ..//z
  ../OnDiskPt//OnDiskPt
  $(TOP)//boost_filesystem
  $(TOP)//boost_iostreams
  :
  <threading>single:<source>../util//rt
  ;


alias headers-to-install : [ glob-tree *.h ] ;

import testing ;

unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ../probingpt//probingpt ..//boost_unit_test_framework ;
| 143 |
+
|
mosesdecoder/moses/LVoc.cpp
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include<limits>
|
| 2 |
+
#include "LVoc.h"
|
| 3 |
+
|
| 4 |
+
// LVoc itself is a header-only template (see LVoc.h); this file exists only
// to give the vocabulary sentinel constants a single definition.

// Sentinel returned by LVoc::index() for symbols not in the vocabulary.
const LabelId InvalidLabelId = std::numeric_limits<LabelId>::max();
// Reserved label for the empty/epsilon symbol.
const LabelId Epsilon = InvalidLabelId-1;
|
mosesdecoder/moses/LVoc.h
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef moses_LVoc_h
|
| 2 |
+
#define moses_LVoc_h
|
| 3 |
+
|
| 4 |
+
#include <cassert>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
| 9 |
+
|
| 10 |
+
typedef unsigned LabelId;
extern const LabelId InvalidLabelId;  // "not in vocabulary" sentinel (defined in LVoc.cpp)
extern const LabelId Epsilon;         // reserved empty-symbol label (defined in LVoc.cpp)

typedef std::vector<LabelId> IPhrase;

/** Bidirectional symbol<->id vocabulary used by the phrase-based binary
 * phrase-table.
 * @todo vocab?
 * A = type of things to numberize, ie, std::string
 * B = map type to use, might consider using hash_map for better performance
 * Ids are assigned densely in insertion order (first symbol gets 0).
 */
template<typename A,typename B=std::map<A,LabelId> >
class LVoc
{
  typedef A Key;
  typedef B M;
  typedef std::vector<Key> V;
  M m;      // symbol -> id
  V data;   // id -> symbol (ids are indices into this vector)
public:
  LVoc() {}

  /// True iff the symbol already has an id.
  bool isKnown(const Key& k) const {
    return m.find(k)!=m.end();
  }
  /// Id of a known symbol, or InvalidLabelId when the symbol is absent.
  LabelId index(const Key& k) const {
    typename M::const_iterator i=m.find(k);
    return i!=m.end()? i->second : InvalidLabelId;
  }
  /// Insert the symbol if new; either way return its id.
  LabelId add(const Key& k) {
    std::pair<typename M::iterator,bool> p
    =m.insert(std::make_pair(k,data.size()));
    if(p.second) data.push_back(k);
    assert(static_cast<size_t>(p.first->second)<data.size());
    return p.first->second;
  }
  /// Symbol for an id; the id must be valid (asserted, not checked).
  Key const& symbol(LabelId i) const {
    assert(static_cast<size_t>(i)<data.size());
    return data[i];
  }

  typedef typename V::const_iterator const_iterator;
  const_iterator begin() const {
    return data.begin();
  }
  const_iterator end() const {
    return data.end();
  }

  /// Write the vocabulary to a file; throws on any I/O failure.
  void Write(const std::string& fname) const {
    std::ofstream out(fname.c_str());
    // Little-known fact: ofstream tracks failures but does not, by default,
    // report them. You have to tell it to, or check for errors yourself.
    // (Use the stream-neutral ios_base flags; the original passed
    // std::ifstream's constants to an *output* stream — same values, but
    // misleading.)
    out.exceptions(std::ios_base::failbit | std::ios_base::badbit);
    Write(out);
    // Make sure the file is flushed, so that any errors are reported. If we
    // flush implicitly in the destructor, it won't be able to throw
    // exceptions.
    out.close();
  }
  /// Write "<id> <symbol>" lines, highest id first.
  void Write(std::ostream& out) const {
    for(int i=data.size()-1; i>=0; --i)
      out<<i<<' '<<data[i]<<'\n';
  }
  void Read(const std::string& fname) {
    std::ifstream in(fname.c_str());
    Read(in);
  }
  /// Parse "<id> <symbol>" lines; malformed lines are silently skipped.
  void Read(std::istream& in) {
    Key k;
    size_t i;
    std::string line;
    while(getline(in,line)) {
      std::istringstream is(line);
      if(is>>i>>k) {
        if(i>=data.size()) data.resize(i+1);
        data[i]=k;
        m[k]=i;
      }
    }
  }
};
|
| 92 |
+
|
| 93 |
+
#endif
|
mosesdecoder/moses/Manager.cpp
ADDED
|
@@ -0,0 +1,2016 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
#ifdef WIN32
|
| 22 |
+
#include <hash_set>
|
| 23 |
+
#else
|
| 24 |
+
// #include <ext/hash_set>
|
| 25 |
+
#endif
|
| 26 |
+
|
| 27 |
+
#include <algorithm>
|
| 28 |
+
#include <cmath>
|
| 29 |
+
#include <limits>
|
| 30 |
+
#include <map>
|
| 31 |
+
#include <set>
|
| 32 |
+
#include "Manager.h"
|
| 33 |
+
#include "TypeDef.h"
|
| 34 |
+
#include "Util.h"
|
| 35 |
+
#include "TargetPhrase.h"
|
| 36 |
+
#include "TrellisPath.h"
|
| 37 |
+
#include "TrellisPathCollection.h"
|
| 38 |
+
#include "TranslationOption.h"
|
| 39 |
+
#include "TranslationOptionCollection.h"
|
| 40 |
+
#include "Timer.h"
|
| 41 |
+
#include "moses/OutputCollector.h"
|
| 42 |
+
#include "moses/FF/DistortionScoreProducer.h"
|
| 43 |
+
#include "moses/LM/Base.h"
|
| 44 |
+
#include "moses/TranslationModel/PhraseDictionary.h"
|
| 45 |
+
#include "moses/TranslationAnalysis.h"
|
| 46 |
+
#include "moses/TranslationTask.h"
|
| 47 |
+
#include "moses/HypergraphOutput.h"
|
| 48 |
+
#include "moses/mbr.h"
|
| 49 |
+
#include "moses/LatticeMBR.h"
|
| 50 |
+
#include "moses/SearchNormal.h"
|
| 51 |
+
#include "moses/SearchCubePruning.h"
|
| 52 |
+
#include <boost/foreach.hpp>
|
| 53 |
+
|
| 54 |
+
#ifdef HAVE_PROTOBUF
|
| 55 |
+
#include "hypergraph.pb.h"
|
| 56 |
+
#include "rule.pb.h"
|
| 57 |
+
#endif
|
| 58 |
+
|
| 59 |
+
#include "util/exception.hh"
|
| 60 |
+
#include "util/random.hh"
|
| 61 |
+
#include "util/string_stream.hh"
|
| 62 |
+
|
| 63 |
+
using namespace std;
|
| 64 |
+
|
| 65 |
+
namespace Moses
|
| 66 |
+
{
|
| 67 |
+
|
| 68 |
+
/** Construct a decoder for one input: build the per-sentence translation
 *  option collection and instantiate the configured search strategy.
 *  \param ttask the translation task carrying input and options
 *  \throws util::Exception if the configured search algorithm is unsupported
 */
Manager::Manager(ttasksptr const& ttask)
  : BaseManager(ttask)
  , interrupted_flag(0)
  , m_hypoId(0)
{
  boost::shared_ptr<InputType> source = ttask->GetSource();
  m_transOptColl = source->CreateTranslationOptionCollection(ttask);

  // Pick the search strategy requested in the configuration.
  switch(options()->search.algo) {
  case Normal:
    m_search = new SearchNormal(*this, *m_transOptColl);
    break;
  case CubePruning:
    m_search = new SearchCubePruning(*this, *m_transOptColl);
    break;
  default:
    // Include the offending algorithm id so the failure is diagnosable
    // (the old message was just "ERROR: search. Aborting").
    UTIL_THROW2("ERROR: unknown search algorithm " << options()->search.algo
                << ". Aborting");
  }

  StaticData::Instance().InitializeForInput(ttask);
}
|
| 89 |
+
|
| 90 |
+
/** Tear down per-sentence state: free the translation options and the
 *  search object, then let StaticData run its per-sentence cleanup. */
Manager::~Manager()
{
  delete m_transOptColl;
  delete m_search;
  StaticData::Instance().CleanUpAfterSentenceProcessing(m_ttask.lock());
}
|
| 96 |
+
|
| 97 |
+
/** Accessor for the input (sentence, lattice or confusion net) being decoded. */
const InputType&
Manager::GetSource() const
{
  return m_source;
}
|
| 102 |
+
|
| 103 |
+
/**
|
| 104 |
+
* Main decoder loop that translates a sentence by expanding
|
| 105 |
+
* hypotheses stack by stack, until the end of the sentence.
|
| 106 |
+
*/
|
| 107 |
+
void Manager::Decode()
|
| 108 |
+
{
|
| 109 |
+
|
| 110 |
+
//std::cerr << options().nbest.nbest_size << " "
|
| 111 |
+
// << options().nbest.enabled << " " << std::endl;
|
| 112 |
+
|
| 113 |
+
// initialize statistics
|
| 114 |
+
ResetSentenceStats(m_source);
|
| 115 |
+
IFVERBOSE(2) {
|
| 116 |
+
GetSentenceStats().StartTimeTotal();
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
// check if alternate weight setting is used
|
| 120 |
+
// this is not thread safe! it changes StaticData
|
| 121 |
+
if (StaticData::Instance().GetHasAlternateWeightSettings()) {
|
| 122 |
+
if (m_source.GetSpecifiesWeightSetting()) {
|
| 123 |
+
StaticData::Instance().SetWeightSetting(m_source.GetWeightSetting());
|
| 124 |
+
} else {
|
| 125 |
+
StaticData::Instance().SetWeightSetting("default");
|
| 126 |
+
}
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
// get translation options
|
| 130 |
+
IFVERBOSE(1) {
|
| 131 |
+
GetSentenceStats().StartTimeCollectOpts();
|
| 132 |
+
}
|
| 133 |
+
m_transOptColl->CreateTranslationOptions();
|
| 134 |
+
|
| 135 |
+
// some reporting on how long this took
|
| 136 |
+
IFVERBOSE(1) {
|
| 137 |
+
GetSentenceStats().StopTimeCollectOpts();
|
| 138 |
+
TRACE_ERR("Line "<< m_source.GetTranslationId()
|
| 139 |
+
<< ": Collecting options took "
|
| 140 |
+
<< GetSentenceStats().GetTimeCollectOpts() << " seconds at "
|
| 141 |
+
<< __FILE__ << " Line " << __LINE__ << endl);
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
// search for best translation with the specified algorithm
|
| 145 |
+
Timer searchTime;
|
| 146 |
+
searchTime.start();
|
| 147 |
+
m_search->Decode();
|
| 148 |
+
VERBOSE(1, "Line " << m_source.GetTranslationId()
|
| 149 |
+
<< ": Search took " << searchTime << " seconds" << endl);
|
| 150 |
+
IFVERBOSE(2) {
|
| 151 |
+
GetSentenceStats().StopTimeTotal();
|
| 152 |
+
TRACE_ERR(GetSentenceStats());
|
| 153 |
+
}
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
/**
|
| 157 |
+
* Print all derivations in search graph. Note: The number of derivations is exponential in the sentence length
|
| 158 |
+
*
|
| 159 |
+
*/
|
| 160 |
+
|
| 161 |
+
void Manager::PrintAllDerivations(long translationId, ostream& outputStream) const
|
| 162 |
+
{
|
| 163 |
+
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
|
| 164 |
+
|
| 165 |
+
vector<const Hypothesis*> sortedPureHypo = hypoStackColl.back()->GetSortedList();
|
| 166 |
+
|
| 167 |
+
if (sortedPureHypo.size() == 0)
|
| 168 |
+
return;
|
| 169 |
+
|
| 170 |
+
float remainingScore = 0;
|
| 171 |
+
vector<const TargetPhrase*> remainingPhrases;
|
| 172 |
+
|
| 173 |
+
// add all pure paths
|
| 174 |
+
vector<const Hypothesis*>::const_iterator iterBestHypo;
|
| 175 |
+
for (iterBestHypo = sortedPureHypo.begin()
|
| 176 |
+
; iterBestHypo != sortedPureHypo.end()
|
| 177 |
+
; ++iterBestHypo) {
|
| 178 |
+
printThisHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore, outputStream);
|
| 179 |
+
printDivergentHypothesis(translationId, *iterBestHypo, remainingPhrases, remainingScore, outputStream);
|
| 180 |
+
}
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
/** Expose the per-sentence translation option collection (non-owning pointer). */
const TranslationOptionCollection* Manager::getSntTranslationOptions()
{
  return m_transOptColl;
}
|
| 187 |
+
|
| 188 |
+
void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore , ostream& outputStream ) const
|
| 189 |
+
{
|
| 190 |
+
//Backtrack from the predecessor
|
| 191 |
+
if (hypo->GetId() > 0) {
|
| 192 |
+
vector <const TargetPhrase*> followingPhrases;
|
| 193 |
+
followingPhrases.push_back(& (hypo->GetCurrTargetPhrase()));
|
| 194 |
+
///((Phrase) hypo->GetPrevHypo()->GetTargetPhrase());
|
| 195 |
+
followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
|
| 196 |
+
printDivergentHypothesis(translationId, hypo->GetPrevHypo(), followingPhrases , remainingScore + hypo->GetScore() - hypo->GetPrevHypo()->GetScore(), outputStream);
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
//Process the arcs
|
| 200 |
+
const ArcList *pAL = hypo->GetArcList();
|
| 201 |
+
if (pAL) {
|
| 202 |
+
const ArcList &arcList = *pAL;
|
| 203 |
+
// every possible Arc to replace this edge
|
| 204 |
+
ArcList::const_iterator iterArc;
|
| 205 |
+
for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc) {
|
| 206 |
+
const Hypothesis *loserHypo = *iterArc;
|
| 207 |
+
const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
|
| 208 |
+
float arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
|
| 209 |
+
vector <const TargetPhrase* > followingPhrases;
|
| 210 |
+
followingPhrases.push_back(&(loserHypo->GetCurrTargetPhrase()));
|
| 211 |
+
followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
|
| 212 |
+
printThisHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore, outputStream);
|
| 213 |
+
printDivergentHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore, outputStream);
|
| 214 |
+
}
|
| 215 |
+
}
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
void
|
| 220 |
+
Manager::
|
| 221 |
+
printThisHypothesis(long translationId, const Hypothesis* hypo,
|
| 222 |
+
const vector <const TargetPhrase*> & remainingPhrases,
|
| 223 |
+
float remainingScore, ostream& outputStream) const
|
| 224 |
+
{
|
| 225 |
+
|
| 226 |
+
outputStream << translationId << " ||| ";
|
| 227 |
+
|
| 228 |
+
//Yield of this hypothesis
|
| 229 |
+
hypo->ToStream(outputStream);
|
| 230 |
+
for (size_t p = 0; p < remainingPhrases.size(); ++p) {
|
| 231 |
+
const TargetPhrase * phrase = remainingPhrases[p];
|
| 232 |
+
size_t size = phrase->GetSize();
|
| 233 |
+
for (size_t pos = 0 ; pos < size ; pos++) {
|
| 234 |
+
const Factor *factor = phrase->GetFactor(pos, 0);
|
| 235 |
+
outputStream << *factor;
|
| 236 |
+
outputStream << " ";
|
| 237 |
+
}
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
outputStream << "||| " << hypo->GetScore() + remainingScore;
|
| 241 |
+
outputStream << endl;
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
/**
|
| 248 |
+
* After decoding, the hypotheses in the stacks and additional arcs
|
| 249 |
+
* form a search graph that can be mined for n-best lists.
|
| 250 |
+
* The heavy lifting is done in the TrellisPath and TrellisPathCollection
|
| 251 |
+
* this function controls this for one sentence.
|
| 252 |
+
*
|
| 253 |
+
* \param count the number of n-best translations to produce
|
| 254 |
+
* \param ret holds the n-best list that was calculated
|
| 255 |
+
*/
|
| 256 |
+
void Manager::CalcNBest(size_t count, TrellisPathList &ret, bool onlyDistinct) const
|
| 257 |
+
{
|
| 258 |
+
if (count <= 0)
|
| 259 |
+
return;
|
| 260 |
+
|
| 261 |
+
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
|
| 262 |
+
|
| 263 |
+
vector<const Hypothesis*> sortedPureHypo = hypoStackColl.back()->GetSortedList();
|
| 264 |
+
|
| 265 |
+
if (sortedPureHypo.size() == 0)
|
| 266 |
+
return;
|
| 267 |
+
|
| 268 |
+
TrellisPathCollection contenders;
|
| 269 |
+
|
| 270 |
+
set<Phrase> distinctHyps;
|
| 271 |
+
|
| 272 |
+
// add all pure paths
|
| 273 |
+
vector<const Hypothesis*>::const_iterator iterBestHypo;
|
| 274 |
+
for (iterBestHypo = sortedPureHypo.begin()
|
| 275 |
+
; iterBestHypo != sortedPureHypo.end()
|
| 276 |
+
; ++iterBestHypo) {
|
| 277 |
+
contenders.Add(new TrellisPath(*iterBestHypo));
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
// factor defines stopping point for distinct n-best list if too
|
| 281 |
+
// many candidates identical
|
| 282 |
+
size_t nBestFactor = options()->nbest.factor;
|
| 283 |
+
if (nBestFactor < 1) nBestFactor = 1000; // 0 = unlimited
|
| 284 |
+
|
| 285 |
+
// MAIN loop
|
| 286 |
+
for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * nBestFactor) ; iteration++) {
|
| 287 |
+
// get next best from list of contenders
|
| 288 |
+
TrellisPath *path = contenders.pop();
|
| 289 |
+
UTIL_THROW_IF2(path == NULL, "path is NULL");
|
| 290 |
+
// create deviations from current best
|
| 291 |
+
path->CreateDeviantPaths(contenders);
|
| 292 |
+
if(onlyDistinct) {
|
| 293 |
+
Phrase tgtPhrase = path->GetSurfacePhrase();
|
| 294 |
+
if (distinctHyps.insert(tgtPhrase).second) {
|
| 295 |
+
ret.Add(path);
|
| 296 |
+
} else {
|
| 297 |
+
delete path;
|
| 298 |
+
path = NULL;
|
| 299 |
+
}
|
| 300 |
+
} else {
|
| 301 |
+
ret.Add(path);
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
if(onlyDistinct) {
|
| 306 |
+
const size_t nBestFactor = options()->nbest.factor;
|
| 307 |
+
if (nBestFactor > 0)
|
| 308 |
+
contenders.Prune(count * nBestFactor);
|
| 309 |
+
} else {
|
| 310 |
+
contenders.Prune(count);
|
| 311 |
+
}
|
| 312 |
+
}
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
struct SGNReverseCompare {
|
| 316 |
+
bool operator() (const SearchGraphNode& s1, const SearchGraphNode& s2) const {
|
| 317 |
+
return s1.hypo->GetId() > s2.hypo->GetId();
|
| 318 |
+
}
|
| 319 |
+
};
|
| 320 |
+
|
| 321 |
+
/**
|
| 322 |
+
* Implements lattice sampling, as in Chatterjee & Cancedda, emnlp 2010
|
| 323 |
+
**/
|
| 324 |
+
void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
|
| 325 |
+
{
|
| 326 |
+
|
| 327 |
+
vector<SearchGraphNode> searchGraph;
|
| 328 |
+
GetSearchGraph(searchGraph);
|
| 329 |
+
|
| 330 |
+
//Calculation of the sigmas of each hypothesis and edge. In C&C notation this is
|
| 331 |
+
//the "log of the cumulative unnormalized probability of all the paths in the
|
| 332 |
+
// lattice for the hypothesis to a final node"
|
| 333 |
+
typedef pair<int, int> Edge;
|
| 334 |
+
map<const Hypothesis*, float> sigmas;
|
| 335 |
+
map<Edge, float> edgeScores;
|
| 336 |
+
map<const Hypothesis*, set<const Hypothesis*> > outgoingHyps;
|
| 337 |
+
map<int,const Hypothesis*> idToHyp;
|
| 338 |
+
map<int,float> fscores;
|
| 339 |
+
|
| 340 |
+
//Iterating through the hypos in reverse order of id gives a reverse
|
| 341 |
+
//topological order. We rely on the fact that hypo ids are given out
|
| 342 |
+
//sequentially, as the search proceeds.
|
| 343 |
+
//NB: Could just sort by stack.
|
| 344 |
+
sort(searchGraph.begin(), searchGraph.end(), SGNReverseCompare());
|
| 345 |
+
|
| 346 |
+
//first task is to fill in the outgoing hypos and edge scores.
|
| 347 |
+
for (vector<SearchGraphNode>::const_iterator i = searchGraph.begin();
|
| 348 |
+
i != searchGraph.end(); ++i) {
|
| 349 |
+
const Hypothesis* hypo = i->hypo;
|
| 350 |
+
idToHyp[hypo->GetId()] = hypo;
|
| 351 |
+
fscores[hypo->GetId()] = i->fscore;
|
| 352 |
+
if (hypo->GetId()) {
|
| 353 |
+
//back to current
|
| 354 |
+
const Hypothesis* prevHypo = i->hypo->GetPrevHypo();
|
| 355 |
+
outgoingHyps[prevHypo].insert(hypo);
|
| 356 |
+
edgeScores[Edge(prevHypo->GetId(),hypo->GetId())] =
|
| 357 |
+
hypo->GetScore() - prevHypo->GetScore();
|
| 358 |
+
}
|
| 359 |
+
//forward from current
|
| 360 |
+
if (i->forward >= 0) {
|
| 361 |
+
map<int,const Hypothesis*>::const_iterator idToHypIter = idToHyp.find(i->forward);
|
| 362 |
+
UTIL_THROW_IF2(idToHypIter == idToHyp.end(),
|
| 363 |
+
"Couldn't find hypothesis " << i->forward);
|
| 364 |
+
const Hypothesis* nextHypo = idToHypIter->second;
|
| 365 |
+
outgoingHyps[hypo].insert(nextHypo);
|
| 366 |
+
map<int,float>::const_iterator fscoreIter = fscores.find(nextHypo->GetId());
|
| 367 |
+
UTIL_THROW_IF2(fscoreIter == fscores.end(),
|
| 368 |
+
"Couldn't find scores for hypothsis " << nextHypo->GetId());
|
| 369 |
+
edgeScores[Edge(hypo->GetId(),nextHypo->GetId())] =
|
| 370 |
+
i->fscore - fscoreIter->second;
|
| 371 |
+
}
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
//then run through again to calculate sigmas
|
| 376 |
+
for (vector<SearchGraphNode>::const_iterator i = searchGraph.begin();
|
| 377 |
+
i != searchGraph.end(); ++i) {
|
| 378 |
+
|
| 379 |
+
if (i->forward == -1) {
|
| 380 |
+
sigmas[i->hypo] = 0;
|
| 381 |
+
} else {
|
| 382 |
+
map<const Hypothesis*, set<const Hypothesis*> >::const_iterator outIter =
|
| 383 |
+
outgoingHyps.find(i->hypo);
|
| 384 |
+
|
| 385 |
+
UTIL_THROW_IF2(outIter == outgoingHyps.end(),
|
| 386 |
+
"Couldn't find hypothesis " << i->hypo->GetId());
|
| 387 |
+
float sigma = 0;
|
| 388 |
+
for (set<const Hypothesis*>::const_iterator j = outIter->second.begin();
|
| 389 |
+
j != outIter->second.end(); ++j) {
|
| 390 |
+
map<const Hypothesis*, float>::const_iterator succIter = sigmas.find(*j);
|
| 391 |
+
UTIL_THROW_IF2(succIter == sigmas.end(),
|
| 392 |
+
"Couldn't find hypothesis " << (*j)->GetId());
|
| 393 |
+
map<Edge,float>::const_iterator edgeScoreIter =
|
| 394 |
+
edgeScores.find(Edge(i->hypo->GetId(),(*j)->GetId()));
|
| 395 |
+
UTIL_THROW_IF2(edgeScoreIter == edgeScores.end(),
|
| 396 |
+
"Couldn't find edge for hypothesis " << (*j)->GetId());
|
| 397 |
+
float term = edgeScoreIter->second + succIter->second; // Add sigma(*j)
|
| 398 |
+
if (sigma == 0) {
|
| 399 |
+
sigma = term;
|
| 400 |
+
} else {
|
| 401 |
+
sigma = log_sum(sigma,term);
|
| 402 |
+
}
|
| 403 |
+
}
|
| 404 |
+
sigmas[i->hypo] = sigma;
|
| 405 |
+
}
|
| 406 |
+
}
|
| 407 |
+
|
| 408 |
+
//The actual sampling!
|
| 409 |
+
const Hypothesis* startHypo = searchGraph.back().hypo;
|
| 410 |
+
UTIL_THROW_IF2(startHypo->GetId() != 0, "Expecting the start hypothesis ");
|
| 411 |
+
for (size_t i = 0; i < count; ++i) {
|
| 412 |
+
vector<const Hypothesis*> path;
|
| 413 |
+
path.push_back(startHypo);
|
| 414 |
+
while(1) {
|
| 415 |
+
map<const Hypothesis*, set<const Hypothesis*> >::const_iterator outIter =
|
| 416 |
+
outgoingHyps.find(path.back());
|
| 417 |
+
if (outIter == outgoingHyps.end() || !outIter->second.size()) {
|
| 418 |
+
//end of the path
|
| 419 |
+
break;
|
| 420 |
+
}
|
| 421 |
+
//score the possibles
|
| 422 |
+
vector<const Hypothesis*> candidates;
|
| 423 |
+
vector<float> candidateScores;
|
| 424 |
+
float scoreTotal = 0;
|
| 425 |
+
for (set<const Hypothesis*>::const_iterator j = outIter->second.begin();
|
| 426 |
+
j != outIter->second.end(); ++j) {
|
| 427 |
+
candidates.push_back(*j);
|
| 428 |
+
UTIL_THROW_IF2(sigmas.find(*j) == sigmas.end(),
|
| 429 |
+
"Hypothesis " << (*j)->GetId() << " not found");
|
| 430 |
+
Edge edge(path.back()->GetId(),(*j)->GetId());
|
| 431 |
+
UTIL_THROW_IF2(edgeScores.find(edge) == edgeScores.end(),
|
| 432 |
+
"Edge not found");
|
| 433 |
+
candidateScores.push_back(sigmas[*j] + edgeScores[edge]);
|
| 434 |
+
if (scoreTotal == 0) {
|
| 435 |
+
scoreTotal = candidateScores.back();
|
| 436 |
+
} else {
|
| 437 |
+
scoreTotal = log_sum(candidateScores.back(), scoreTotal);
|
| 438 |
+
}
|
| 439 |
+
}
|
| 440 |
+
|
| 441 |
+
//normalise
|
| 442 |
+
transform(candidateScores.begin(), candidateScores.end(), candidateScores.begin(), bind2nd(minus<float>(),scoreTotal));
|
| 443 |
+
//copy(candidateScores.begin(),candidateScores.end(),ostream_iterator<float>(cerr," "));
|
| 444 |
+
//cerr << endl;
|
| 445 |
+
|
| 446 |
+
//draw the sample
|
| 447 |
+
const float frandom = log(util::rand_incl(0.0f, 1.0f));
|
| 448 |
+
size_t position = 1;
|
| 449 |
+
float sum = candidateScores[0];
|
| 450 |
+
for (; position < candidateScores.size() && sum < frandom; ++position) {
|
| 451 |
+
sum = log_sum(sum,candidateScores[position]);
|
| 452 |
+
}
|
| 453 |
+
//cerr << "Random: " << frandom << " Chose " << position-1 << endl;
|
| 454 |
+
const Hypothesis* chosen = candidates[position-1];
|
| 455 |
+
path.push_back(chosen);
|
| 456 |
+
}
|
| 457 |
+
//cerr << "Path: " << endl;
|
| 458 |
+
//for (size_t j = 0; j < path.size(); ++j) {
|
| 459 |
+
// cerr << path[j]->GetId() << " " << path[j]->GetScoreBreakdown() << endl;
|
| 460 |
+
//}
|
| 461 |
+
//cerr << endl;
|
| 462 |
+
|
| 463 |
+
//Convert the hypos to TrellisPath
|
| 464 |
+
ret.Add(new TrellisPath(path));
|
| 465 |
+
//cerr << ret.at(ret.GetSize()-1).GetScoreBreakdown() << endl;
|
| 466 |
+
}
|
| 467 |
+
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
/** Update sentence-level decoding statistics from the best hypothesis.
 *
 *  Calls CalcFinalStats() on the best hypothesis (if any). Under verbosity
 *  level 2 it additionally traces the segmentation: for every hypothesis on
 *  the back-pointer chain it prints "[src-range:target-phrase] " segments,
 *  assembled back-to-front so the trace reads in source order.
 */
void Manager::CalcDecoderStatistics() const
{
  const Hypothesis *hypo = GetBestHypothesis();
  if (hypo != NULL) {
    GetSentenceStats().CalcFinalStats(*hypo);
    IFVERBOSE(2) {
      // NOTE(review): this inner NULL check is redundant — hypo was already
      // tested above and has not changed yet.
      if (hypo != NULL) {
        string buff;   // accumulated trace, built by prepending segments
        string buff2;  // "[range:phrase] " segment of the current hypothesis
        TRACE_ERR( "Source and Target Units:"
                   << hypo->GetInput());
        // build buff2 via insert(0,...) so the pieces end up in the order
        // "[" + range + ":" + phrase + "] "
        buff2.insert(0,"] ");
        buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
        buff2.insert(0,":");
        buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
        buff2.insert(0,"[");

        hypo = hypo->GetPrevHypo();
        // Walk back along the chain; each iteration commits the previously
        // built segment and builds the one for the current hypothesis. The
        // segment of the initial (empty) hypothesis is never committed:
        while (hypo != NULL) {
          //dont print out the empty final hypo
          buff.insert(0,buff2);
          buff2.clear();
          buff2.insert(0,"] ");
          buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
          buff2.insert(0,":");
          buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
          buff2.insert(0,"[");
          hypo = hypo->GetPrevHypo();
        }
        TRACE_ERR( buff << endl);
      }
    }
  }
}
|
| 506 |
+
|
| 507 |
+
void Manager::OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId) const
|
| 508 |
+
{
|
| 509 |
+
|
| 510 |
+
const Hypothesis *prevHypo = hypo->GetPrevHypo();
|
| 511 |
+
|
| 512 |
+
|
| 513 |
+
outputWordGraphStream << "J=" << linkId++
|
| 514 |
+
<< "\tS=" << prevHypo->GetId()
|
| 515 |
+
<< "\tE=" << hypo->GetId()
|
| 516 |
+
<< "\ta=";
|
| 517 |
+
|
| 518 |
+
// phrase table scores
|
| 519 |
+
const std::vector<PhraseDictionary*> &phraseTables = PhraseDictionary::GetColl();
|
| 520 |
+
std::vector<PhraseDictionary*>::const_iterator iterPhraseTable;
|
| 521 |
+
for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable) {
|
| 522 |
+
const PhraseDictionary *phraseTable = *iterPhraseTable;
|
| 523 |
+
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
|
| 524 |
+
|
| 525 |
+
outputWordGraphStream << scores[0];
|
| 526 |
+
vector<float>::const_iterator iterScore;
|
| 527 |
+
for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) {
|
| 528 |
+
outputWordGraphStream << ", " << *iterScore;
|
| 529 |
+
}
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
// language model scores
|
| 533 |
+
outputWordGraphStream << "\tl=";
|
| 534 |
+
|
| 535 |
+
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 536 |
+
for (size_t i = 0; i < statefulFFs.size(); ++i) {
|
| 537 |
+
const StatefulFeatureFunction *ff = statefulFFs[i];
|
| 538 |
+
const LanguageModel *lm = static_cast<const LanguageModel*>(ff);
|
| 539 |
+
|
| 540 |
+
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(lm);
|
| 541 |
+
|
| 542 |
+
outputWordGraphStream << scores[0];
|
| 543 |
+
vector<float>::const_iterator iterScore;
|
| 544 |
+
for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) {
|
| 545 |
+
outputWordGraphStream << ", " << *iterScore;
|
| 546 |
+
}
|
| 547 |
+
}
|
| 548 |
+
|
| 549 |
+
// re-ordering
|
| 550 |
+
outputWordGraphStream << "\tr=";
|
| 551 |
+
|
| 552 |
+
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
| 553 |
+
std::vector<FeatureFunction*>::const_iterator iter;
|
| 554 |
+
for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
|
| 555 |
+
const FeatureFunction *ff = *iter;
|
| 556 |
+
|
| 557 |
+
const DistortionScoreProducer *model = dynamic_cast<const DistortionScoreProducer*>(ff);
|
| 558 |
+
if (model) {
|
| 559 |
+
outputWordGraphStream << hypo->GetScoreBreakdown().GetScoreForProducer(model);
|
| 560 |
+
}
|
| 561 |
+
}
|
| 562 |
+
|
| 563 |
+
// output both source and target phrases in the word graph
|
| 564 |
+
outputWordGraphStream << "\tw=" << hypo->GetSourcePhraseStringRep()
|
| 565 |
+
<< "|" << hypo->GetCurrTargetPhrase();
|
| 566 |
+
|
| 567 |
+
outputWordGraphStream << endl;
|
| 568 |
+
}
|
| 569 |
+
|
| 570 |
+
// VN put back of OutputPassthroughInformation
|
| 571 |
+
void Manager::OutputPassthroughInformation(std::ostream &out, const Hypothesis *hypo) const
|
| 572 |
+
{
|
| 573 |
+
const std::string passthrough = hypo->GetManager().GetSource().GetPassthroughInformation();
|
| 574 |
+
out << passthrough;
|
| 575 |
+
}
|
| 576 |
+
// end of put back
|
| 577 |
+
|
| 578 |
+
void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo ) const
|
| 579 |
+
{
|
| 580 |
+
Phrase translation;
|
| 581 |
+
hypo->GetOutputPhrase(translation);
|
| 582 |
+
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 583 |
+
for (size_t i = 0; i < statefulFFs.size(); ++i) {
|
| 584 |
+
const StatefulFeatureFunction *ff = statefulFFs[i];
|
| 585 |
+
if (const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff)) {
|
| 586 |
+
lm->ReportHistoryOrder(out, translation);
|
| 587 |
+
}
|
| 588 |
+
}
|
| 589 |
+
}
|
| 590 |
+
|
| 591 |
+
void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const
|
| 592 |
+
{
|
| 593 |
+
const StaticData &staticData = StaticData::Instance();
|
| 594 |
+
const PARAM_VEC *params;
|
| 595 |
+
|
| 596 |
+
string fileName;
|
| 597 |
+
bool outputNBest = false;
|
| 598 |
+
params = staticData.GetParameter().GetParam("output-word-graph");
|
| 599 |
+
if (params && params->size()) {
|
| 600 |
+
fileName = params->at(0);
|
| 601 |
+
|
| 602 |
+
if (params->size() == 2) {
|
| 603 |
+
outputNBest = Scan<bool>(params->at(1));
|
| 604 |
+
}
|
| 605 |
+
}
|
| 606 |
+
|
| 607 |
+
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
|
| 608 |
+
|
| 609 |
+
outputWordGraphStream << "VERSION=1.0" << endl
|
| 610 |
+
<< "UTTERANCE=" << translationId << endl;
|
| 611 |
+
|
| 612 |
+
size_t linkId = 0;
|
| 613 |
+
std::vector < HypothesisStack* >::const_iterator iterStack;
|
| 614 |
+
for (iterStack = ++hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack) {
|
| 615 |
+
const HypothesisStack &stack = **iterStack;
|
| 616 |
+
HypothesisStack::const_iterator iterHypo;
|
| 617 |
+
for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
|
| 618 |
+
const Hypothesis *hypo = *iterHypo;
|
| 619 |
+
OutputWordGraph(outputWordGraphStream, hypo, linkId);
|
| 620 |
+
|
| 621 |
+
if (outputNBest) {
|
| 622 |
+
const ArcList *arcList = hypo->GetArcList();
|
| 623 |
+
if (arcList != NULL) {
|
| 624 |
+
ArcList::const_iterator iterArcList;
|
| 625 |
+
for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
|
| 626 |
+
const Hypothesis *loserHypo = *iterArcList;
|
| 627 |
+
OutputWordGraph(outputWordGraphStream, loserHypo, linkId);
|
| 628 |
+
}
|
| 629 |
+
}
|
| 630 |
+
} //if (outputNBest)
|
| 631 |
+
} //for (iterHypo
|
| 632 |
+
} // for (iterStack
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
/** Build a flat representation of the search graph.
 *
 *  Fills searchGraph with one SearchGraphNode per connected hypothesis and
 *  one per recombined arc. For every connected hypothesis the best forward
 *  path (successor id and forward score) is computed by sweeping the stacks
 *  from last to first; hypotheses on the final stack have forward score 0
 *  and forward id -1 (terminal marker).
 *
 *  @param searchGraph output vector; nodes are appended in stack order
 */
void Manager::GetSearchGraph(vector<SearchGraphNode>& searchGraph) const
{
  std::map < int, bool > connected;      // hypo id -> reachable from final stack
  std::map < int, int > forward;         // hypo id -> best successor hypo id (-1 = terminal)
  std::map < int, double > forwardScore; // hypo id -> best score to sentence end

  // *** find connected hypotheses ***
  std::vector< const Hypothesis *> connectedList;
  GetConnectedGraph(&connected, &connectedList);

  // ** compute best forward path for each hypothesis *** //

  // forward cost of hypotheses on final stack is 0
  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  HypothesisStack::const_iterator iterHypo;
  for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
    const Hypothesis *hypo = *iterHypo;
    forwardScore[ hypo->GetId() ] = 0.0f;
    forward[ hypo->GetId() ] = -1;
  }

  // compete for best forward score of previous hypothesis
  // (sweep stacks from last to first; note the loop skips the very first
  // stack, which holds only the initial empty hypothesis)
  // NOTE(review): `--hypoStackColl.end()` decrements an rvalue; this only
  // compiles where the vector iterator is a class type, not a raw pointer.
  std::vector < HypothesisStack* >::const_iterator iterStack;
  for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      if (connected.find( hypo->GetId() ) != connected.end()) {
        // make a play for previous hypothesis
        // (forward score of prev = forward score of hypo + transition score)
        const Hypothesis *prevHypo = hypo->GetPrevHypo();
        double fscore = forwardScore[ hypo->GetId() ] +
                        hypo->GetScore() - prevHypo->GetScore();
        if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
            || forwardScore.find( prevHypo->GetId() )->second < fscore) {
          forwardScore[ prevHypo->GetId() ] = fscore;
          forward[ prevHypo->GetId() ] = hypo->GetId();
        }
        // all arcs also make a play
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            // make a play
            const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
            double fscore = forwardScore[ hypo->GetId() ] +
                            loserHypo->GetScore() - loserPrevHypo->GetScore();
            if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
                || forwardScore.find( loserPrevHypo->GetId() )->second < fscore) {
              forwardScore[ loserPrevHypo->GetId() ] = fscore;
              forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
            }
          } // end for arc list
        } // end if arc list empty
      } // end if hypo connected
    } // end for hypo
  } // end for stack

  // *** output all connected hypotheses *** //

  // the initial hypothesis (id 0) is always part of the graph
  connected[ 0 ] = true;
  for (iterStack = hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      if (connected.find( hypo->GetId() ) != connected.end()) {
        searchGraph.push_back(SearchGraphNode(hypo,NULL,forward[hypo->GetId()],
                                              forwardScore[hypo->GetId()]));

        // recombined arcs share the surviving hypothesis' forward info
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            searchGraph.push_back(SearchGraphNode(loserHypo,hypo,
                                                  forward[hypo->GetId()], forwardScore[hypo->GetId()]));
          }
        } // end if arcList empty
      } // end if connected
    } // end for iterHypo
  } // end for iterStack

}
|
| 721 |
+
|
| 722 |
+
void Manager::OutputFeatureWeightsForSLF(std::ostream &outputSearchGraphStream) const
|
| 723 |
+
{
|
| 724 |
+
outputSearchGraphStream.setf(std::ios::fixed);
|
| 725 |
+
outputSearchGraphStream.precision(6);
|
| 726 |
+
|
| 727 |
+
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
| 728 |
+
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 729 |
+
size_t featureIndex = 1;
|
| 730 |
+
for (size_t i = 0; i < sff.size(); ++i) {
|
| 731 |
+
featureIndex = OutputFeatureWeightsForSLF(featureIndex, sff[i], outputSearchGraphStream);
|
| 732 |
+
}
|
| 733 |
+
for (size_t i = 0; i < slf.size(); ++i) {
|
| 734 |
+
/*
|
| 735 |
+
if (slf[i]->GetScoreProducerWeightShortName() != "u" &&
|
| 736 |
+
slf[i]->GetScoreProducerWeightShortName() != "tm" &&
|
| 737 |
+
slf[i]->GetScoreProducerWeightShortName() != "I" &&
|
| 738 |
+
slf[i]->GetScoreProducerWeightShortName() != "g")
|
| 739 |
+
*/
|
| 740 |
+
{
|
| 741 |
+
featureIndex = OutputFeatureWeightsForSLF(featureIndex, slf[i], outputSearchGraphStream);
|
| 742 |
+
}
|
| 743 |
+
}
|
| 744 |
+
const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
|
| 745 |
+
for( size_t i=0; i<pds.size(); i++ ) {
|
| 746 |
+
featureIndex = OutputFeatureWeightsForSLF(featureIndex, pds[i], outputSearchGraphStream);
|
| 747 |
+
}
|
| 748 |
+
const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
|
| 749 |
+
for( size_t i=0; i<gds.size(); i++ ) {
|
| 750 |
+
featureIndex = OutputFeatureWeightsForSLF(featureIndex, gds[i], outputSearchGraphStream);
|
| 751 |
+
}
|
| 752 |
+
}
|
| 753 |
+
|
| 754 |
+
void Manager::OutputFeatureValuesForSLF(const Hypothesis* hypo, bool zeros, std::ostream &outputSearchGraphStream) const
|
| 755 |
+
{
|
| 756 |
+
outputSearchGraphStream.setf(std::ios::fixed);
|
| 757 |
+
outputSearchGraphStream.precision(6);
|
| 758 |
+
|
| 759 |
+
// outputSearchGraphStream << endl;
|
| 760 |
+
// outputSearchGraphStream << (*hypo) << endl;
|
| 761 |
+
// const ScoreComponentCollection& scoreCollection = hypo->GetScoreBreakdown();
|
| 762 |
+
// outputSearchGraphStream << scoreCollection << endl;
|
| 763 |
+
|
| 764 |
+
const vector<const StatelessFeatureFunction*>& slf =StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
| 765 |
+
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 766 |
+
size_t featureIndex = 1;
|
| 767 |
+
for (size_t i = 0; i < sff.size(); ++i) {
|
| 768 |
+
featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, sff[i], outputSearchGraphStream);
|
| 769 |
+
}
|
| 770 |
+
for (size_t i = 0; i < slf.size(); ++i) {
|
| 771 |
+
/*
|
| 772 |
+
if (slf[i]->GetScoreProducerWeightShortName() != "u" &&
|
| 773 |
+
slf[i]->GetScoreProducerWeightShortName() != "tm" &&
|
| 774 |
+
slf[i]->GetScoreProducerWeightShortName() != "I" &&
|
| 775 |
+
slf[i]->GetScoreProducerWeightShortName() != "g")
|
| 776 |
+
*/
|
| 777 |
+
{
|
| 778 |
+
featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, slf[i], outputSearchGraphStream);
|
| 779 |
+
}
|
| 780 |
+
}
|
| 781 |
+
const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
|
| 782 |
+
for( size_t i=0; i<pds.size(); i++ ) {
|
| 783 |
+
featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, pds[i], outputSearchGraphStream);
|
| 784 |
+
}
|
| 785 |
+
const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
|
| 786 |
+
for( size_t i=0; i<gds.size(); i++ ) {
|
| 787 |
+
featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, gds[i], outputSearchGraphStream);
|
| 788 |
+
}
|
| 789 |
+
|
| 790 |
+
}
|
| 791 |
+
|
| 792 |
+
void Manager::OutputFeatureValuesForHypergraph(const Hypothesis* hypo, std::ostream &outputSearchGraphStream) const
|
| 793 |
+
{
|
| 794 |
+
outputSearchGraphStream.setf(std::ios::fixed);
|
| 795 |
+
outputSearchGraphStream.precision(6);
|
| 796 |
+
ScoreComponentCollection scores = hypo->GetScoreBreakdown();
|
| 797 |
+
const Hypothesis *prevHypo = hypo->GetPrevHypo();
|
| 798 |
+
if (prevHypo) {
|
| 799 |
+
scores.MinusEquals(prevHypo->GetScoreBreakdown());
|
| 800 |
+
}
|
| 801 |
+
scores.Save(outputSearchGraphStream, false);
|
| 802 |
+
}
|
| 803 |
+
|
| 804 |
+
|
| 805 |
+
/** Write SLF weight declarations for one feature function.
 *
 *  For each dense score component, emits a comment line plus an
 *  "x<N>scale=<weight>" line (HTK SLF scale syntax), where <N> is the
 *  running feature index.
 *
 *  @param index first feature index to use for this function
 *  @param ff feature function whose global weights are written
 *  @return the index the next feature function should start at
 *
 *  NOTE(review): the description is printed twice on the comment line —
 *  possibly one occurrence was meant to be a different label; confirm.
 *  Sparse features (numScoreComps == 0) cannot be expressed in SLF, so the
 *  function aborts via assert(false) after printing a diagnostic.
 */
size_t Manager::OutputFeatureWeightsForSLF(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) const
{
  size_t numScoreComps = ff->GetNumScoreComponents();
  if (numScoreComps != 0) {
    vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
    for (size_t i = 0; i < numScoreComps; ++i) {
      outputSearchGraphStream << "# " << ff->GetScoreProducerDescription()
                              << " " << ff->GetScoreProducerDescription()
                              << " " << (i+1) << " of " << numScoreComps << endl
                              << "x" << (index+i) << "scale=" << values[i] << endl;
    }
    return index+numScoreComps;
  } else {
    cerr << "Sparse features are not supported when outputting HTK standard lattice format" << endl;
    assert(false);
    return 0;
  }
}
|
| 823 |
+
|
| 824 |
+
size_t
|
| 825 |
+
Manager::
|
| 826 |
+
OutputFeatureValuesForSLF(size_t index, bool zeros, const Hypothesis* hypo,
|
| 827 |
+
const FeatureFunction* ff, std::ostream &out) const
|
| 828 |
+
{
|
| 829 |
+
const ScoreComponentCollection& scoreCollection = hypo->GetScoreBreakdown();
|
| 830 |
+
vector<float> featureValues = scoreCollection.GetScoresForProducer(ff);
|
| 831 |
+
size_t numScoreComps = featureValues.size();
|
| 832 |
+
for (size_t i = 0; i < numScoreComps; ++i) {
|
| 833 |
+
out << "x" << (index+i) << "=" << ((zeros) ? 0.0 : featureValues[i]) << " ";
|
| 834 |
+
}
|
| 835 |
+
return index + numScoreComps;
|
| 836 |
+
}
|
| 837 |
+
|
| 838 |
+
/**! Output search graph in hypergraph format of Kenneth Heafield's lazy hypergraph decoder */
|
| 839 |
+
/**! Output search graph in hypergraph format of Kenneth Heafield's lazy hypergraph decoder
 *
 *  Pass 1 assigns dense, topologically ordered hypergraph ids to the Moses
 *  hypotheses (predecessors before successors, recombined hypotheses mapped
 *  to their surviving representative) and records, per node, which search
 *  graph arcs end there. Pass 2 prints the header, then each node with its
 *  incoming arcs ("target ||| features ||| source-covered"), and finally a
 *  unique end node fed by all terminal nodes.
 */
void
Manager::
OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
{

  VERBOSE(2,"Getting search graph to output as hypergraph for sentence " << m_source.GetTranslationId() << std::endl)

  vector<SearchGraphNode> searchGraph;
  GetSearchGraph(searchGraph);


  map<int,int> mosesIDToHypergraphID;   // Moses hypo id -> dense hypergraph id
  // map<int,int> hypergraphIDToMosesID;
  set<int> terminalNodes;               // hypergraph ids with no successor
  multimap<int,int> hypergraphIDToArcs; // hypergraph node id -> indices of arcs ending there

  VERBOSE(2,"Gathering information about search graph to output as hypergraph for sentence " << m_source.GetTranslationId() << std::endl)

  long numNodes = 0;
  long endNode = 0;
  {
    // Pass 1: number nodes. A predecessor is always numbered before the
    // hypothesis itself, which guarantees topological order below.
    long hypergraphHypothesisID = 0;
    for (size_t arcNumber = 0, size=searchGraph.size(); arcNumber < size; ++arcNumber) {

      // Get an id number for the previous hypothesis
      const Hypothesis *prevHypo = searchGraph[arcNumber].hypo->GetPrevHypo();
      if (prevHypo!=NULL) {
        int mosesPrevHypothesisID = prevHypo->GetId();
        if (mosesIDToHypergraphID.count(mosesPrevHypothesisID) == 0) {
          mosesIDToHypergraphID[mosesPrevHypothesisID] = hypergraphHypothesisID;
          // hypergraphIDToMosesID[hypergraphHypothesisID] = mosesPrevHypothesisID;
          hypergraphHypothesisID += 1;
        }
      }

      // Get an id number for this hypothesis
      // (recombined hypotheses share the id of the hypothesis they were
      // recombined into)
      int mosesHypothesisID;
      if (searchGraph[arcNumber].recombinationHypo) {
        mosesHypothesisID = searchGraph[arcNumber].recombinationHypo->GetId();
      } else {
        mosesHypothesisID = searchGraph[arcNumber].hypo->GetId();
      }

      if (mosesIDToHypergraphID.count(mosesHypothesisID) == 0) {

        mosesIDToHypergraphID[mosesHypothesisID] = hypergraphHypothesisID;
        // hypergraphIDToMosesID[hypergraphHypothesisID] = mosesHypothesisID;

        bool terminalNode = (searchGraph[arcNumber].forward == -1);
        if (terminalNode) {
          // Final arc to end node, representing the end of the sentence </s>
          terminalNodes.insert(hypergraphHypothesisID);
        }

        hypergraphHypothesisID += 1;
      }

      // Record that this arc ends at this node
      hypergraphIDToArcs.insert(pair<int,int>(mosesIDToHypergraphID[mosesHypothesisID],arcNumber));

    }

    // Unique end node
    endNode = hypergraphHypothesisID;
    // mosesIDToHypergraphID[hypergraphHypothesisID] = hypergraphHypothesisID;
    numNodes = endNode + 1;

  }


  // one arc per search graph entry, plus one </s> arc per terminal node
  long numArcs = searchGraph.size() + terminalNodes.size();

  //Header
  outputSearchGraphStream << "# target ||| features ||| source-covered" << endl;

  // Print number of nodes and arcs
  outputSearchGraphStream << numNodes << " " << numArcs << endl;

  VERBOSE(2,"Search graph to output as hypergraph for sentence " << m_source.GetTranslationId()
          << " contains " << numArcs << " arcs and " << numNodes << " nodes" << std::endl)

  VERBOSE(2,"Outputting search graph to output as hypergraph for sentence " << m_source.GetTranslationId() << std::endl)


  // Pass 2: print every node (in topological = numeric order) with its
  // incoming arcs.
  for (int hypergraphHypothesisID=0; hypergraphHypothesisID < endNode; hypergraphHypothesisID+=1) {
    if (hypergraphHypothesisID % 100000 == 0) {
      VERBOSE(2,"Processed " << hypergraphHypothesisID << " of " << numNodes << " hypergraph nodes for sentence " << m_source.GetTranslationId() << std::endl);
    }
    // int mosesID = hypergraphIDToMosesID[hypergraphHypothesisID];
    size_t count = hypergraphIDToArcs.count(hypergraphHypothesisID);
    // VERBOSE(2,"Hypergraph node " << hypergraphHypothesisID << " has " << count << " incoming arcs" << std::endl)
    if (count > 0) {
      outputSearchGraphStream << "# node " << hypergraphHypothesisID << endl;
      outputSearchGraphStream << count << "\n";

      pair<multimap<int,int>::iterator, multimap<int,int>::iterator> range =
        hypergraphIDToArcs.equal_range(hypergraphHypothesisID);
      for (multimap<int,int>::iterator it=range.first; it!=range.second; ++it) {
        int lineNumber = (*it).second;
        const Hypothesis *thisHypo = searchGraph[lineNumber].hypo;
        int mosesHypothesisID;// = thisHypo->GetId();
        if (searchGraph[lineNumber].recombinationHypo) {
          mosesHypothesisID = searchGraph[lineNumber].recombinationHypo->GetId();
        } else {
          mosesHypothesisID = searchGraph[lineNumber].hypo->GetId();
        }
        // int actualHypergraphHypothesisID = mosesIDToHypergraphID[mosesHypothesisID];
        // sanity check: the arc must really belong to this node
        UTIL_THROW_IF2(
          (hypergraphHypothesisID != mosesIDToHypergraphID[mosesHypothesisID]),
          "Error while writing search lattice as hypergraph for sentence " << m_source.GetTranslationId() << ". " <<
          "Moses node " << mosesHypothesisID << " was expected to have hypergraph id " << hypergraphHypothesisID <<
          ", but actually had hypergraph id " << mosesIDToHypergraphID[mosesHypothesisID] <<
          ". There are " << numNodes << " nodes in the search lattice."
        );

        const Hypothesis *prevHypo = thisHypo->GetPrevHypo();
        if (prevHypo==NULL) {
          // VERBOSE(2,"Hypergraph node " << hypergraphHypothesisID << " start of sentence" << std::endl)
          outputSearchGraphStream << "<s> ||| ||| 0\n";
        } else {
          int startNode = mosesIDToHypergraphID[prevHypo->GetId()];
          // VERBOSE(2,"Hypergraph node " << hypergraphHypothesisID << " has parent node " << startNode << std::endl)
          UTIL_THROW_IF2(
            (startNode >= hypergraphHypothesisID),
            "Error while writing search lattice as hypergraph for sentence" << m_source.GetTranslationId() << ". " <<
            "The nodes must be output in topological order. The code attempted to violate this restriction."
          );

          const TargetPhrase &targetPhrase = thisHypo->GetCurrTargetPhrase();
          int targetWordCount = targetPhrase.GetSize();

          // arc line: [startNode] words ||| transition features ||| coverage
          outputSearchGraphStream << "[" << startNode << "] ";
          for (int targetWordIndex=0; targetWordIndex<targetWordCount; targetWordIndex+=1) {
            outputSearchGraphStream << targetPhrase.GetWord(targetWordIndex)[0]->GetString() << " ";
          }
          outputSearchGraphStream << " ||| ";
          OutputFeatureValuesForHypergraph(thisHypo, outputSearchGraphStream);
          outputSearchGraphStream << " ||| " << thisHypo->GetWordsBitmap().GetNumWordsCovered();
          outputSearchGraphStream << "\n";
        }
      }
    }
  }

  // Print node and arc(s) for end of sentence </s>
  outputSearchGraphStream << "# node " << endNode << endl;
  outputSearchGraphStream << terminalNodes.size() << "\n";
  for (set<int>::iterator it=terminalNodes.begin(); it!=terminalNodes.end(); ++it) {
    outputSearchGraphStream << "[" << (*it) << "] </s> ||| ||| " << GetSource().GetSize() << "\n";
  }

}
|
| 991 |
+
|
| 992 |
+
|
| 993 |
+
/**! Output search graph in HTK standard lattice format (SLF) */
|
| 994 |
+
/**! Output search graph in HTK standard lattice format (SLF) */
/*
 * Pass 1 counts nodes/arcs: multi-word target phrases become chains of
 * single-word links, so each hypothesis contributes one node per target
 * word (each node id is the id of the LAST node of its chain); terminal
 * hypotheses add one extra </s> arc to a unique end node. Pass 2 prints the
 * SLF header, weight declarations, and the links themselves.
 */
void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSearchGraphStream) const
{

  vector<SearchGraphNode> searchGraph;
  GetSearchGraph(searchGraph);

  long numArcs = 0;
  long numNodes = 0;

  map<int,int> nodes;       // Moses hypo id -> SLF node id (end of its word chain)
  set<int> terminalNodes;   // SLF node ids already linked to the end node

  // Unique start node
  nodes[0] = 0;

  for (size_t arcNumber = 0; arcNumber < searchGraph.size(); ++arcNumber) {

    // one SLF link per target word of the phrase
    int targetWordCount = searchGraph[arcNumber].hypo->GetCurrTargetPhrase().GetSize();
    numArcs += targetWordCount;

    int hypothesisID = searchGraph[arcNumber].hypo->GetId();
    if (nodes.count(hypothesisID) == 0) {

      numNodes += targetWordCount;
      nodes[hypothesisID] = numNodes;
      //numNodes += 1;

      // terminal hypotheses need one extra arc to the unique end node
      bool terminalNode = (searchGraph[arcNumber].forward == -1);
      if (terminalNode) {
        numArcs += 1;
      }
    }

  }
  numNodes += 1;

  // Unique end node
  nodes[numNodes] = numNodes;

  outputSearchGraphStream << "UTTERANCE=Sentence_" << translationId << endl;
  outputSearchGraphStream << "VERSION=1.1" << endl;
  outputSearchGraphStream << "base=2.71828182845905" << endl;
  outputSearchGraphStream << "NODES=" << (numNodes+1) << endl;
  outputSearchGraphStream << "LINKS=" << numArcs << endl;

  OutputFeatureWeightsForSLF(outputSearchGraphStream);

  for (size_t arcNumber = 0, lineNumber = 0; lineNumber < searchGraph.size(); ++lineNumber) {
    const Hypothesis *thisHypo = searchGraph[lineNumber].hypo;
    const Hypothesis *prevHypo = thisHypo->GetPrevHypo();
    if (prevHypo) {

      int startNode = nodes[prevHypo->GetId()];
      int endNode = nodes[thisHypo->GetId()];
      bool terminalNode = (searchGraph[lineNumber].forward == -1);
      const TargetPhrase &targetPhrase = thisHypo->GetCurrTargetPhrase();
      int targetWordCount = targetPhrase.GetSize();

      for (int targetWordIndex=0; targetWordIndex<targetWordCount; targetWordIndex+=1) {
        // x = how many words of the chain remain, so the chain's
        // intermediate node ids are endNode-x .. endNode
        int x = (targetWordCount-targetWordIndex);

        outputSearchGraphStream << "J=" << arcNumber;

        if (targetWordIndex==0) {
          outputSearchGraphStream << " S=" << startNode;
        } else {
          outputSearchGraphStream << " S=" << endNode - x;
        }

        outputSearchGraphStream << " E=" << endNode - (x-1)
                                << " W=" << targetPhrase.GetWord(targetWordIndex);

        // feature values go on the first link of the chain only; the rest
        // carry zeros
        OutputFeatureValuesForSLF(thisHypo, (targetWordIndex>0), outputSearchGraphStream);

        outputSearchGraphStream << endl;

        arcNumber += 1;
      }

      // one </s> link per distinct terminal node
      if (terminalNode && terminalNodes.count(endNode) == 0) {
        terminalNodes.insert(endNode);
        outputSearchGraphStream << "J=" << arcNumber
                                << " S=" << endNode
                                << " E=" << numNodes
                                << endl;
        arcNumber += 1;
      }
    }
  }

}
|
| 1085 |
+
|
| 1086 |
+
|
| 1087 |
+
void
|
| 1088 |
+
OutputSearchNode(AllOptions const& opts, long translationId,
|
| 1089 |
+
std::ostream &out,
|
| 1090 |
+
SearchGraphNode const& searchNode)
|
| 1091 |
+
{
|
| 1092 |
+
const vector<FactorType> &outputFactorOrder = opts.output.factor_order;
|
| 1093 |
+
bool extendedFormat = opts.output.SearchGraphExtended.size();
|
| 1094 |
+
out << translationId;
|
| 1095 |
+
|
| 1096 |
+
// special case: initial hypothesis
|
| 1097 |
+
if ( searchNode.hypo->GetId() == 0 ) {
|
| 1098 |
+
out << " hyp=0 stack=0";
|
| 1099 |
+
if (extendedFormat) {
|
| 1100 |
+
out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore;
|
| 1101 |
+
}
|
| 1102 |
+
out << endl;
|
| 1103 |
+
return;
|
| 1104 |
+
}
|
| 1105 |
+
|
| 1106 |
+
const Hypothesis *prevHypo = searchNode.hypo->GetPrevHypo();
|
| 1107 |
+
|
| 1108 |
+
// output in traditional format
|
| 1109 |
+
if (!extendedFormat) {
|
| 1110 |
+
out << " hyp=" << searchNode.hypo->GetId()
|
| 1111 |
+
<< " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered()
|
| 1112 |
+
<< " back=" << prevHypo->GetId()
|
| 1113 |
+
<< " score=" << searchNode.hypo->GetScore()
|
| 1114 |
+
<< " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore());
|
| 1115 |
+
|
| 1116 |
+
if (searchNode.recombinationHypo != NULL)
|
| 1117 |
+
out << " recombined=" << searchNode.recombinationHypo->GetId();
|
| 1118 |
+
|
| 1119 |
+
out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore
|
| 1120 |
+
<< " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos()
|
| 1121 |
+
<< "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos()
|
| 1122 |
+
<< " out=" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder)
|
| 1123 |
+
<< endl;
|
| 1124 |
+
return;
|
| 1125 |
+
}
|
| 1126 |
+
|
| 1127 |
+
out << " hyp=" << searchNode.hypo->GetId();
|
| 1128 |
+
out << " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered()
|
| 1129 |
+
<< " back=" << prevHypo->GetId()
|
| 1130 |
+
<< " score=" << searchNode.hypo->GetScore()
|
| 1131 |
+
<< " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore());
|
| 1132 |
+
|
| 1133 |
+
if (searchNode.recombinationHypo != NULL)
|
| 1134 |
+
out << " recombined=" << searchNode.recombinationHypo->GetId();
|
| 1135 |
+
|
| 1136 |
+
out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore
|
| 1137 |
+
<< " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos()
|
| 1138 |
+
<< "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos();
|
| 1139 |
+
|
| 1140 |
+
// Modified so that -osgx is a superset of -osg (GST Oct 2011)
|
| 1141 |
+
ScoreComponentCollection scoreBreakdown = searchNode.hypo->GetScoreBreakdown();
|
| 1142 |
+
scoreBreakdown.MinusEquals( prevHypo->GetScoreBreakdown() );
|
| 1143 |
+
out << " scores=\"" << scoreBreakdown << "\""
|
| 1144 |
+
<< " out=\"" << searchNode.hypo->GetSourcePhraseStringRep()
|
| 1145 |
+
<< "|" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << "\"" << endl;
|
| 1146 |
+
}
|
| 1147 |
+
|
| 1148 |
+
/** Collect every hypothesis that lies on some complete path through the
 *  search graph, i.e. is reachable backwards from the final stack.
 *  \param pConnected     out: hypothesis id -> true for each connected hypo
 *  \param pConnectedList out: the connected hypotheses, in discovery order
 */
void Manager::GetConnectedGraph(
  std::map< int, bool >* pConnected,
  std::vector< const Hypothesis* >* pConnectedList) const
{
  std::map < int, bool >& connected = *pConnected;
  std::vector< const Hypothesis *>& connectedList = *pConnectedList;

  // start with the ones in the final stack
  const std::vector < HypothesisStack* > &hypoStackColl
  = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  HypothesisStack::const_iterator iterHypo;
  for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
    const Hypothesis *hypo = *iterHypo;
    connected[ hypo->GetId() ] = true;
    connectedList.push_back( hypo );
  }
  // move back from known connected hypotheses;
  // connectedList grows while we scan it, so it doubles as a work queue
  for(size_t i=0; i<connectedList.size(); i++) {
    const Hypothesis *hypo = connectedList[i];

    // add back pointer
    const Hypothesis *prevHypo = hypo->GetPrevHypo();
    if (prevHypo && prevHypo->GetId() > 0 // don't add empty hypothesis
        && connected.find( prevHypo->GetId() ) == connected.end()) { // don't add already added
      connected[ prevHypo->GetId() ] = true;
      connectedList.push_back( prevHypo );
    }

    // add arcs: hypotheses that were recombined into this one
    const ArcList *arcList = hypo->GetArcList();
    if (arcList != NULL) {
      ArcList::const_iterator iterArcList;
      for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
        const Hypothesis *loserHypo = *iterArcList;
        if (connected.find( loserHypo->GetId() ) == connected.end()) { // don't add already added
          connected[ loserHypo->GetId() ] = true;
          connectedList.push_back( loserHypo );
        }
      }
    }
  }
}
|
| 1191 |
+
|
| 1192 |
+
void Manager::GetWinnerConnectedGraph(
|
| 1193 |
+
std::map< int, bool >* pConnected,
|
| 1194 |
+
std::vector< const Hypothesis* >* pConnectedList) const
|
| 1195 |
+
{
|
| 1196 |
+
std::map < int, bool >& connected = *pConnected;
|
| 1197 |
+
std::vector< const Hypothesis *>& connectedList = *pConnectedList;
|
| 1198 |
+
|
| 1199 |
+
// start with the ones in the final stack
|
| 1200 |
+
const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
|
| 1201 |
+
const HypothesisStack &finalStack = *hypoStackColl.back();
|
| 1202 |
+
HypothesisStack::const_iterator iterHypo;
|
| 1203 |
+
for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
|
| 1204 |
+
const Hypothesis *hypo = *iterHypo;
|
| 1205 |
+
connected[ hypo->GetId() ] = true;
|
| 1206 |
+
connectedList.push_back( hypo );
|
| 1207 |
+
}
|
| 1208 |
+
|
| 1209 |
+
// move back from known connected hypotheses
|
| 1210 |
+
for(size_t i=0; i<connectedList.size(); i++) {
|
| 1211 |
+
const Hypothesis *hypo = connectedList[i];
|
| 1212 |
+
|
| 1213 |
+
// add back pointer
|
| 1214 |
+
const Hypothesis *prevHypo = hypo->GetPrevHypo();
|
| 1215 |
+
if (prevHypo->GetId() > 0 // don't add empty hypothesis
|
| 1216 |
+
&& connected.find( prevHypo->GetId() ) == connected.end()) { // don't add already added
|
| 1217 |
+
connected[ prevHypo->GetId() ] = true;
|
| 1218 |
+
connectedList.push_back( prevHypo );
|
| 1219 |
+
}
|
| 1220 |
+
|
| 1221 |
+
// add arcs
|
| 1222 |
+
const ArcList *arcList = hypo->GetArcList();
|
| 1223 |
+
if (arcList != NULL) {
|
| 1224 |
+
ArcList::const_iterator iterArcList;
|
| 1225 |
+
for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
|
| 1226 |
+
const Hypothesis *loserHypo = *iterArcList;
|
| 1227 |
+
if (connected.find( loserHypo->GetPrevHypo()->GetId() ) == connected.end() && loserHypo->GetPrevHypo()->GetId() > 0) { // don't add already added & don't add hyp 0
|
| 1228 |
+
connected[ loserHypo->GetPrevHypo()->GetId() ] = true;
|
| 1229 |
+
connectedList.push_back( loserHypo->GetPrevHypo() );
|
| 1230 |
+
}
|
| 1231 |
+
}
|
| 1232 |
+
}
|
| 1233 |
+
}
|
| 1234 |
+
}
|
| 1235 |
+
|
| 1236 |
+
|
| 1237 |
+
#ifdef HAVE_PROTOBUF
|
| 1238 |
+
|
| 1239 |
+
/** Fill a protobuf hypergraph edge from a hypothesis: its target-phrase
 *  rule plus the per-edge feature deltas (this hypo's score breakdown minus
 *  its predecessor's, negated). Edges without a predecessor (the initial
 *  hypothesis) get no feature values, which protobuf treats as all zeros. */
void SerializeEdgeInfo(const Hypothesis* hypo, hgmert::Hypergraph_Edge* edge)
{
  hgmert::Rule* rule = edge->mutable_rule();
  hypo->GetCurrTargetPhrase().WriteToRulePB(rule);
  const Hypothesis* prev = hypo->GetPrevHypo();
  // if the feature values are empty, they default to 0
  if (!prev) return;
  // score breakdown is an aggregate (forward) quantity, but the exported
  // graph object just wants the feature values on the edges
  const ScoreComponentCollection& scores = hypo->GetScoreBreakdown();
  const ScoreComponentCollection& pscores = prev->GetScoreBreakdown();
  for (unsigned int i = 0; i < scores.size(); ++i)
    edge->add_feature_values((scores[i] - pscores[i]) * -1.0);
}
|
| 1253 |
+
|
| 1254 |
+
/** Get-or-create the hypergraph node for a hypothesis.
 *  \param hypo      hypothesis to map
 *  \param i2hgnode  in/out: hypothesis id -> node index map
 *  \param hg        hypergraph being built (nodes appended on demand)
 *  \param hgNodeIdx out: index of the (possibly new) node
 *  \return pointer into hg's node array (valid until hg is modified) */
hgmert::Hypergraph_Node* GetHGNode(
  const Hypothesis* hypo,
  std::map< int, int>* i2hgnode,
  hgmert::Hypergraph* hg,
  int* hgNodeIdx)
{
  hgmert::Hypergraph_Node* hgnode;
  std::map < int, int >::iterator idxi = i2hgnode->find(hypo->GetId());
  if (idxi == i2hgnode->end()) {
    // first time we see this hypothesis: append a node and remember its index
    *hgNodeIdx = ((*i2hgnode)[hypo->GetId()] = hg->nodes_size());
    hgnode = hg->add_nodes();
  } else {
    // already mapped: return the existing node
    *hgNodeIdx = idxi->second;
    hgnode = hg->mutable_nodes(*hgNodeIdx);
  }
  return hgnode;
}
|
| 1271 |
+
|
| 1272 |
+
/** Serialize the connected part of the search graph as a protobuf
 *  hypergraph (hgmert format) to the given stream.
 *  Node index 0 is the goal node, index 1 the source node; hypothesis id -1
 *  is mapped to the source node so initial edges attach there.
 *  \param translationId sentence id (currently unused in the PB payload)
 *  \param outputStream  destination for the binary protobuf message */
void Manager::SerializeSearchGraphPB(
  long translationId,
  std::ostream& outputStream) const
{
  using namespace hgmert;
  std::map < int, bool > connected;
  std::map < int, int > i2hgnode;
  std::vector< const Hypothesis *> connectedList;
  GetConnectedGraph(&connected, &connectedList);
  connected[ 0 ] = true;
  Hypergraph hg;
  hg.set_is_sorted(false);
  // number of features is taken from any hypothesis in the final stack
  int num_feats = (*m_search->GetHypothesisStacks().back()->begin())->GetScoreBreakdown().size();
  hg.set_num_features(num_feats);
  StaticData::Instance().GetScoreIndexManager().SerializeFeatureNamesToPB(&hg);
  Hypergraph_Node* goal = hg.add_nodes(); // idx=0 goal node must have idx 0
  Hypergraph_Node* source = hg.add_nodes(); // idx=1
  i2hgnode[-1] = 1; // source node
  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  for (std::vector < HypothesisStack* >::const_iterator iterStack = hypoStackColl.begin();
       iterStack != hypoStackColl.end() ; ++iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;

    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      bool is_goal = hypo->GetWordsBitmap().IsComplete();
      if (connected.find( hypo->GetId() ) != connected.end()) {
        int headNodeIdx;
        Hypergraph_Node* headNode = GetHGNode(hypo, &i2hgnode, &hg, &headNodeIdx);
        if (is_goal) {
          // complete hypotheses get an extra unary edge into the goal node
          Hypergraph_Edge* ge = hg.add_edges();
          ge->set_head_node(0); // goal
          ge->add_tail_nodes(headNodeIdx);
          ge->mutable_rule()->add_trg_words("[X,1]");
        }
        Hypergraph_Edge* edge = hg.add_edges();
        SerializeEdgeInfo(hypo, edge);
        edge->set_head_node(headNodeIdx);
        const Hypothesis* prev = hypo->GetPrevHypo();
        int tailNodeIdx = 1; // source
        if (prev)
          tailNodeIdx = i2hgnode.find(prev->GetId())->second;
        edge->add_tail_nodes(tailNodeIdx);

        // recombined hypotheses become parallel edges into the same head
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            UTIL_THROW_IF2(!connected[loserHypo->GetId()],
                           "Hypothesis " << loserHypo->GetId() << " is not connected");
            Hypergraph_Edge* edge = hg.add_edges();
            SerializeEdgeInfo(loserHypo, edge);
            edge->set_head_node(headNodeIdx);
            tailNodeIdx = i2hgnode.find(loserHypo->GetPrevHypo()->GetId())->second;
            edge->add_tail_nodes(tailNodeIdx);
          }
        } // end if arcList empty
      } // end if connected
    } // end for iterHypo
  } // end for iterStack
  hg.SerializeToOstream(&outputStream);
}
|
| 1337 |
+
#endif
|
| 1338 |
+
|
| 1339 |
+
void
|
| 1340 |
+
Manager::
|
| 1341 |
+
OutputSearchGraph(long translationId, std::ostream &out) const
|
| 1342 |
+
{
|
| 1343 |
+
vector<SearchGraphNode> searchGraph;
|
| 1344 |
+
GetSearchGraph(searchGraph);
|
| 1345 |
+
for (size_t i = 0; i < searchGraph.size(); ++i) {
|
| 1346 |
+
OutputSearchNode(*options(),translationId,out,searchGraph[i]);
|
| 1347 |
+
}
|
| 1348 |
+
}
|
| 1349 |
+
|
| 1350 |
+
/** Compute, for every winner-connected hypothesis, the best forward score
 *  (cost to complete the translation from that hypothesis), the outgoing
 *  edges, and a combined forward+backward score estimate.
 *  \param pConnected     out: hypothesis id -> true for connected hypos
 *  \param pConnectedList out: connected hypotheses (order matches scores)
 *  \param pOutgoingHyps  out: hypothesis -> set of successor hypotheses
 *  \param pFwdBwdScores  out: per-connectedList-entry estimated score */
void
Manager::
GetForwardBackwardSearchGraph
( std::map< int, bool >* pConnected,
  std::vector<Hypothesis const* >* pConnectedList,
  std::map<Hypothesis const*, set<Hypothesis const*> >* pOutgoingHyps,
  vector< float>* pFwdBwdScores) const
{
  std::map < int, bool > &connected = *pConnected;
  std::vector< const Hypothesis *>& connectedList = *pConnectedList;
  // forward: best successor id per hypothesis; forwardScore: its score
  std::map < int, int > forward;
  std::map < int, double > forwardScore;

  std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps
  = *pOutgoingHyps;
  vector< float> & estimatedScores = *pFwdBwdScores;

  // *** find connected hypotheses ***
  GetWinnerConnectedGraph(&connected, &connectedList);

  // ** compute best forward path for each hypothesis *** //

  // forward cost of hypotheses on final stack is 0
  const std::vector < HypothesisStack* > &hypoStackColl
  = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  HypothesisStack::const_iterator iterHypo;
  for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
    const Hypothesis *hypo = *iterHypo;
    forwardScore[ hypo->GetId() ] = 0.0f;
    forward[ hypo->GetId() ] = -1;
  }

  // compete for best forward score of previous hypothesis
  // (iterate stacks back-to-front so successors are scored first)
  std::vector < HypothesisStack* >::const_iterator iterStack;
  for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      if (connected.find( hypo->GetId() ) != connected.end()) {
        // make a play for previous hypothesis
        const Hypothesis *prevHypo = hypo->GetPrevHypo();
        // transition cost = this hypo's score minus its predecessor's
        double fscore = forwardScore[ hypo->GetId() ] +
                        hypo->GetScore() - prevHypo->GetScore();
        if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
            || forwardScore.find( prevHypo->GetId() )->second < fscore) {
          forwardScore[ prevHypo->GetId() ] = fscore;
          forward[ prevHypo->GetId() ] = hypo->GetId();
        }
        //store outgoing info
        outgoingHyps[prevHypo].insert(hypo);

        // all arcs also make a play
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            // make a play
            const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
            double fscore = forwardScore[ hypo->GetId() ] +
                            loserHypo->GetScore() - loserPrevHypo->GetScore();
            if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
                || forwardScore.find( loserPrevHypo->GetId() )->second < fscore) {
              forwardScore[ loserPrevHypo->GetId() ] = fscore;
              forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
            }
            //store outgoing info
            outgoingHyps[loserPrevHypo].insert(hypo);

          } // end for arc list
        } // end if arc list empty
      } // end if hypo connected
    } // end for hypo
  } // end for stack

  // combined estimate: backward (accumulated) score + best forward score
  for (std::vector< const Hypothesis *>::iterator it = connectedList.begin(); it != connectedList.end(); ++it) {
    float estimatedScore = (*it)->GetScore() + forwardScore[(*it)->GetId()];
    estimatedScores.push_back(estimatedScore);
  }
}
|
| 1433 |
+
|
| 1434 |
+
|
| 1435 |
+
/** Best complete hypothesis found by the search (forwarded from m_search). */
const Hypothesis *Manager::GetBestHypothesis() const
{
  return m_search->GetBestHypothesis();
}
|
| 1439 |
+
|
| 1440 |
+
int Manager::GetNextHypoId()
|
| 1441 |
+
{
|
| 1442 |
+
GetSentenceStats().AddCreated(); // count created hypotheses
|
| 1443 |
+
return m_hypoId++;
|
| 1444 |
+
}
|
| 1445 |
+
|
| 1446 |
+
/** Discard any previous per-sentence statistics and start a fresh record
 *  for the given input sentence. */
void Manager::ResetSentenceStats(const InputType& source)
{
  // NOTE(review): std::auto_ptr is deprecated since C++11; kept because
  // the m_sentenceStats member is declared with that type.
  m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
}
|
| 1450 |
+
/** Per-sentence decoding statistics; ResetSentenceStats() must have been
 *  called first, otherwise this dereferences a null auto_ptr. */
SentenceStats& Manager::GetSentenceStats() const
{
  return *m_sentenceStats;

}
|
| 1455 |
+
|
| 1456 |
+
/** Apply the configured decision rule (MAP, n-best MBR, lattice MBR, or
 *  consensus) and write the single best translation to the collector.
 *  Verbose/debug text (all derivations, best-hypothesis dump) goes to the
 *  collector's debug stream. No-op when collector is NULL.
 *
 *  Fix: nBestSize is size_t (unsigned), so the old "<= 0" test was a
 *  tautological comparison; "== 0" expresses the actual invalid case.
 */
void Manager::OutputBest(OutputCollector *collector) const
{
  long translationId = m_source.GetTranslationId();

  Timer additionalReportingTime;

  // apply decision rule and output best translation(s)
  if (collector) {
    ostringstream out;
    ostringstream debug;
    FixPrecision(debug,PRECISION);

    // all derivations - send them to debug stream
    if (options()->output.PrintAllDerivations) {
      additionalReportingTime.start();
      PrintAllDerivations(translationId, debug);
      additionalReportingTime.stop();
    }

    Timer decisionRuleTime;
    decisionRuleTime.start();

    // MAP decoding: best hypothesis
    const Hypothesis* bestHypo = NULL;
    if (!options()->mbr.enabled) {
      bestHypo = GetBestHypothesis();
      if (bestHypo) {
        if (options()->output.ReportHypoScore) {
          out << bestHypo->GetFutureScore() << ' ';
        }
        if (options()->output.RecoverPath) {
          bestHypo->OutputInput(out);
          out << "||| ";
        }

        if (options()->output.PrintID) {
          out << translationId << " ";
        }

        // VN : I put back the code for OutputPassthroughInformation
        if (options()->output.PrintPassThrough) {
          OutputPassthroughInformation(out, bestHypo);
        }
        // end of add back

        if (options()->output.ReportSegmentation == 2) {
          GetOutputLanguageModelOrder(out, bestHypo);
        }
        OutputSurface(out,*bestHypo, true);
        if (options()->output.PrintAlignmentInfo) {
          out << "||| ";
          bestHypo->OutputAlignment(out, true);
        }

        IFVERBOSE(1) {
          debug << "BEST TRANSLATION: " << *bestHypo << endl;
        }
      } else {
        VERBOSE(1, "NO BEST TRANSLATION" << endl);
      }

      out << endl;
    } // if (!staticData.UseMBR())

    // MBR decoding (n-best MBR, lattice MBR, consensus)
    else {
      // we first need the n-best translations
      size_t nBestSize = options()->mbr.size;
      // size_t can never be negative; zero is the only invalid value here
      if (nBestSize == 0) {
        cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
        exit(1);
      }
      TrellisPathList nBestList;
      CalcNBest(nBestSize, nBestList, true);
      VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
      IFVERBOSE(2) {
        PrintUserTime("calculated n-best list for (L)MBR decoding");
      }

      // lattice MBR
      if (options()->lmbr.enabled) {
        if (options()->nbest.enabled) {
          //lattice mbr nbest
          vector<LatticeMBRSolution> solutions;
          size_t n = min(nBestSize, options()->nbest.nbest_size);
          getLatticeMBRNBest(*this,nBestList,solutions,n);
          OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId);
        } else {
          //Lattice MBR decoding
          vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
          OutputBestHypo(mbrBestHypo, out);
          IFVERBOSE(2) {
            PrintUserTime("finished Lattice MBR decoding");
          }
        }
      }

      // consensus decoding
      else if (options()->search.consensus) {
        const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
        OutputBestHypo(conBestHypo, out);
        OutputAlignment(m_alignmentOut, conBestHypo);
        IFVERBOSE(2) {
          PrintUserTime("finished Consensus decoding");
        }
      }

      // n-best MBR decoding
      else {
        const TrellisPath &mbrBestHypo = doMBR(nBestList, *options());
        OutputBestHypo(mbrBestHypo, out);
        OutputAlignment(m_alignmentOut, mbrBestHypo);
        IFVERBOSE(2) {
          PrintUserTime("finished MBR decoding");
        }
      }
    }

    // report best translation to output collector
    collector->Write(translationId,out.str(),debug.str());

    decisionRuleTime.stop();
    VERBOSE(1, "Line " << translationId << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
  } // if (m_ioWrapper.GetSingleBestOutputCollector())

}
|
| 1582 |
+
|
| 1583 |
+
/** Write the n-best list for this sentence to the collector.
 *  Under lattice MBR the list was pre-rendered during decoding; otherwise
 *  it is computed and formatted here. No-op when collector is NULL. */
void Manager::OutputNBest(OutputCollector *collector) const
{
  if (collector == NULL) {
    return;
  }

  long const id = m_source.GetTranslationId();
  if (options()->lmbr.enabled) {
    // lattice-MBR n-best output was already produced by OutputBest()
    if (options()->nbest.enabled) {
      collector->Write(id, m_latticeNBestOut.str());
    }
  } else {
    NBestOptions const& nbo = options()->nbest;
    TrellisPathList paths;
    CalcNBest(nbo.nbest_size, paths, nbo.only_distinct);
    ostringstream buf;
    OutputNBest(buf, paths);
    collector->Write(id, buf.str());
  }

}
|
| 1603 |
+
|
| 1604 |
+
/** Format an n-best list in the standard Moses format:
 *  "id ||| surface ||| feature scores ||| total [||| segmentation]
 *   [||| word alignment] [||| recovered input]", one line per path.
 *  \param out       destination stream (flushed at the end)
 *  \param nBestList translations to print, best first */
void
Manager::
OutputNBest(std::ostream& out, Moses::TrellisPathList const& nBestList) const
{
  NBestOptions const& nbo = options()->nbest;
  bool reportAllFactors = nbo.include_all_factors;
  bool includeSegmentation = nbo.include_segmentation;
  bool includeWordAlignment = nbo.include_alignment_info;

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    // (edges are stored last-to-first, hence the reverse loop)
    out << m_source.GetTranslationId() << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge);
    }
    out << " |||";

    // print scores with feature names
    bool with_labels = options()->nbest.include_feature_labels;
    path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels);

    // total
    out << " ||| " << path.GetFutureScore();

    //phrase-to-phrase segmentation
    // (size()-2 skips the initial empty hypothesis at the end of edges)
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const Range &sourceRange = edge.GetCurrSourceWordsRange();
        Range targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out<< "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out<< "-" << targetRange.GetEndPos();
        }
      }
    }

    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const Range &sourceRange = edge.GetCurrSourceWordsRange();
        Range targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, ai, sourceOffset, targetOffset);

      }
    }

    if (options()->output.RecoverPath) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
|
| 1676 |
+
|
| 1677 |
+
//////////////////////////////////////////////////////////////////////////
/***
 * Print the surface form of one hypothesis's target phrase.
 * \param out       destination stream
 * \param edge      hypothesis whose current target phrase is printed
 * \param recursive if true, print all predecessor phrases first, yielding
 *                  the full translation prefix up to this hypothesis
 * Honors placeholder substitution, unknown-word markers, extra output
 * factors, and the "-t"/"-tt" segmentation trace options.
 */
void
Manager::
OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) const
{
  // emit predecessors first so words come out in sentence order
  if (recursive && edge.GetPrevHypo()) {
    OutputSurface(out,*edge.GetPrevHypo(), true);
  }

  std::vector<FactorType> outputFactorOrder = options()->output.factor_order;
  UTIL_THROW_IF2(outputFactorOrder.size() == 0,
                 "Must specific at least 1 output factor");

  FactorType placeholderFactor = options()->input.placeholder_factor;
  std::map<size_t, const Factor*> placeholders;
  if (placeholderFactor != NOT_FOUND) {
    // creates map of target position -> factor for placeholders
    placeholders = GetPlaceholders(edge, placeholderFactor);
  }

  bool markUnknown = options()->unk.mark;
  std::string const& fd = options()->output.factor_delimiter;

  TargetPhrase const& phrase = edge.GetCurrTargetPhrase();
  size_t size = phrase.GetSize();
  for (size_t pos = 0 ; pos < size ; pos++) {
    const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
    if (placeholders.size()) {
      // do placeholders: substitute the source-side factor at this position
      std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
      if (iter != placeholders.end()) {
        factor = iter->second;
      }
    }

    UTIL_THROW_IF2(factor == NULL, "No factor 0 at position " << pos);

    //preface surface form with UNK if marking unknowns
    const Word &word = phrase.GetWord(pos);
    if(markUnknown && word.IsOOV()) {
      out << options()->unk.prefix;
    }

    out << *factor;
    // append any additional output factors, separated by the delimiter
    for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
      if (factor) out << fd << *factor;
      //else out << fd << UNKNOWN_FACTOR;
    }

    if(markUnknown && word.IsOOV()) {
      out << options()->unk.suffix;
    }

    out << " ";

  }

  // trace ("report segmentation") option "-t" / "-tt"
  int reportSegmentation = options()->output.ReportSegmentation;
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const Range &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
    if (reportSegmentation == 2) {
      // "-tt": also emit word alignment, transition score, and the
      // per-phrase feature score deltas
      out << ",wa=";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      OutputAlignment(out, ai, 0, 0);
      out << ",total=";
      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
      out << ",";
      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
      bool with_labels = options()->nbest.include_feature_labels;
      scoreBreakdown.OutputAllFeatureScores(out, with_labels);
    }
    out << "| ";
  }
}
|
| 1760 |
+
|
| 1761 |
+
void
|
| 1762 |
+
Manager::
|
| 1763 |
+
OutputAlignment(ostream &out, const AlignmentInfo &ai,
|
| 1764 |
+
size_t sourceOffset, size_t targetOffset) const
|
| 1765 |
+
{
|
| 1766 |
+
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
|
| 1767 |
+
AlignVec alignments = ai.GetSortedAlignments(options()->output.WA_SortOrder);
|
| 1768 |
+
|
| 1769 |
+
AlignVec::const_iterator it;
|
| 1770 |
+
for (it = alignments.begin(); it != alignments.end(); ++it) {
|
| 1771 |
+
const std::pair<size_t,size_t> &alignment = **it;
|
| 1772 |
+
out << alignment.first + sourceOffset << "-"
|
| 1773 |
+
<< alignment.second + targetOffset << " ";
|
| 1774 |
+
}
|
| 1775 |
+
|
| 1776 |
+
}
|
| 1777 |
+
|
| 1778 |
+
void
|
| 1779 |
+
Manager::
|
| 1780 |
+
OutputInput(std::ostream& os, const Hypothesis* hypo) const
|
| 1781 |
+
{
|
| 1782 |
+
size_t len = hypo->GetInput().GetSize();
|
| 1783 |
+
std::vector<const Phrase*> inp_phrases(len, 0);
|
| 1784 |
+
OutputInput(inp_phrases, hypo);
|
| 1785 |
+
for (size_t i=0; i<len; ++i)
|
| 1786 |
+
if (inp_phrases[i]) os << *inp_phrases[i];
|
| 1787 |
+
}
|
| 1788 |
+
|
| 1789 |
+
/** Recursively record, for each hypothesis on the back-pointer chain, the
 *  source phrase it consumed, indexed by its start position. The initial
 *  (empty) hypothesis contributes nothing. */
void Manager::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const
{
  const Hypothesis* prev = hypo->GetPrevHypo();
  if (prev == NULL) return;
  // fill earlier hypotheses first so later ones win on (unlikely) overlap
  OutputInput(map, prev);
  size_t const start = hypo->GetCurrSourceWordsRange().GetStartPos();
  map[start] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
}
|
| 1796 |
+
|
| 1797 |
+
/** Map target positions to the placeholder factors of the aligned source
 *  words for one hypothesis's phrase pair.
 *  \param hypo              hypothesis whose translation option is inspected
 *  \param placeholderFactor source factor index holding placeholder values
 *  \return target position -> source placeholder factor
 *  \throws util::Exception if a placeholder aligns to != 1 target word */
std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
{
  const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
  const Phrase &inputPhrase = inputPath.GetPhrase();

  std::map<size_t, const Factor*> ret;

  for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
    const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
    if (factor) {
      // a placeholder must align to exactly one target word
      TargetPhrase const& tp = hypo.GetTranslationOption().GetTargetPhrase();
      std::set<size_t> targetPos = tp.GetAlignTerm().GetAlignmentsForSource(sourcePos);
      UTIL_THROW_IF2(targetPos.size() != 1,
                     "Placeholder should be aligned to 1, and only 1, word");
      ret[*targetPos.begin()] = factor;
    }
  }

  return ret;
}
|
| 1817 |
+
|
| 1818 |
+
/** Sample paths from the lattice and write them in n-best format to the
 *  collector. No-op when collector is NULL. */
void Manager::OutputLatticeSamples(OutputCollector *collector) const
{
  if (!collector) return;

  TrellisPathList samples;
  CalcLatticeSamples(options()->output.lattice_sample_size, samples);
  ostringstream buf;
  OutputNBest(buf, samples);
  collector->Write(m_source.GetTranslationId(), buf.str());
}
|
| 1829 |
+
|
| 1830 |
+
/** Write the word alignment of the best translation to the collector.
 *  If an MBR/consensus decision rule already produced alignment text
 *  (m_alignmentOut), that is reused; otherwise the alignment is rebuilt
 *  by walking the best hypothesis chain. No-op when collector is NULL. */
void Manager::OutputAlignment(OutputCollector *collector) const
{
  if (collector == NULL) {
    return;
  }

  if (!m_alignmentOut.str().empty()) {
    collector->Write(m_source.GetTranslationId(), m_alignmentOut.str());
  } else {
    // collect edges from last to first, then walk them in reverse so
    // target offsets accumulate in sentence order
    std::vector<const Hypothesis *> edges;
    const Hypothesis *currentHypo = GetBestHypothesis();
    while (currentHypo) {
      edges.push_back(currentHypo);
      currentHypo = currentHypo->GetPrevHypo();
    }
    ostringstream out;
    size_t targetOffset = 0;
    BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) {
      const TargetPhrase &tp = e->GetCurrTargetPhrase();
      size_t sourceOffset = e->GetCurrSourceWordsRange().GetStartPos();
      OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
      targetOffset += tp.GetSize();
    }
    out << std::endl; // Used by --alignment-output-file so requires endl
    collector->Write(m_source.GetTranslationId(), out.str());

  }
}
|
| 1858 |
+
|
| 1859 |
+
void
|
| 1860 |
+
Manager::
|
| 1861 |
+
OutputDetailedTranslationReport(OutputCollector *collector) const
|
| 1862 |
+
{
|
| 1863 |
+
if (collector) {
|
| 1864 |
+
ostringstream out;
|
| 1865 |
+
FixPrecision(out,PRECISION);
|
| 1866 |
+
TranslationAnalysis::PrintTranslationAnalysis(out, GetBestHypothesis());
|
| 1867 |
+
collector->Write(m_source.GetTranslationId(),out.str());
|
| 1868 |
+
}
|
| 1869 |
+
|
| 1870 |
+
}
|
| 1871 |
+
|
| 1872 |
+
void
|
| 1873 |
+
Manager::
|
| 1874 |
+
OutputUnknowns(OutputCollector *collector) const
|
| 1875 |
+
{
|
| 1876 |
+
if (collector) {
|
| 1877 |
+
long translationId = m_source.GetTranslationId();
|
| 1878 |
+
const vector<const Phrase*>& unknowns = m_transOptColl->GetUnknownSources();
|
| 1879 |
+
ostringstream out;
|
| 1880 |
+
for (size_t i = 0; i < unknowns.size(); ++i) {
|
| 1881 |
+
out << *(unknowns[i]);
|
| 1882 |
+
}
|
| 1883 |
+
out << endl;
|
| 1884 |
+
collector->Write(translationId, out.str());
|
| 1885 |
+
}
|
| 1886 |
+
|
| 1887 |
+
}
|
| 1888 |
+
|
| 1889 |
+
void
|
| 1890 |
+
Manager::
|
| 1891 |
+
OutputWordGraph(OutputCollector *collector) const
|
| 1892 |
+
{
|
| 1893 |
+
if (collector) {
|
| 1894 |
+
long translationId = m_source.GetTranslationId();
|
| 1895 |
+
ostringstream out;
|
| 1896 |
+
FixPrecision(out,PRECISION);
|
| 1897 |
+
GetWordGraph(translationId, out);
|
| 1898 |
+
collector->Write(translationId, out.str());
|
| 1899 |
+
}
|
| 1900 |
+
}
|
| 1901 |
+
|
| 1902 |
+
void
|
| 1903 |
+
Manager::
|
| 1904 |
+
OutputSearchGraph(OutputCollector *collector) const
|
| 1905 |
+
{
|
| 1906 |
+
if (collector) {
|
| 1907 |
+
long translationId = m_source.GetTranslationId();
|
| 1908 |
+
ostringstream out;
|
| 1909 |
+
FixPrecision(out,PRECISION);
|
| 1910 |
+
OutputSearchGraph(translationId, out);
|
| 1911 |
+
collector->Write(translationId, out.str());
|
| 1912 |
+
|
| 1913 |
+
#ifdef HAVE_PROTOBUF
|
| 1914 |
+
const StaticData &staticData = StaticData::Instance();
|
| 1915 |
+
if (staticData.GetOutputSearchGraphPB()) {
|
| 1916 |
+
ostringstream sfn;
|
| 1917 |
+
sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << translationId << ".pb" << ends;
|
| 1918 |
+
string fn = sfn.str();
|
| 1919 |
+
VERBOSE(2, "Writing search graph to " << fn << endl);
|
| 1920 |
+
fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
|
| 1921 |
+
SerializeSearchGraphPB(translationId, output);
|
| 1922 |
+
}
|
| 1923 |
+
#endif
|
| 1924 |
+
}
|
| 1925 |
+
|
| 1926 |
+
}
|
| 1927 |
+
|
| 1928 |
+
void Manager::OutputSearchGraphSLF() const
|
| 1929 |
+
{
|
| 1930 |
+
// const StaticData &staticData = StaticData::Instance();
|
| 1931 |
+
long translationId = m_source.GetTranslationId();
|
| 1932 |
+
|
| 1933 |
+
// Output search graph in HTK standard lattice format (SLF)
|
| 1934 |
+
std::string const& slf = options()->output.SearchGraphSLF;
|
| 1935 |
+
if (slf.size()) {
|
| 1936 |
+
util::StringStream fileName;
|
| 1937 |
+
fileName << slf << "/" << translationId << ".slf";
|
| 1938 |
+
ofstream *file = new ofstream;
|
| 1939 |
+
file->open(fileName.str().c_str());
|
| 1940 |
+
if (file->is_open() && file->good()) {
|
| 1941 |
+
ostringstream out;
|
| 1942 |
+
FixPrecision(out,PRECISION);
|
| 1943 |
+
OutputSearchGraphAsSLF(translationId, out);
|
| 1944 |
+
*file << out.str();
|
| 1945 |
+
file -> flush();
|
| 1946 |
+
} else {
|
| 1947 |
+
TRACE_ERR("Cannot output HTK standard lattice for line " << translationId << " because the output file is not open or not ready for writing" << endl);
|
| 1948 |
+
}
|
| 1949 |
+
delete file;
|
| 1950 |
+
}
|
| 1951 |
+
|
| 1952 |
+
}
|
| 1953 |
+
|
| 1954 |
+
void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) const
|
| 1955 |
+
{
|
| 1956 |
+
for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
|
| 1957 |
+
out << translationId;
|
| 1958 |
+
out << " |||";
|
| 1959 |
+
const vector<Word> mbrHypo = si->GetWords();
|
| 1960 |
+
for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
|
| 1961 |
+
const Factor *factor = mbrHypo[i].GetFactor(options()->output.factor_order[0]);
|
| 1962 |
+
if (i>0) out << " " << *factor;
|
| 1963 |
+
else out << *factor;
|
| 1964 |
+
}
|
| 1965 |
+
out << " |||";
|
| 1966 |
+
out << " map: " << si->GetMapScore();
|
| 1967 |
+
out << " w: " << mbrHypo.size();
|
| 1968 |
+
const vector<float>& ngramScores = si->GetNgramScores();
|
| 1969 |
+
for (size_t i = 0; i < ngramScores.size(); ++i) {
|
| 1970 |
+
out << " " << ngramScores[i];
|
| 1971 |
+
}
|
| 1972 |
+
out << " ||| " << si->GetScore();
|
| 1973 |
+
|
| 1974 |
+
out << endl;
|
| 1975 |
+
}
|
| 1976 |
+
}
|
| 1977 |
+
|
| 1978 |
+
void
|
| 1979 |
+
Manager::
|
| 1980 |
+
OutputBestHypo(const std::vector<Word>& mbrBestHypo, ostream& out) const
|
| 1981 |
+
{
|
| 1982 |
+
FactorType f = options()->output.factor_order[0];
|
| 1983 |
+
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
|
| 1984 |
+
const Factor *factor = mbrBestHypo[i].GetFactor(f);
|
| 1985 |
+
UTIL_THROW_IF2(factor == NULL, "No factor " << f << " at position " << i);
|
| 1986 |
+
if (i) out << " ";
|
| 1987 |
+
out << *factor;
|
| 1988 |
+
}
|
| 1989 |
+
out << endl;
|
| 1990 |
+
}
|
| 1991 |
+
|
| 1992 |
+
void
|
| 1993 |
+
Manager::
|
| 1994 |
+
OutputBestHypo(const Moses::TrellisPath &path, std::ostream &out) const
|
| 1995 |
+
{
|
| 1996 |
+
std::vector<const Hypothesis *> const& edges = path.GetEdges();
|
| 1997 |
+
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
|
| 1998 |
+
Hypothesis const& edge = *edges[currEdge];
|
| 1999 |
+
OutputSurface(out, edge);
|
| 2000 |
+
}
|
| 2001 |
+
out << endl;
|
| 2002 |
+
}
|
| 2003 |
+
|
| 2004 |
+
void
|
| 2005 |
+
Manager::
|
| 2006 |
+
OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
|
| 2007 |
+
{
|
| 2008 |
+
WordAlignmentSort waso = options()->output.WA_SortOrder;
|
| 2009 |
+
BOOST_REVERSE_FOREACH(Hypothesis const* e, path.GetEdges())
|
| 2010 |
+
e->OutputAlignment(out, false);
|
| 2011 |
+
// Hypothesis::OutputAlignment(out, path.GetEdges(), waso);
|
| 2012 |
+
// Used by --alignment-output-file so requires endl
|
| 2013 |
+
out << std::endl;
|
| 2014 |
+
}
|
| 2015 |
+
|
| 2016 |
+
} // namespace
|
mosesdecoder/moses/MockHypothesis.h
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- c++ -*-
|
| 2 |
+
/***********************************************************************
|
| 3 |
+
Moses - factored phrase-based language decoder
|
| 4 |
+
Copyright (C) 2010 University of Edinburgh
|
| 5 |
+
|
| 6 |
+
This library is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU Lesser General Public
|
| 8 |
+
License as published by the Free Software Foundation; either
|
| 9 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This library is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 14 |
+
Lesser General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU Lesser General Public
|
| 17 |
+
License along with this library; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 19 |
+
***********************************************************************/
|
| 20 |
+
|
| 21 |
+
#ifndef _MOCK_HYPOTHESIS_
|
| 22 |
+
#define _MOCK_HYPOTHESIS_
|
| 23 |
+
|
| 24 |
+
#include <memory>
|
| 25 |
+
#include <vector>
|
| 26 |
+
|
| 27 |
+
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
| 28 |
+
#include "moses/FF/DistortionScoreProducer.h"
|
| 29 |
+
#include "moses/FF/WordPenaltyProducer.h"
|
| 30 |
+
#include "Hypothesis.h"
|
| 31 |
+
#include "Manager.h"
|
| 32 |
+
#include "TranslationOption.h"
|
| 33 |
+
|
| 34 |
+
namespace MosesTest
|
| 35 |
+
{
|
| 36 |
+
|
| 37 |
+
//
|
| 38 |
+
// Construct a hypothesis with arbitrary source and target phrase
|
| 39 |
+
// sequences. Useful for testing feature functions.
|
| 40 |
+
//
|
| 41 |
+
|
| 42 |
+
typedef std::pair<size_t,size_t> Alignment; //(first,last) in source
|
| 43 |
+
|
| 44 |
+
// Guard object that constructs a complete phrase-based hypothesis chain
// from a source sentence, alignment spans, and target segments, and
// tears it down again in its destructor.  Intended for testing feature
// functions in isolation (see the file-top comment).
class MockHypothesisGuard
{
public:
  /** Creates a phrase-based hypothesis.
  */
  MockHypothesisGuard
  ( const std::string& sourceSentence,
    const std::vector<Alignment>& alignments,
    const std::vector<std::string>& targetSegments);

  /// Access the final hypothesis of the constructed chain.
  Moses::Hypothesis* operator*() const {
    return m_hypothesis;
  }

  /** Destroy the hypothesis chain */
  ~MockHypothesisGuard();

private:
  Moses::TranslationOption m_initialTransOpt;
  boost::shared_ptr<Moses::Sentence> m_sentence;
  // Feature functions the mock hypotheses are scored against.
  Moses::WordPenaltyProducer m_wp;
  Moses::UnknownWordPenaltyProducer m_uwp;
  Moses::DistortionScoreProducer m_dist;
  boost::shared_ptr<Moses::Manager> m_manager;
  boost::shared_ptr<Moses::TranslationTask> m_ttask;
  // Tail of the hypothesis chain; NOTE(review): presumably owned via the
  // chain and freed by the destructor -- confirm against MockHypothesis.cpp.
  Moses::Hypothesis* m_hypothesis;
  std::vector<Moses::TargetPhrase> m_targetPhrases;
  // Raw owning pointers; NOTE(review): presumably deleted in the
  // destructor -- confirm against MockHypothesis.cpp.
  std::vector<Moses::TranslationOption*> m_toptions;
};
|
| 73 |
+
|
| 74 |
+
class HypothesisFixture
|
| 75 |
+
{
|
| 76 |
+
public:
|
| 77 |
+
HypothesisFixture();
|
| 78 |
+
const Moses::Hypothesis* empty() {
|
| 79 |
+
return **m_empty;
|
| 80 |
+
}
|
| 81 |
+
const Moses::Hypothesis* partial() {
|
| 82 |
+
return **m_partial;
|
| 83 |
+
}
|
| 84 |
+
const Moses::Hypothesis* full() {
|
| 85 |
+
return **m_full;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
private:
|
| 89 |
+
std::auto_ptr<MockHypothesisGuard> m_empty;
|
| 90 |
+
std::auto_ptr<MockHypothesisGuard> m_partial;
|
| 91 |
+
std::auto_ptr<MockHypothesisGuard> m_full;
|
| 92 |
+
};
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
#endif
|
mosesdecoder/moses/OutputFileStream.h
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#pragma once
|
| 23 |
+
|
| 24 |
+
#include <cstdlib>
|
| 25 |
+
#include <fstream>
|
| 26 |
+
#include <string>
|
| 27 |
+
#include <iostream>
|
| 28 |
+
#include <boost/iostreams/filtering_stream.hpp>
|
| 29 |
+
|
| 30 |
+
namespace Moses
|
| 31 |
+
{
|
| 32 |
+
|
| 33 |
+
/** Version of std::ostream with transparent compression.
|
| 34 |
+
*
|
| 35 |
+
* Transparently compresses output when writing to a file whose name ends in
|
| 36 |
+
* ".gz". Or, writes to stdout instead of a file when given a filename
|
| 37 |
+
* consisting of just a dash ("-").
|
| 38 |
+
*/
|
| 39 |
+
/** Version of std::ostream with transparent compression.
 *
 * Transparently compresses output when writing to a file whose name ends in
 * ".gz". Or, writes to stdout instead of a file when given a filename
 * consisting of just a dash ("-").
 */
class OutputFileStream : public boost::iostreams::filtering_ostream
{
private:
  /** File that needs flushing & closing when we close this stream.
   *
   * Is NULL when no file is opened, e.g. when writing to standard output.
   */
  std::ofstream *m_outFile;

  /// Is this stream open?
  bool m_open;

public:
  /** Create an unopened OutputFileStream.
   *
   * Until it's been opened, nothing can be done with this stream.
   */
  OutputFileStream();

  /// Create an OutputFileStream, and open it by calling Open().
  OutputFileStream(const std::string &filePath);
  virtual ~OutputFileStream();

  // TODO: Can we please just always throw an exception when this fails?
  /** Open stream.
   *
   * If filePath is "-" (just a dash), this opens the stream for writing to
   * standard output. Otherwise, it opens the given file. If the filename
   * has the ".gz" suffix, output will be transparently compressed.
   *
   * Call Close() to close the file.
   *
   * Returns whether opening the file was successful. It may also throw an
   * exception on failure.
   */
  bool Open(const std::string &filePath);

  /// Flush and close stream. After this, the stream can be opened again.
  /// NOTE(review): presumably also called from the destructor when the
  /// stream is still open -- confirm against OutputFileStream.cpp.
  void Close();
};
|
| 79 |
+
|
| 80 |
+
}
|
| 81 |
+
|
mosesdecoder/moses/PCNTools.h
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_PCNTools
|
| 23 |
+
#define moses_PCNTools
|
| 24 |
+
|
| 25 |
+
#include <vector>
|
| 26 |
+
#include <map>
|
| 27 |
+
#include <string>
|
| 28 |
+
#include <utility>
|
| 29 |
+
#include <cstdlib>
|
| 30 |
+
|
| 31 |
+
/** A couple of utilities to read .pcn files. A python-compatible format
|
| 32 |
+
* for encoding confusion networks and word lattices.
|
| 33 |
+
*/
|
| 34 |
+
namespace PCN
|
| 35 |
+
{
|
| 36 |
+
|
| 37 |
+
/** One alternative (arc) in a confusion-network column: a word plus its
 *  dense and sparse feature scores and the index of the column the arc
 *  jumps to.
 */
struct CNAlt {
  // Default constructor.  m_next is explicitly zeroed: the original left
  // it uninitialized, so reading it from a default-constructed CNAlt was
  // undefined behavior.
  CNAlt() : m_next(0) {
  }
  CNAlt(const std::string &word,
        const std::vector<float> &denseFeatures,
        const std::map<std::string, float> &sparseFeatures,
        size_t next)
    :m_word(word)
    ,m_denseFeatures(denseFeatures)
    ,m_sparseFeatures(sparseFeatures)
    ,m_next(next) {
  }

  std::string m_word;                            ///< surface form of this arc
  std::vector<float> m_denseFeatures;            ///< dense feature scores
  std::map<std::string, float> m_sparseFeatures; ///< sparse feature scores, by name
  size_t m_next;                                 ///< index of the next column
};
|
| 55 |
+
|
| 56 |
+
//typedef std::pair<std::pair<std::string, std::vector<float> >, size_t> CNAlt;
|
| 57 |
+
typedef std::vector<CNAlt> CNCol;
|
| 58 |
+
typedef std::vector<CNCol> CN;
|
| 59 |
+
|
| 60 |
+
/** Given a string ((('foo',0.1,1),('bar',0.9,2)),...) representation of a
|
| 61 |
+
* word lattice in PCN format, return a CN object representing the lattice
|
| 62 |
+
*/
|
| 63 |
+
CN parsePCN(const std::string& in);
|
| 64 |
+
|
| 65 |
+
};
|
| 66 |
+
|
| 67 |
+
#endif
|
mosesdecoder/moses/PDTAimp.cpp
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "PDTAimp.h"
|
| 2 |
+
|
| 3 |
+
namespace Moses
|
| 4 |
+
{
|
| 5 |
+
|
| 6 |
+
// Construct the adaptor implementation for PhraseDictionaryTreeAdaptor
// `p`.  The binary dictionary itself is not loaded here; see Create().
PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
  : m_dict(0),
    m_obj(p),
    useCache(1),
    totalE(0),
    distinctE(0)
{
  m_numInputScores = 0;
  m_inputFeature = InputFeature::InstancePtr();

  if (m_inputFeature) {
    // Only the first phrase table in the collection picks up the input
    // feature's score components; NOTE(review): presumably to avoid
    // double-counting input scores across multiple tables -- confirm.
    const PhraseDictionary *firstPt = PhraseDictionary::GetColl()[0];
    if (firstPt == m_obj) {
      m_numInputScores = m_inputFeature->GetNumScoreComponents();
    }
  }
}
|
| 23 |
+
|
| 24 |
+
// Tear down the adaptor: release per-sentence caches, delete the
// underlying dictionary, and -- at verbosity >= 2 -- print statistics on
// target candidates and confusion-network path counts.
PDTAimp::~PDTAimp()
{
  CleanUp();
  delete m_dict;

  if (StaticData::Instance().GetVerboseLevel() >= 2) {

    // Candidate statistics: totals vs. distinct candidates, with
    // percentages (hence the 0.01 * totalE divisors).
    TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
              <<distinctE<<" ("<<distinctE/(0.01*totalE)<<"); duplicates="
              <<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE)
              <<")\n");

    TRACE_ERR("\npath statistics\n");

    // Each vector is indexed by path length; index 0 is unused, hence
    // the begin()+1 below.
    if(path1Best.size()) {
      TRACE_ERR("1-best: ");
      std::copy(path1Best.begin()+1,path1Best.end(),
                std::ostream_iterator<size_t>(std::cerr," \t"));
      TRACE_ERR("\n");
    }
    if(pathCN.size()) {
      TRACE_ERR("CN (full): ");
      // pathCN holds log-scale counts (see addLogScale in CacheSource);
      // Exp converts back to plain counts for printing.
      std::transform(pathCN.begin()+1
                     ,pathCN.end()
                     ,std::ostream_iterator<double>(std::cerr," \t")
                     ,Exp);
      TRACE_ERR("\n");
    }
    if(pathExplored.size()) {
      TRACE_ERR("CN (explored): ");
      std::copy(pathExplored.begin()+1,pathExplored.end(),
                std::ostream_iterator<size_t>(std::cerr," \t"));
      TRACE_ERR("\n");
    }
  }

}
|
| 61 |
+
|
| 62 |
+
void PDTAimp::CleanUp()
|
| 63 |
+
{
|
| 64 |
+
assert(m_dict);
|
| 65 |
+
m_dict->FreeMemory();
|
| 66 |
+
// for(size_t i=0; i<m_tgtColls.size(); ++i) m_tgtColls[i].reset();
|
| 67 |
+
m_tgtColls.clear();
|
| 68 |
+
m_cache.clear();
|
| 69 |
+
m_rangeCache.clear();
|
| 70 |
+
uniqSrcPhr.clear();
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
// Look up (or fetch from cache) the collection of target phrases for the
// given source phrase.  Returns an empty shared_ptr when the source is
// empty or no candidates survive pruning.
TargetPhraseCollectionWithSourcePhrase::shared_ptr
PDTAimp::GetTargetPhraseCollection(Phrase const &src) const
{

  assert(m_dict);

  TargetPhraseCollectionWithSourcePhrase::shared_ptr ret;
  if(src.GetSize()==0) return ret;

  // Cache protocol: insert a placeholder first; if the key was already
  // present, return the cached result.  The iterator `piter` is reused
  // at the bottom to fill in the placeholder.
  std::pair<MapSrc2Tgt::iterator,bool> piter;
  if(useCache) {
    piter=m_cache.insert(std::make_pair(src, ret));
    if(!piter.second) return piter.first->second;
  } else if (m_cache.size()) {
    // Cache disabled but non-empty: read-only lookup.
    MapSrc2Tgt::const_iterator i=m_cache.find(src);
    return (i!=m_cache.end() ? i->second : ret);
  }

  std::vector<std::string> srcString(src.GetSize());
  // convert source Phrase into vector of strings
  for(size_t i=0; i<srcString.size(); ++i) {
    Factors2String(src.GetWord(i),srcString[i]);
  }

  // get target phrases in string representation (plus word alignments)
  std::vector<StringTgtCand> cands;
  std::vector<std::string> wacands;
  m_dict->GetTargetCandidates(srcString,cands,wacands);
  if(cands.empty()) {
    return ret;
  }

  //TODO: Multiple models broken here
  std::vector<float> weights = StaticData::Instance().GetWeights(m_obj);

  std::vector<TargetPhrase> tCands;
  tCands.reserve(cands.size());

  // (negated future score, index into tCands) pairs used for pruning.
  std::vector<std::pair<float,size_t> > costs;
  costs.reserve(cands.size());

  std::vector<Phrase> sourcePhrases;
  sourcePhrases.reserve(cands.size());


  // convert into TargetPhrases
  std::string fd = m_obj->options()->output.factor_delimiter;
  for(size_t i=0; i<cands.size(); ++i) {
    TargetPhrase targetPhrase(m_obj);

    StringTgtCand::Tokens const& factorStrings=cands[i].tokens;
    Scores const& probVector=cands[i].scores;

    // Probabilities -> log scores, floored to the model's minimum.
    std::vector<float> scoreVector(probVector.size());
    std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
                   TransformScore);
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
                   FloorScore);

    //sparse features.
    //These are already in log-space
    for (size_t j = 0; j < cands[i].fnames.size(); ++j) {
      targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
    }

    CreateTargetPhrase(targetPhrase,factorStrings, fd, scoreVector, Scores(0),
                       &wacands[i], &src);

    costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
    tCands.push_back(targetPhrase);

    sourcePhrases.push_back(src);
  }

  ret = PruneTargetCandidates(tCands,costs, sourcePhrases);
  if(ret->IsEmpty()) {
    ret.reset();
  } else {
    // Fill in the cache placeholder inserted above and keep the
    // collection alive for this sentence.
    if(useCache) piter.first->second = ret;
    m_tgtColls.push_back(ret);
  }
  return ret;

}
|
| 157 |
+
|
| 158 |
+
void PDTAimp::Create(const std::vector<FactorType> &input
|
| 159 |
+
, const std::vector<FactorType> &output
|
| 160 |
+
, const std::string &filePath
|
| 161 |
+
, const std::vector<float> &weight
|
| 162 |
+
)
|
| 163 |
+
{
|
| 164 |
+
|
| 165 |
+
// set my members
|
| 166 |
+
m_dict=new PhraseDictionaryTree();
|
| 167 |
+
m_input=input;
|
| 168 |
+
m_output=output;
|
| 169 |
+
|
| 170 |
+
const StaticData &staticData = StaticData::Instance();
|
| 171 |
+
m_dict->NeedAlignmentInfo(staticData.NeedAlignmentInfo());
|
| 172 |
+
|
| 173 |
+
std::string binFname=filePath+".binphr.idx";
|
| 174 |
+
if(!FileExists(binFname.c_str())) {
|
| 175 |
+
UTIL_THROW2( "bin ttable does not exist");
|
| 176 |
+
//TRACE_ERR( "bin ttable does not exist -> create it\n");
|
| 177 |
+
//InputFileStream in(filePath);
|
| 178 |
+
//m_dict->Create(in,filePath);
|
| 179 |
+
}
|
| 180 |
+
VERBOSE(1,"reading bin ttable\n");
|
| 181 |
+
// m_dict->Read(filePath);
|
| 182 |
+
bool res=m_dict->Read(filePath);
|
| 183 |
+
if (!res) {
|
| 184 |
+
std::cerr << "bin ttable was read in a wrong way\n";
|
| 185 |
+
exit(1);
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
void PDTAimp::CacheSource(ConfusionNet const& src)
|
| 191 |
+
{
|
| 192 |
+
assert(m_dict);
|
| 193 |
+
const size_t srcSize=src.GetSize();
|
| 194 |
+
|
| 195 |
+
std::vector<size_t> exploredPaths(srcSize+1,0);
|
| 196 |
+
std::vector<double> exPathsD(srcSize+1,-1.0);
|
| 197 |
+
|
| 198 |
+
// collect some statistics
|
| 199 |
+
std::vector<size_t> cnDepths(srcSize,0);
|
| 200 |
+
for(size_t i=0; i<srcSize; ++i) cnDepths[i]=src[i].size();
|
| 201 |
+
|
| 202 |
+
for(size_t len=1; len<=srcSize; ++len)
|
| 203 |
+
for(size_t i=0; i<=srcSize-len; ++i) {
|
| 204 |
+
double pd=0.0;
|
| 205 |
+
for(size_t k=i; k<i+len; ++k) pd+=log(1.0*cnDepths[k]);
|
| 206 |
+
exPathsD[len]=(exPathsD[len]>=0.0 ? addLogScale(pd,exPathsD[len]) : pd);
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
// update global statistics
|
| 210 |
+
if(pathCN.size()<=srcSize) pathCN.resize(srcSize+1,-1.0);
|
| 211 |
+
for(size_t len=1; len<=srcSize; ++len)
|
| 212 |
+
pathCN[len]=pathCN[len]>=0.0 ? addLogScale(pathCN[len],exPathsD[len]) : exPathsD[len];
|
| 213 |
+
|
| 214 |
+
if(path1Best.size()<=srcSize) path1Best.resize(srcSize+1,0);
|
| 215 |
+
for(size_t len=1; len<=srcSize; ++len) path1Best[len]+=srcSize-len+1;
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size()) {
|
| 219 |
+
TRACE_ERR("path stats for current CN: \nCN (full): ");
|
| 220 |
+
std::transform(exPathsD.begin()+1
|
| 221 |
+
,exPathsD.end()
|
| 222 |
+
,std::ostream_iterator<double>(std::cerr," ")
|
| 223 |
+
,Exp);
|
| 224 |
+
TRACE_ERR("\n");
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
typedef std::map<StringTgtCand::Tokens,TScores> E2Costs;
|
| 228 |
+
|
| 229 |
+
std::map<Range,E2Costs> cov2cand;
|
| 230 |
+
std::vector<State> stack;
|
| 231 |
+
for(Position i=0 ; i < srcSize ; ++i)
|
| 232 |
+
stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));
|
| 233 |
+
|
| 234 |
+
std::vector<float> weightTrans = StaticData::Instance().GetWeights(m_obj);
|
| 235 |
+
std::vector<float> weightInput = StaticData::Instance().GetWeights(m_inputFeature);
|
| 236 |
+
float weightWP = StaticData::Instance().GetWeightWordPenalty();
|
| 237 |
+
|
| 238 |
+
while(!stack.empty()) {
|
| 239 |
+
State curr(stack.back());
|
| 240 |
+
stack.pop_back();
|
| 241 |
+
|
| 242 |
+
UTIL_THROW_IF2(curr.end() >= srcSize, "Error");
|
| 243 |
+
const ConfusionNet::Column &currCol=src[curr.end()];
|
| 244 |
+
// in a given column, loop over all possibilities
|
| 245 |
+
for(size_t colidx=0; colidx<currCol.size(); ++colidx) {
|
| 246 |
+
const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
|
| 247 |
+
std::string s;
|
| 248 |
+
Factors2String(w,s);
|
| 249 |
+
bool isEpsilon=(s=="" || s==EPSILON);
|
| 250 |
+
|
| 251 |
+
//assert that we have the right number of link params in this CN option
|
| 252 |
+
UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
|
| 253 |
+
"Incorrect number of input scores");
|
| 254 |
+
|
| 255 |
+
// do not start with epsilon (except at first position)
|
| 256 |
+
if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
|
| 257 |
+
|
| 258 |
+
// At a given node in the prefix tree, look to see if w defines an edge to
|
| 259 |
+
// another node (Extend). Stay at the same node if w==EPSILON
|
| 260 |
+
PPtr nextP = (isEpsilon ? curr.ptr : m_dict->Extend(curr.ptr,s));
|
| 261 |
+
|
| 262 |
+
if(nextP) { // w is a word that should be considered
|
| 263 |
+
Range newRange(curr.begin(),curr.end()+src.GetColumnIncrement(curr.end(),colidx));
|
| 264 |
+
|
| 265 |
+
//add together the link scores from the current state and the new arc
|
| 266 |
+
float inputScoreSum = 0;
|
| 267 |
+
std::vector<float> newInputScores(m_numInputScores,0.0);
|
| 268 |
+
if (m_numInputScores) {
|
| 269 |
+
std::transform(currCol[colidx].second.denseScores.begin(), currCol[colidx].second.denseScores.end(),
|
| 270 |
+
curr.GetScores().begin(),
|
| 271 |
+
newInputScores.begin(),
|
| 272 |
+
std::plus<float>());
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
//we need to sum up link weights (excluding realWordCount, which isn't in numLinkParams)
|
| 276 |
+
//if the sum is too low, then we won't expand this.
|
| 277 |
+
//TODO: dodgy! shouldn't we consider weights here? what about zero-weight params?
|
| 278 |
+
inputScoreSum = std::accumulate(newInputScores.begin(),newInputScores.begin()+m_numInputScores,0.0);
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
Phrase newSrc(curr.src);
|
| 282 |
+
if(!isEpsilon) newSrc.AddWord(w);
|
| 283 |
+
if(newRange.second<srcSize && inputScoreSum>LOWEST_SCORE) {
|
| 284 |
+
// if there is more room to grow, add a new state onto the queue
|
| 285 |
+
// to be explored that represents [begin, curEnd+)
|
| 286 |
+
stack.push_back(State(newRange,nextP,newInputScores));
|
| 287 |
+
stack.back().src=newSrc;
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
std::vector<StringTgtCand> tcands;
|
| 291 |
+
// now, look up the target candidates (aprx. TargetPhraseCollection) for
|
| 292 |
+
// the current path through the CN
|
| 293 |
+
m_dict->GetTargetCandidates(nextP,tcands);
|
| 294 |
+
|
| 295 |
+
if(newRange.second>=exploredPaths.size()+newRange.first)
|
| 296 |
+
exploredPaths.resize(newRange.second-newRange.first+1,0);
|
| 297 |
+
++exploredPaths[newRange.second-newRange.first];
|
| 298 |
+
|
| 299 |
+
totalE+=tcands.size();
|
| 300 |
+
|
| 301 |
+
if(tcands.size()) {
|
| 302 |
+
E2Costs& e2costs=cov2cand[newRange];
|
| 303 |
+
Phrase const* srcPtr=uniqSrcPhr(newSrc);
|
| 304 |
+
for(size_t i=0; i<tcands.size(); ++i) {
|
| 305 |
+
//put input scores in first - already logged, just drop in directly
|
| 306 |
+
std::vector<float> transcores(m_obj->GetNumScoreComponents());
|
| 307 |
+
UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
|
| 308 |
+
"Incorrect number of translation scores");
|
| 309 |
+
|
| 310 |
+
//put in phrase table scores, logging as we insert
|
| 311 |
+
std::transform(tcands[i].scores.begin()
|
| 312 |
+
,tcands[i].scores.end()
|
| 313 |
+
,transcores.begin()
|
| 314 |
+
,TransformScore);
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
//tally up
|
| 318 |
+
float score=std::inner_product(transcores.begin(), transcores.end(), weightTrans.begin(), 0.0f);
|
| 319 |
+
|
| 320 |
+
// input feature
|
| 321 |
+
score +=std::inner_product(newInputScores.begin(), newInputScores.end(), weightInput.begin(), 0.0f);
|
| 322 |
+
|
| 323 |
+
//count word penalty
|
| 324 |
+
score-=tcands[i].tokens.size() * weightWP;
|
| 325 |
+
|
| 326 |
+
std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].tokens,TScores()));
|
| 327 |
+
|
| 328 |
+
if(p.second) ++distinctE;
|
| 329 |
+
|
| 330 |
+
TScores & scores=p.first->second;
|
| 331 |
+
if(p.second || scores.total<score) {
|
| 332 |
+
scores.total=score;
|
| 333 |
+
scores.transScore=transcores;
|
| 334 |
+
scores.inputScores=newInputScores;
|
| 335 |
+
scores.src=srcPtr;
|
| 336 |
+
}
|
| 337 |
+
}
|
| 338 |
+
}
|
| 339 |
+
}
|
| 340 |
+
}
|
| 341 |
+
} // end while(!stack.empty())
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size()) {
|
| 345 |
+
TRACE_ERR("CN (explored): ");
|
| 346 |
+
std::copy(exploredPaths.begin()+1,exploredPaths.end(),
|
| 347 |
+
std::ostream_iterator<size_t>(std::cerr," "));
|
| 348 |
+
TRACE_ERR("\n");
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
if(pathExplored.size()<exploredPaths.size())
|
| 352 |
+
pathExplored.resize(exploredPaths.size(),0);
|
| 353 |
+
for(size_t len=1; len<=srcSize; ++len)
|
| 354 |
+
pathExplored[len]+=exploredPaths[len];
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
// m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
|
| 358 |
+
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize()));
|
| 359 |
+
|
| 360 |
+
for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
|
| 361 |
+
assert(i->first.first<m_rangeCache.size());
|
| 362 |
+
assert(i->first.second>0);
|
| 363 |
+
assert(static_cast<size_t>(i->first.second-1)<m_rangeCache[i->first.first].size());
|
| 364 |
+
assert(m_rangeCache[i->first.first][i->first.second-1]==0);
|
| 365 |
+
|
| 366 |
+
std::vector<TargetPhrase> tCands;
|
| 367 |
+
tCands.reserve(i->second.size());
|
| 368 |
+
|
| 369 |
+
std::vector<std::pair<float,size_t> > costs;
|
| 370 |
+
costs.reserve(i->second.size());
|
| 371 |
+
|
| 372 |
+
std::vector<Phrase> sourcePhrases;
|
| 373 |
+
sourcePhrases.reserve(i->second.size());
|
| 374 |
+
|
| 375 |
+
for(E2Costs::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
|
| 376 |
+
TScores const & scores=j->second;
|
| 377 |
+
TargetPhrase targetPhrase(m_obj);
|
| 378 |
+
CreateTargetPhrase(targetPhrase
|
| 379 |
+
, j ->first
|
| 380 |
+
, m_obj->options()->output.factor_delimiter
|
| 381 |
+
, scores.transScore
|
| 382 |
+
, scores.inputScores
|
| 383 |
+
, NULL
|
| 384 |
+
, scores.src);
|
| 385 |
+
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
| 386 |
+
tCands.push_back(targetPhrase);
|
| 387 |
+
|
| 388 |
+
sourcePhrases.push_back(*scores.src);
|
| 389 |
+
|
| 390 |
+
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
TargetPhraseCollectionWithSourcePhrase::shared_ptr
|
| 394 |
+
rv = PruneTargetCandidates(tCands, costs, sourcePhrases);
|
| 395 |
+
|
| 396 |
+
if(rv->IsEmpty())
|
| 397 |
+
rv.reset();
|
| 398 |
+
else {
|
| 399 |
+
m_rangeCache[i->first.first][i->first.second-1]=rv;
|
| 400 |
+
m_tgtColls.push_back(rv);
|
| 401 |
+
}
|
| 402 |
+
}
|
| 403 |
+
// free memory
|
| 404 |
+
m_dict->FreeMemory();
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
/** Build a TargetPhrase from a tokenized target-side candidate.
 *
 * Each entry of factorStrings is one surface token whose factors are
 * packed into a single string separated by factorDelimiter; the tokens
 * are split and interned into the global FactorCollection.  Translation
 * and (optional) input-link scores are dropped into the score breakdown
 * and the phrase is scored in isolation against the source phrase.
 *
 * @param targetPhrase    phrase object to fill (words + scores)
 * @param factorStrings   one packed factor string per target word
 * @param factorDelimiter delimiter between factors inside a token
 * @param transVector     phrase-table scores (already transformed)
 * @param inputVector     confusion-net / lattice input scores
 * @param alignmentString optional word-alignment spec; may be NULL
 * @param srcPtr          source phrase used for isolated evaluation
 */
void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
                                 StringTgtCand::Tokens const& factorStrings,
                                 std::string const& factorDelimiter,
                                 Scores const& transVector,
                                 Scores const& inputVector,
                                 const std::string *alignmentString,
                                 Phrase const* srcPtr) const
{
  FactorCollection &fc = FactorCollection::Instance();

  // One packed token per target word: split it on the factor delimiter
  // and assign each piece to the corresponding output factor slot.
  for (size_t tok = 0; tok < factorStrings.size(); ++tok) {
    util::TokenIter<util::MultiCharacter, false>
    piece(*factorStrings[tok], factorDelimiter);
    Word& targetWord = targetPhrase.AddWord();
    for (size_t fi = 0; fi < m_output.size(); ++fi, ++piece) {
      targetWord[m_output[fi]] = fc.AddFactor(*piece);
    }
  }

  if (alignmentString) {
    targetPhrase.SetAlignmentInfo(*alignmentString);
  }

  // Input-link scores only exist for confusion-net / lattice input.
  if (m_numInputScores) {
    targetPhrase.GetScoreBreakdown().Assign(m_inputFeature, inputVector);
  }

  targetPhrase.GetScoreBreakdown().Assign(m_obj, transVector);
  targetPhrase.EvaluateInIsolation(*srcPtr, m_obj->GetFeaturesToApply());
}
|
| 437 |
+
|
| 438 |
+
/** Select the best target candidates up to the phrase-table limit.
 *
 * Partially sorts the (cost, index) pairs so the cheapest entries come
 * first, then copies the corresponding target/source phrase pairs into
 * a fresh collection.  A table limit of 0 means "no limit".
 *
 * @param tCands        candidate target phrases
 * @param costs         (negative future score, index into tCands) pairs;
 *                      reordered in place by the partial sort
 * @param sourcePhrases source phrase for each candidate (parallel to tCands)
 * @return collection holding copies of the kept candidates
 */
TargetPhraseCollectionWithSourcePhrase::shared_ptr
PDTAimp::PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands,
 std::vector<std::pair<float,size_t> >& costs,
 const std::vector<Phrase> &sourcePhrases) const
{
  typedef std::vector<std::pair<float,size_t> >::iterator CostIter;

  UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
                 "Number of target phrases must equal number of source phrases");

  TargetPhraseCollectionWithSourcePhrase::shared_ptr collection;
  collection.reset(new TargetPhraseCollectionWithSourcePhrase);

  // Keep at most m_tableLimit entries (0 = keep everything).
  size_t keep = costs.size();
  if (m_obj->m_tableLimit > 0 && m_obj->m_tableLimit < costs.size()) {
    keep = m_obj->m_tableLimit;
  }
  CostIter nth = costs.begin() + keep;

  // Partial sort: the `keep` cheapest candidates end up before `nth`.
  NTH_ELEMENT3(costs.begin(), nth, costs.end());

  // Copy the surviving candidates (ownership of the new TargetPhrase
  // passes to the collection).
  for (CostIter it = costs.begin(); it != nth; ++it) {
    const size_t idx = it->second;
    TargetPhrase *kept = new TargetPhrase(tCands[idx]);
    collection->Add(kept, sourcePhrases[idx]);
  }

  return collection;
}
|
| 473 |
+
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
|
mosesdecoder/moses/Parameter.cpp
ADDED
|
@@ -0,0 +1,1690 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <ctime>
|
| 23 |
+
#include <iostream>
|
| 24 |
+
#include <iterator>
|
| 25 |
+
#include <fstream>
|
| 26 |
+
#include <sstream>
|
| 27 |
+
#include <algorithm>
|
| 28 |
+
#include <boost/algorithm/string/predicate.hpp>
|
| 29 |
+
#include "Parameter.h"
|
| 30 |
+
#include "Util.h"
|
| 31 |
+
#include "InputFileStream.h"
|
| 32 |
+
#include "StaticData.h"
|
| 33 |
+
#include "util/string_stream.hh"
|
| 34 |
+
#include "util/exception.hh"
|
| 35 |
+
#include "util/random.hh"
|
| 36 |
+
#include <boost/program_options.hpp>
|
| 37 |
+
|
| 38 |
+
#ifdef HAVE_XMLRPC_C
|
| 39 |
+
#include <xmlrpc_server.h>
|
| 40 |
+
#endif
|
| 41 |
+
|
| 42 |
+
using namespace std;
|
| 43 |
+
using namespace boost::algorithm;
|
| 44 |
+
namespace po = boost::program_options;
|
| 45 |
+
|
| 46 |
+
namespace Moses
|
| 47 |
+
{
|
| 48 |
+
|
| 49 |
+
/** define allowed parameters */
|
| 50 |
+
Parameter::Parameter()
|
| 51 |
+
{
|
| 52 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 53 |
+
// general options
|
| 54 |
+
po::options_description main_opts("Main Options");
|
| 55 |
+
AddParam(main_opts,"config", "f", "location of the configuration file");
|
| 56 |
+
AddParam(main_opts,"input-file", "i", "location of the input file to be translated");
|
| 57 |
+
|
| 58 |
+
AddParam(main_opts,"verbose", "v", "verbosity level of the logging");
|
| 59 |
+
AddParam(main_opts,"version", "show version of Moses and libraries used");
|
| 60 |
+
AddParam(main_opts,"show-weights", "print feature weights and exit");
|
| 61 |
+
AddParam(main_opts,"time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
|
| 62 |
+
AddParam(main_opts,"segment-time-out", "seconds for single segment after which is interrupted (-1=no time-out, default is -1)");
|
| 63 |
+
|
| 64 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 65 |
+
// factorization options
|
| 66 |
+
po::options_description factor_opts("General Factorization Options");
|
| 67 |
+
AddParam(factor_opts,"factor-delimiter", "fd", "specify a different factor delimiter than the default");
|
| 68 |
+
// one should be able to specify different factor delimiters for intput and output
|
| 69 |
+
AddParam(factor_opts,"mapping", "description of decoding steps"); // whatever that means ...
|
| 70 |
+
AddParam(factor_opts,"placeholder-factor", "Which source factor to use to store the original text for placeholders. The factor must not be used by a translation or gen model");
|
| 71 |
+
|
| 72 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 73 |
+
// general search options
|
| 74 |
+
po::options_description search_opts("Search Options");
|
| 75 |
+
string desc = "Which search algorithm to use.\n";
|
| 76 |
+
desc += "0=normal stack (default)\n";
|
| 77 |
+
desc += "1=cube pruning\n";
|
| 78 |
+
desc += "3=chart (with cube pruning)\n";
|
| 79 |
+
desc += "4=stack with batched lm requests\n";
|
| 80 |
+
desc += "5=chart (with incremental search)\n";
|
| 81 |
+
desc += "6=string-to-tree\n";
|
| 82 |
+
desc += "7=tree-to-string\n";
|
| 83 |
+
desc += "8=tree-to-string (SCFG-based)\n";
|
| 84 |
+
desc += "9=forest-to-string";
|
| 85 |
+
AddParam(search_opts,"search-algorithm", desc);
|
| 86 |
+
AddParam(search_opts,"beam-threshold", "b", "threshold for threshold pruning");
|
| 87 |
+
AddParam(search_opts,"early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
|
| 88 |
+
AddParam(search_opts,"stack", "s", "maximum stack size for histogram pruning. 0 = unlimited stack size");
|
| 89 |
+
AddParam(search_opts,"stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
|
| 90 |
+
|
| 91 |
+
// feature weight-related options
|
| 92 |
+
AddParam(search_opts,"weight-file", "wf", "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini");
|
| 93 |
+
AddParam(search_opts,"weight", "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
|
| 94 |
+
|
| 95 |
+
AddParam(search_opts,"feature-overwrite", "Override arguments in a particular feature function with a particular key. Format: -feature-overwrite \"FeatureName key=value\"");
|
| 96 |
+
|
| 97 |
+
po::options_description tune_opts("Options used in tuning.");
|
| 98 |
+
AddParam(tune_opts,"weight-overwrite", "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument");
|
| 99 |
+
AddParam(tune_opts,"feature-add", "Add a feature function on the command line. Used by mira to add BLEU feature");
|
| 100 |
+
AddParam(tune_opts,"weight-add", "Add weight for FF if it doesn't exist, i.e weights here are added 1st, and can be override by the ini file or on the command line. Used to specify initial weights for FF that was also specified on the copmmand line");
|
| 101 |
+
|
| 102 |
+
// phrase table limitations:
|
| 103 |
+
AddParam(search_opts,"max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
|
| 104 |
+
AddParam(search_opts,"max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
|
| 105 |
+
AddParam(search_opts,"max-phrase-length", "maximum phrase length (default 20)");
|
| 106 |
+
AddParam(search_opts,"translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
|
| 107 |
+
|
| 108 |
+
// miscellaneous search options
|
| 109 |
+
AddParam(search_opts,"disable-discarding", "dd", "disable hypothesis discarding"); // ??? memory management? UG
|
| 110 |
+
AddParam(search_opts,"phrase-drop-allowed", "da", "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison
|
| 111 |
+
AddParam(search_opts,"threads","th", "number of threads to use in decoding (defaults to single-threaded)");
|
| 112 |
+
|
| 113 |
+
// distortion options
|
| 114 |
+
po::options_description disto_opts("Distortion options");
|
| 115 |
+
AddParam(disto_opts,"distortion-limit", "dl", "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
|
| 116 |
+
AddParam(disto_opts,"monotone-at-punctuation", "mp", "do not reorder over punctuation");
|
| 117 |
+
AddParam(disto_opts,"early-distortion-cost", "edc", "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
|
| 118 |
+
AddParam(disto_opts,"distortion", "configurations for each factorized/lexicalized reordering model."); // zombie parameter?
|
| 119 |
+
|
| 120 |
+
// cube pruning
|
| 121 |
+
po::options_description cube_opts("Cube pruning options.");
|
| 122 |
+
AddParam(cube_opts,"cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
|
| 123 |
+
AddParam(cube_opts,"cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
|
| 124 |
+
AddParam(cube_opts,"cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
|
| 125 |
+
AddParam(cube_opts,"cube-pruning-deterministic-search", "cbds", "Break ties deterministically during search");
|
| 126 |
+
|
| 127 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 128 |
+
// minimum bayes risk decoding
|
| 129 |
+
po::options_description mbr_opts("Minimum Bayes Risk (MBR), Lattice MBR, and Consensus decoding");
|
| 130 |
+
|
| 131 |
+
AddParam(mbr_opts,"minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
|
| 132 |
+
AddParam(mbr_opts,"mbr-size", "number of translation candidates considered in MBR decoding (default 200)");
|
| 133 |
+
AddParam(mbr_opts,"mbr-scale", "scaling factor to convert log linear score probability in MBR decoding (default 1.0)");
|
| 134 |
+
|
| 135 |
+
AddParam(mbr_opts,"lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
|
| 136 |
+
AddParam(mbr_opts,"consensus-decoding", "con", "use consensus decoding (De Nero et. al. 2009)");
|
| 137 |
+
|
| 138 |
+
po::options_description lmbr_opts("Options specific to Lattic MBR");
|
| 139 |
+
AddParam(lmbr_opts,"lmbr-p", "unigram precision value for lattice mbr");
|
| 140 |
+
AddParam(lmbr_opts,"lmbr-r", "ngram precision decay value for lattice mbr");
|
| 141 |
+
AddParam(lmbr_opts,"lmbr-thetas", "theta(s) for lattice mbr calculation");
|
| 142 |
+
AddParam(mbr_opts,"lmbr-map-weight", "weight given to map solution when doing lattice MBR (default 0)");
|
| 143 |
+
AddParam(mbr_opts,"lmbr-pruning-factor", "average number of nodes/word wanted in pruned lattice");
|
| 144 |
+
AddParam(mbr_opts,"lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
|
| 145 |
+
|
| 146 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 147 |
+
// OOV handling options
|
| 148 |
+
po::options_description oov_opts("OOV Handling Options");
|
| 149 |
+
AddParam(oov_opts,"drop-unknown", "du", "drop unknown words instead of copying them");
|
| 150 |
+
AddParam(oov_opts,"mark-unknown", "mu", "mark unknown words in output");
|
| 151 |
+
AddParam(oov_opts,"unknown-word-prefix", "prefix to unknwon word when marked (default: 'UNK')");
|
| 152 |
+
AddParam(oov_opts,"unknown-word-suffix", "suffix to unknwon word when marked (default: '')");
|
| 153 |
+
AddParam(oov_opts,"lmodel-oov-feature", "add language model oov feature, one per model");
|
| 154 |
+
AddParam(oov_opts,"output-unknowns", "Output the unknown (OOV) words to the given file, one line per sentence");
|
| 155 |
+
AddParam(oov_opts,"always-create-direct-transopt", "Always create a translation that translates the source word ad-verbatim");
|
| 156 |
+
|
| 157 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 158 |
+
// input options
|
| 159 |
+
po::options_description input_opts("Input Format Options");
|
| 160 |
+
AddParam(input_opts,"input-factors", "list of factors in the input");
|
| 161 |
+
AddParam(input_opts,"inputtype", "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
|
| 162 |
+
AddParam(input_opts,"xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
|
| 163 |
+
AddParam(input_opts,"xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
|
| 164 |
+
AddParam(input_opts,"start-translation-id", "Id of 1st input. Default = 0");
|
| 165 |
+
AddParam(input_opts,"alternate-weight-setting", "aws", "alternate set of weights to used per xml specification");
|
| 166 |
+
|
| 167 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 168 |
+
// output options
|
| 169 |
+
po::options_description output_opts("Output Options");
|
| 170 |
+
AddParam(output_opts,"report-all-factors", "report all factors in output, not just first");
|
| 171 |
+
AddParam(output_opts,"output-factors", "list if factors in the output");
|
| 172 |
+
AddParam(output_opts,"print-id", "prefix translations with id. Default if false");
|
| 173 |
+
AddParam(output_opts,"print-passthrough", "output the sgml tag <passthrough> without any computation on that. Default is false");
|
| 174 |
+
AddParam(output_opts,"print-passthrough-in-n-best", "output the sgml tag <passthrough> without any computation on that in each entry of the n-best-list. Default is false");
|
| 175 |
+
AddParam(output_opts,"output-factors", "list of factors in the output");
|
| 176 |
+
AddParam(output_opts,"print-all-derivations", "to print all derivations in search graph");
|
| 177 |
+
AddParam(output_opts,"translation-details", "T", "for each best hypothesis, report translation details to the given file");
|
| 178 |
+
|
| 179 |
+
AddParam(output_opts,"output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
|
| 180 |
+
AddParam(output_opts,"output-word-graph", "owg", "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
|
| 181 |
+
AddParam(output_opts,"tree-translation-details", "Ttree", "for each hypothesis, report translation details with tree fragment info to given file");
|
| 182 |
+
AddParam(output_opts,"print-alignment-info", "Output word-to-word alignment to standard out, separated from translation by |||. Word-to-word alignments are takne from the phrase table if any. Default is false");
|
| 183 |
+
AddParam(output_opts,"alignment-output-file", "print output word alignments into given file");
|
| 184 |
+
AddParam(output_opts,"sort-word-alignment", "Sort word alignments for more consistent display. 0=no sort (default), 1=target order");
|
| 185 |
+
AddParam(output_opts,"report-segmentation", "t", "report phrase segmentation in the output");
|
| 186 |
+
AddParam(output_opts,"report-segmentation-enriched", "tt", "report phrase segmentation in the output with additional information");
|
| 187 |
+
|
| 188 |
+
// translation-all-details was introduced in the context of DIMwid: Decoder Inspection for Moses (using Widgets)
|
| 189 |
+
// see here: https://ufal.mff.cuni.cz/pbml/100/art-kurtz-seemann-braune-maletti.pdf
|
| 190 |
+
AddParam(output_opts,"translation-all-details", "Tall", "for all hypotheses, report translation details to the given file");
|
| 191 |
+
|
| 192 |
+
po::options_description osg_opts("Options for outputting search graphs");
|
| 193 |
+
AddParam(osg_opts,"output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
|
| 194 |
+
AddParam(osg_opts,"output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
|
| 195 |
+
AddParam(osg_opts,"unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
|
| 196 |
+
AddParam(osg_opts,"output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF) - the flag should be followed by a directory name, which must exist");
|
| 197 |
+
AddParam(output_opts,"include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
|
| 198 |
+
#ifdef HAVE_PROTOBUF
|
| 199 |
+
AddParam(osg_opts,"output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
|
| 200 |
+
#endif
|
| 201 |
+
AddParam(osg_opts,"output-search-graph-hypergraph", "DEPRECATED! Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder). This flag is followed by 3 values: 'true (gz|txt|bz) directory-name'");
|
| 202 |
+
|
| 203 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 204 |
+
// nbest-options
|
| 205 |
+
po::options_description nbest_opts("N-best Options");
|
| 206 |
+
AddParam(nbest_opts,"n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
|
| 207 |
+
// AddParam(nbest_opts,"n-best-list-file", "file of n-best-list to be generated; specify - as the file in order to write to STDOUT");
|
| 208 |
+
// AddParam(nbest_opts,"n-best-list-size", "size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
|
| 209 |
+
AddParam(nbest_opts,"labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
|
| 210 |
+
AddParam(nbest_opts,"n-best-trees", "Write n-best target-side trees to n-best-list");
|
| 211 |
+
AddParam(nbest_opts,"n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
|
| 212 |
+
AddParam(nbest_opts,"report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false");
|
| 213 |
+
AddParam(nbest_opts,"lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
|
| 214 |
+
AddParam(nbest_opts,"include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false");
|
| 215 |
+
AddParam(nbest_opts,"print-alignment-info-in-n-best",
|
| 216 |
+
"Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
|
| 217 |
+
|
| 218 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 219 |
+
// server options
|
| 220 |
+
po::options_description server_opts("Moses Server Options");
|
| 221 |
+
AddParam(server_opts,"server", "Run moses as a translation server.");
|
| 222 |
+
AddParam(server_opts,"daemon", "Run moses as a translation server in the background.");
|
| 223 |
+
AddParam(server_opts,"server-port", "Port for moses server");
|
| 224 |
+
AddParam(server_opts,"server-log", "Log destination for moses server");
|
| 225 |
+
AddParam(server_opts,"serial", "Run server in serial mode, processing only one request at a time.");
|
| 226 |
+
|
| 227 |
+
AddParam(server_opts,"server-maxconn",
|
| 228 |
+
"Max. No of simultaneous HTTP transactions allowed by the server.");
|
| 229 |
+
AddParam(server_opts,"server-maxconn-backlog",
|
| 230 |
+
"Max. No. of requests the OS will queue if the server is busy.");
|
| 231 |
+
AddParam(server_opts,"server-keepalive-maxconn",
|
| 232 |
+
"Max. No. of requests the server will accept on a single TCP connection.");
|
| 233 |
+
AddParam(server_opts,"server-keepalive-timeout",
|
| 234 |
+
"Max. number of seconds the server will keep a persistent connection alive.");
|
| 235 |
+
AddParam(server_opts,"server-timeout",
|
| 236 |
+
"Max. number of seconds the server will wait for a client to submit a request once a connection has been established.");
|
| 237 |
+
// session timeout and session cache size are for moses translation session handling
|
| 238 |
+
// they have nothing to do with the abyss server (but relate to the moses server)
|
| 239 |
+
AddParam(server_opts,"session-timeout",
|
| 240 |
+
"Timeout for sessions, e.g. '2h30m' or 1d (=24h)");
|
| 241 |
+
AddParam(server_opts,"session-cache-size", string("Max. number of sessions cached.")
|
| 242 |
+
+"Least recently used session is dumped first.");
|
| 243 |
+
|
| 244 |
+
po::options_description irstlm_opts("IRSTLM Options");
|
| 245 |
+
AddParam(irstlm_opts,"clean-lm-cache",
|
| 246 |
+
"clean language model caches after N translations (default N=1)");
|
| 247 |
+
|
| 248 |
+
po::options_description chart_opts("Chart Decoding Options");
|
| 249 |
+
AddParam(chart_opts,"max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
|
| 250 |
+
AddParam(chart_opts,"non-terminals", "list of non-term symbols, space separated");
|
| 251 |
+
AddParam(chart_opts,"rule-limit", "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE");
|
| 252 |
+
AddParam(chart_opts,"source-label-overlap", "What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0");
|
| 253 |
+
AddParam(chart_opts,"unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
|
| 254 |
+
|
| 255 |
+
po::options_description misc_opts("Miscellaneous Options");
|
| 256 |
+
AddParam(misc_opts,"mira", "do mira training");
|
| 257 |
+
AddParam(misc_opts,"description", "Source language, target language, description");
|
| 258 |
+
AddParam(misc_opts,"no-cache", "Disable all phrase-table caching. Default = false (ie. enable caching)");
|
| 259 |
+
AddParam(misc_opts,"default-non-term-for-empty-range-only", "Don't add [X] to all ranges, just ranges where there isn't a source non-term. Default = false (ie. add [X] everywhere)");
|
| 260 |
+
AddParam(misc_opts,"s2t-parsing-algorithm", "Which S2T parsing algorithm to use. 0=recursive CYK+, 1=scope-3 (default = 0)");
|
| 261 |
+
|
| 262 |
+
//AddParam(o,"continue-partial-translation", "cpt", "start from nonempty hypothesis");
|
| 263 |
+
AddParam(misc_opts,"decoding-graph-backoff", "dpb", "only use subsequent decoding paths for unknown spans of given length");
|
| 264 |
+
AddParam(misc_opts,"references", "Reference file(s) - used for bleu score feature");
|
| 265 |
+
AddParam(misc_opts,"recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
|
| 266 |
+
AddParam(misc_opts,"link-param-count", "Number of parameters on word links when using confusion networks or lattices (default = 1)");
|
| 267 |
+
AddParam(misc_opts,"feature-name-overwrite", "Override feature name (NOT arguments). Eg. SRILM-->KENLM, PhraseDictionaryMemory-->PhraseDictionaryScope3");
|
| 268 |
+
|
| 269 |
+
AddParam(misc_opts,"feature", "All the feature functions should be here");
|
| 270 |
+
AddParam(misc_opts,"context-string",
|
| 271 |
+
"A (tokenized) string containing context words for context-sensitive translation.");
|
| 272 |
+
AddParam(misc_opts,"context-weights", "A key-value map for context-sensitive translation.");
|
| 273 |
+
AddParam(misc_opts,"context-window",
|
| 274 |
+
"Context window (in words) for context-sensitive translation: {+|-|+-}<number>.");
|
| 275 |
+
|
| 276 |
+
// Compact phrase table and reordering table.
|
| 277 |
+
po::options_description cpt_opts("Options when using compact phrase and reordering tables.");
|
| 278 |
+
AddParam(cpt_opts,"minphr-memory", "Load phrase table in minphr format into memory");
|
| 279 |
+
AddParam(cpt_opts,"minlexr-memory", "Load lexical reordering table in minlexr format into memory");
|
| 280 |
+
|
| 281 |
+
po::options_description spe_opts("Simulated Post-editing Options");
|
| 282 |
+
AddParam(spe_opts,"spe-src", "Simulated post-editing. Source filename");
|
| 283 |
+
AddParam(spe_opts,"spe-trg", "Simulated post-editing. Target filename");
|
| 284 |
+
AddParam(spe_opts,"spe-aln", "Simulated post-editing. Alignment filename");
|
| 285 |
+
|
| 286 |
+
///////////////////////////////////////////////////////////////////////////////////////
|
| 287 |
+
// DEPRECATED options
|
| 288 |
+
po::options_description deprec_opts("Deprecated Options");
|
| 289 |
+
AddParam(deprec_opts,"link-param-count", "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)");
|
| 290 |
+
AddParam(deprec_opts,"weight-slm", "slm", "DEPRECATED. DO NOT USE. weight(s) for syntactic language model");
|
| 291 |
+
AddParam(deprec_opts,"weight-bl", "bl", "DEPRECATED. DO NOT USE. weight for bleu score feature");
|
| 292 |
+
AddParam(deprec_opts,"weight-d", "d", "DEPRECATED. DO NOT USE. weight(s) for distortion (reordering components)");
|
| 293 |
+
AddParam(deprec_opts,"weight-dlm", "dlm", "DEPRECATED. DO NOT USE. weight for discriminative LM feature function (on top of sparse weights)");
|
| 294 |
+
AddParam(deprec_opts,"weight-lr", "lr", "DEPRECATED. DO NOT USE. weight(s) for lexicalized reordering, if not included in weight-d");
|
| 295 |
+
AddParam(deprec_opts,"weight-generation", "g", "DEPRECATED. DO NOT USE. weight(s) for generation components");
|
| 296 |
+
AddParam(deprec_opts,"weight-i", "I", "DEPRECATED. DO NOT USE. weight(s) for word insertion - used for parameters from confusion network and lattice input links");
|
| 297 |
+
AddParam(deprec_opts,"weight-l", "lm", "DEPRECATED. DO NOT USE. weight(s) for language models");
|
| 298 |
+
AddParam(deprec_opts,"weight-lex", "lex", "DEPRECATED. DO NOT USE. weight for global lexical model");
|
| 299 |
+
AddParam(deprec_opts,"weight-glm", "glm", "DEPRECATED. DO NOT USE. weight for global lexical feature, sparse producer");
|
| 300 |
+
AddParam(deprec_opts,"weight-wt", "wt", "DEPRECATED. DO NOT USE. weight for word translation feature");
|
| 301 |
+
AddParam(deprec_opts,"weight-pp", "pp", "DEPRECATED. DO NOT USE. weight for phrase pair feature");
|
| 302 |
+
AddParam(deprec_opts,"weight-pb", "pb", "DEPRECATED. DO NOT USE. weight for phrase boundary feature");
|
| 303 |
+
AddParam(deprec_opts,"weight-t", "tm", "DEPRECATED. DO NOT USE. weights for translation model components");
|
| 304 |
+
AddParam(deprec_opts,"weight-p", "w", "DEPRECATED. DO NOT USE. weight for phrase penalty");
|
| 305 |
+
AddParam(deprec_opts,"weight-w", "w", "DEPRECATED. DO NOT USE. weight for word penalty");
|
| 306 |
+
AddParam(deprec_opts,"weight-u", "u", "DEPRECATED. DO NOT USE. weight for unknown word penalty");
|
| 307 |
+
AddParam(deprec_opts,"weight-e", "e", "DEPRECATED. DO NOT USE. weight for word deletion");
|
| 308 |
+
AddParam(deprec_opts,"text-type", "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
|
| 309 |
+
AddParam(deprec_opts,"input-scores", "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
|
| 310 |
+
AddParam(deprec_opts,"dlm-model", "DEPRECATED. DO NOT USE. Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
|
| 311 |
+
AddParam(deprec_opts,"generation-file", "DEPRECATED. DO NOT USE. location and properties of the generation table");
|
| 312 |
+
AddParam(deprec_opts,"global-lexical-file", "gl", "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation model file");
|
| 313 |
+
AddParam(deprec_opts,"glm-feature", "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation feature, sparse producer");
|
| 314 |
+
AddParam(deprec_opts,"lmodel-file", "DEPRECATED. DO NOT USE. location and properties of the language models");
|
| 315 |
+
AddParam(deprec_opts,"lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models");
|
| 316 |
+
#ifdef HAVE_SYNLM
|
| 317 |
+
AddParam(deprec_opts,"slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
|
| 318 |
+
AddParam(deprec_opts,"slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
|
| 319 |
+
AddParam(deprec_opts,"slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
|
| 320 |
+
#endif
|
| 321 |
+
AddParam(deprec_opts,"ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
|
| 322 |
+
AddParam(deprec_opts,"phrase-pair-feature", "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature");
|
| 323 |
+
AddParam(deprec_opts,"phrase-boundary-source-feature", "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature");
|
| 324 |
+
AddParam(deprec_opts,"phrase-boundary-target-feature", "DEPRECATED. DO NOT USE. Target factors for phrase boundary feature");
|
| 325 |
+
AddParam(deprec_opts,"phrase-length-feature", "DEPRECATED. DO NOT USE. Count features for source length, target length, both of each phrase");
|
| 326 |
+
AddParam(deprec_opts,"target-word-insertion-feature", "DEPRECATED. DO NOT USE. Count feature for each unaligned target word");
|
| 327 |
+
AddParam(deprec_opts,"source-word-deletion-feature", "DEPRECATED. DO NOT USE. Count feature for each unaligned source word");
|
| 328 |
+
AddParam(deprec_opts,"word-translation-feature", "DEPRECATED. DO NOT USE. Count feature for word translation according to word alignment");
|
| 329 |
+
|
| 330 |
+
po::options_description zombie_opts("Zombie Options");
|
| 331 |
+
AddParam(zombie_opts,"distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
mbr_opts.add(lmbr_opts);
|
| 335 |
+
search_opts.add(cube_opts);
|
| 336 |
+
search_opts.add(mbr_opts);
|
| 337 |
+
search_opts.add(disto_opts);
|
| 338 |
+
search_opts.add(chart_opts);
|
| 339 |
+
|
| 340 |
+
input_opts.add(spe_opts);
|
| 341 |
+
|
| 342 |
+
output_opts.add(nbest_opts);
|
| 343 |
+
output_opts.add(osg_opts);
|
| 344 |
+
|
| 345 |
+
m_options.add(main_opts);
|
| 346 |
+
m_options.add(server_opts);
|
| 347 |
+
m_options.add(input_opts);
|
| 348 |
+
m_options.add(search_opts);
|
| 349 |
+
m_options.add(output_opts);
|
| 350 |
+
m_options.add(oov_opts);
|
| 351 |
+
m_options.add(factor_opts);
|
| 352 |
+
m_options.add(cpt_opts);
|
| 353 |
+
m_options.add(irstlm_opts);
|
| 354 |
+
m_options.add(tune_opts);
|
| 355 |
+
m_options.add(misc_opts);
|
| 356 |
+
m_options.add(deprec_opts);
|
| 357 |
+
m_options.add(zombie_opts);
|
| 358 |
+
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
// Destructor: nothing to release explicitly; all members (maps, vectors,
// option descriptions) clean up after themselves.
Parameter::~Parameter()
{
}
|
| 364 |
+
|
| 365 |
+
/** Look up the stored value vector for a parameter.
 *  \param paramName parameter name (without the leading dash)
 *  \return pointer to the values set for this parameter, or NULL if the
 *          parameter was never set */
const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
{
  PARAM_MAP::const_iterator found = m_setting.find(paramName);
  return (found == m_setting.end()) ? NULL : &found->second;
}
|
| 375 |
+
|
| 376 |
+
/** initialize a parameter, sub of constructor */
|
| 377 |
+
void
|
| 378 |
+
Parameter::
|
| 379 |
+
AddParam(po::options_description& optgroup,
|
| 380 |
+
string const& paramName,
|
| 381 |
+
string const& description)
|
| 382 |
+
{
|
| 383 |
+
m_valid[paramName] = true;
|
| 384 |
+
m_description[paramName] = description;
|
| 385 |
+
optgroup.add_options()(paramName.c_str(), description.c_str());
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
/** initialize a parameter (including abbreviation), sub of constructor */
|
| 389 |
+
void
|
| 390 |
+
Parameter::
|
| 391 |
+
AddParam(po::options_description& optgroup,
|
| 392 |
+
string const& paramName,
|
| 393 |
+
string const& abbrevName,
|
| 394 |
+
string const& description)
|
| 395 |
+
{
|
| 396 |
+
m_valid[paramName] = true;
|
| 397 |
+
m_valid[abbrevName] = true;
|
| 398 |
+
m_abbreviation[paramName] = abbrevName;
|
| 399 |
+
m_fullname[abbrevName] = paramName;
|
| 400 |
+
m_description[paramName] = description;
|
| 401 |
+
string optname = paramName;
|
| 402 |
+
if (abbrevName.size() == 1) {
|
| 403 |
+
optname += string(",")+abbrevName;
|
| 404 |
+
// m_confusable[abbrevName[0]].insert(paramName);
|
| 405 |
+
}
|
| 406 |
+
optgroup.add_options()(optname.c_str(),description.c_str());
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
/** print descriptions of all parameters */
|
| 410 |
+
void
|
| 411 |
+
Parameter::
|
| 412 |
+
Explain()
|
| 413 |
+
{
|
| 414 |
+
cerr << "Usage:" << endl;
|
| 415 |
+
cerr << m_options << endl;
|
| 416 |
+
// for(PARAM_STRING::const_iterator iterParam = m_description.begin();
|
| 417 |
+
// iterParam != m_description.end(); iterParam++)
|
| 418 |
+
// {
|
| 419 |
+
// const string paramName = iterParam->first;
|
| 420 |
+
// const string paramDescription = iterParam->second;
|
| 421 |
+
// cerr << "\t-" << paramName;
|
| 422 |
+
// PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName );
|
| 423 |
+
// if ( iterAbbr != m_abbreviation.end() )
|
| 424 |
+
// cerr << " (" << iterAbbr->second << ")";
|
| 425 |
+
// cerr << ": " << paramDescription << endl;
|
| 426 |
+
// }
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
+
/** check whether an item on the command line is a switch or a value
|
| 430 |
+
* \param token token on the command line to checked **/
|
| 431 |
+
|
| 432 |
+
bool
|
| 433 |
+
Parameter::
|
| 434 |
+
isOption(const char* token)
|
| 435 |
+
{
|
| 436 |
+
if (! token) return false;
|
| 437 |
+
std::string tokenString(token);
|
| 438 |
+
size_t length = tokenString.size();
|
| 439 |
+
if (length <= 1) return false;
|
| 440 |
+
if (!starts_with(tokenString, "-")) return false;
|
| 441 |
+
if (tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;
|
| 442 |
+
return false;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
/** load all parameters from the configuration file and the command line switches */
|
| 446 |
+
bool
|
| 447 |
+
Parameter::
|
| 448 |
+
LoadParam(const string &filePath)
|
| 449 |
+
{
|
| 450 |
+
const char *argv[] = {"executable", "-f", filePath.c_str() };
|
| 451 |
+
return LoadParam(3, (char const**) argv);
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
/// Print out version information about the things that went into this
|
| 455 |
+
/// executable.
|
| 456 |
+
void show_version()
|
| 457 |
+
{
|
| 458 |
+
std::cout << "\nMoses code version (git tag or commit hash):\n "
|
| 459 |
+
<< MOSES_VERSION_ID << std::endl
|
| 460 |
+
<< "Libraries used:" << std::endl
|
| 461 |
+
<< " Boost version "
|
| 462 |
+
<< BOOST_VERSION / 100000 << "." // major version
|
| 463 |
+
<< BOOST_VERSION / 100 % 1000 << "." // minor version
|
| 464 |
+
<< BOOST_VERSION % 100 // patch level
|
| 465 |
+
<< std::endl;
|
| 466 |
+
#ifdef HAVE_XMLRPC_C
|
| 467 |
+
unsigned int major, minor, point;
|
| 468 |
+
xmlrpc_server_version(&major, &minor, &point);
|
| 469 |
+
std::cout << " Xmlrpc-c version "
|
| 470 |
+
<< major << "." << minor << "." << point << std::endl;
|
| 471 |
+
#endif
|
| 472 |
+
#ifdef HAVE_CMPH
|
| 473 |
+
// there's no easy way to determine the cmph version at compile time
|
| 474 |
+
std::cout << " CMPH (version unknown)" << std::endl;
|
| 475 |
+
#endif
|
| 476 |
+
|
| 477 |
+
#ifdef MMT_VERSION_ID
|
| 478 |
+
std::cout << string(20,'-')
|
| 479 |
+
<< "\nMMT extras version: " << MMT_VERSION_ID << std::endl;
|
| 480 |
+
#endif
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
/** load all parameters from the configuration file and the command line switches */
|
| 484 |
+
bool
|
| 485 |
+
Parameter::
|
| 486 |
+
LoadParam(int argc, char const* xargv[])
|
| 487 |
+
{
|
| 488 |
+
// legacy parameter handling: all parameters are expected
|
| 489 |
+
// to start with a single dash
|
| 490 |
+
char const* argv[argc+1];
|
| 491 |
+
for (int i = 0; i < argc; ++i) {
|
| 492 |
+
argv[i] = xargv[i];
|
| 493 |
+
if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
|
| 494 |
+
++argv[i];
|
| 495 |
+
if (!strcmp(argv[i],"-version")) {
|
| 496 |
+
show_version();
|
| 497 |
+
exit(0);
|
| 498 |
+
}
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
// config file (-f) arg mandatory
|
| 504 |
+
string configPath;
|
| 505 |
+
if ( (configPath = FindParam("-f", argc, argv)) == ""
|
| 506 |
+
&& (configPath = FindParam("-config", argc, argv)) == "") {
|
| 507 |
+
PrintCredit();
|
| 508 |
+
Explain();
|
| 509 |
+
PrintFF();
|
| 510 |
+
|
| 511 |
+
cerr << endl;
|
| 512 |
+
cerr << "No configuration file was specified. Use -config or -f";
|
| 513 |
+
cerr << endl;
|
| 514 |
+
return false;
|
| 515 |
+
} else {
|
| 516 |
+
if (!ReadConfigFile(configPath)) {
|
| 517 |
+
std::cerr << "Could not read " << configPath;
|
| 518 |
+
return false;
|
| 519 |
+
}
|
| 520 |
+
}
|
| 521 |
+
|
| 522 |
+
// overwrite parameters with values from switches
|
| 523 |
+
for(PARAM_STRING::const_iterator iterParam = m_description.begin();
|
| 524 |
+
iterParam != m_description.end(); iterParam++) {
|
| 525 |
+
const string paramName = iterParam->first;
|
| 526 |
+
OverwriteParam("-" + paramName, paramName, argc, argv);
|
| 527 |
+
}
|
| 528 |
+
|
| 529 |
+
// ... also shortcuts
|
| 530 |
+
for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin();
|
| 531 |
+
iterParam != m_abbreviation.end(); iterParam++) {
|
| 532 |
+
const string paramName = iterParam->first;
|
| 533 |
+
const string paramShortName = iterParam->second;
|
| 534 |
+
OverwriteParam("-" + paramShortName, paramName, argc, argv);
|
| 535 |
+
}
|
| 536 |
+
|
| 537 |
+
AddFeaturesCmd();
|
| 538 |
+
|
| 539 |
+
// logging of parameters that were set in either config or switch
|
| 540 |
+
int verbose = 1;
|
| 541 |
+
if (m_setting.find("verbose") != m_setting.end() &&
|
| 542 |
+
m_setting["verbose"].size() > 0)
|
| 543 |
+
verbose = Scan<int>(m_setting["verbose"][0]);
|
| 544 |
+
if (verbose >= 1) { // only if verbose
|
| 545 |
+
TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
|
| 546 |
+
for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ;
|
| 547 |
+
iterParam != m_setting.end(); iterParam++) {
|
| 548 |
+
TRACE_ERR( "\t" << iterParam->first << ": ");
|
| 549 |
+
for ( size_t i = 0; i < iterParam->second.size(); i++ )
|
| 550 |
+
TRACE_ERR( iterParam->second[i] << " ");
|
| 551 |
+
TRACE_ERR( endl);
|
| 552 |
+
}
|
| 553 |
+
}
|
| 554 |
+
|
| 555 |
+
// don't mix old and new format
|
| 556 |
+
if ((GetParam("feature") || GetParam("weight"))
|
| 557 |
+
&& (GetParam("weight-slm") || GetParam("weight-bl") || GetParam("weight-d") ||
|
| 558 |
+
GetParam("weight-dlm") || GetParam("weight-lrl") || GetParam("weight-generation") ||
|
| 559 |
+
GetParam("weight-i") || GetParam("weight-l") || GetParam("weight-lex") ||
|
| 560 |
+
GetParam("weight-glm") || GetParam("weight-wt") || GetParam("weight-pp") ||
|
| 561 |
+
GetParam("weight-pb") || GetParam("weight-t") || GetParam("weight-w") ||
|
| 562 |
+
GetParam("weight-p") ||
|
| 563 |
+
GetParam("weight-u") || GetParam("weight-e") ||
|
| 564 |
+
GetParam("dlm-mode") || GetParam("generation-file") || GetParam("global-lexical-file") ||
|
| 565 |
+
GetParam("glm-feature") || GetParam("lmodel-file") || GetParam("lmodel-dub") ||
|
| 566 |
+
GetParam("slmodel-file") || GetParam("slmodel-factor") ||
|
| 567 |
+
GetParam("slmodel-beam") || GetParam("ttable-file") || GetParam("phrase-pair-feature") ||
|
| 568 |
+
GetParam("phrase-boundary-source-feature") || GetParam("phrase-boundary-target-feature") || GetParam("phrase-length-feature") ||
|
| 569 |
+
GetParam("target-word-insertion-feature") || GetParam("source-word-deletion-feature") || GetParam("word-translation-feature")
|
| 570 |
+
)
|
| 571 |
+
) {
|
| 572 |
+
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
|
| 573 |
+
}
|
| 574 |
+
|
| 575 |
+
// convert old weights args to new format
|
| 576 |
+
if (GetParam("feature") == NULL) {
|
| 577 |
+
ConvertWeightArgs();
|
| 578 |
+
}
|
| 579 |
+
CreateWeightsMap();
|
| 580 |
+
WeightOverwrite();
|
| 581 |
+
|
| 582 |
+
// check for illegal parameters
|
| 583 |
+
bool noErrorFlag = true;
|
| 584 |
+
for (int i = 0 ; i < argc ; i++) {
|
| 585 |
+
if (isOption(argv[i])) {
|
| 586 |
+
string paramSwitch = (string) argv[i];
|
| 587 |
+
string paramName = paramSwitch.substr(1);
|
| 588 |
+
if (m_valid.find(paramName) == m_valid.end()) {
|
| 589 |
+
std::cerr << "illegal switch: " << paramSwitch;
|
| 590 |
+
noErrorFlag = false;
|
| 591 |
+
}
|
| 592 |
+
}
|
| 593 |
+
}
|
| 594 |
+
|
| 595 |
+
//Save("/tmp/moses.ini.new");
|
| 596 |
+
|
| 597 |
+
// check if parameters make sense
|
| 598 |
+
return Validate() && noErrorFlag;
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
void
|
| 602 |
+
Parameter::
|
| 603 |
+
AddFeaturesCmd()
|
| 604 |
+
{
|
| 605 |
+
const PARAM_VEC *params = GetParam("feature-add");
|
| 606 |
+
if (params) {
|
| 607 |
+
PARAM_VEC::const_iterator iter;
|
| 608 |
+
for (iter = params->begin(); iter != params->end(); ++iter) {
|
| 609 |
+
const string &line = *iter;
|
| 610 |
+
AddFeature(line);
|
| 611 |
+
}
|
| 612 |
+
|
| 613 |
+
m_setting.erase("feature-add");
|
| 614 |
+
}
|
| 615 |
+
}
|
| 616 |
+
|
| 617 |
+
std::vector<float>
|
| 618 |
+
Parameter::
|
| 619 |
+
GetWeights(const std::string &name)
|
| 620 |
+
{
|
| 621 |
+
std::vector<float> ret = m_weights[name];
|
| 622 |
+
|
| 623 |
+
// cerr << "WEIGHT " << name << "=";
|
| 624 |
+
// for (size_t i = 0; i < ret.size(); ++i) {
|
| 625 |
+
// cerr << ret[i] << ",";
|
| 626 |
+
// }
|
| 627 |
+
// cerr << endl;
|
| 628 |
+
return ret;
|
| 629 |
+
}
|
| 630 |
+
|
| 631 |
+
void
|
| 632 |
+
Parameter::
|
| 633 |
+
SetWeight(const std::string &name, size_t ind, float weight)
|
| 634 |
+
{
|
| 635 |
+
PARAM_VEC &newWeights = m_setting["weight"];
|
| 636 |
+
string line = name + SPrint(ind) + "= " + SPrint(weight);
|
| 637 |
+
newWeights.push_back(line);
|
| 638 |
+
}
|
| 639 |
+
|
| 640 |
+
void Parameter::SetWeight(const std::string &name, size_t ind, const vector<float> &weights)
|
| 641 |
+
{
|
| 642 |
+
PARAM_VEC &newWeights = m_setting["weight"];
|
| 643 |
+
string line = name + SPrint(ind) + "=";
|
| 644 |
+
|
| 645 |
+
for (size_t i = 0; i < weights.size(); ++i) {
|
| 646 |
+
line += " " + SPrint(weights[i]);
|
| 647 |
+
}
|
| 648 |
+
newWeights.push_back(line);
|
| 649 |
+
}
|
| 650 |
+
|
| 651 |
+
void
|
| 652 |
+
Parameter::
|
| 653 |
+
AddWeight(const std::string &name, size_t ind,
|
| 654 |
+
const std::vector<float> &weights)
|
| 655 |
+
{
|
| 656 |
+
PARAM_VEC &newWeights = m_setting["weight"];
|
| 657 |
+
|
| 658 |
+
string sought = name + SPrint(ind) + "=";
|
| 659 |
+
for (size_t i = 0; i < newWeights.size(); ++i) {
|
| 660 |
+
string &line = newWeights[i];
|
| 661 |
+
if (line.find(sought) == 0) {
|
| 662 |
+
// found existing weight, most likely to be input weights. Append to this line
|
| 663 |
+
for (size_t i = 0; i < weights.size(); ++i) {
|
| 664 |
+
line += " " + SPrint(weights[i]);
|
| 665 |
+
}
|
| 666 |
+
return;
|
| 667 |
+
}
|
| 668 |
+
}
|
| 669 |
+
|
| 670 |
+
// nothing found. Just set
|
| 671 |
+
SetWeight(name, ind, weights);
|
| 672 |
+
}
|
| 673 |
+
|
| 674 |
+
void
|
| 675 |
+
Parameter::
|
| 676 |
+
ConvertWeightArgsSingleWeight(const string &oldWeightName, const string &newWeightName)
|
| 677 |
+
{
|
| 678 |
+
size_t ind = 0;
|
| 679 |
+
PARAM_MAP::iterator iterMap;
|
| 680 |
+
|
| 681 |
+
iterMap = m_setting.find(oldWeightName);
|
| 682 |
+
if (iterMap != m_setting.end()) {
|
| 683 |
+
const PARAM_VEC &weights = iterMap->second;
|
| 684 |
+
for (size_t i = 0; i < weights.size(); ++i) {
|
| 685 |
+
SetWeight(newWeightName, ind, Scan<float>(weights[i]));
|
| 686 |
+
}
|
| 687 |
+
|
| 688 |
+
m_setting.erase(iterMap);
|
| 689 |
+
}
|
| 690 |
+
}
|
| 691 |
+
|
| 692 |
+
void
|
| 693 |
+
Parameter::
|
| 694 |
+
ConvertWeightArgsPhraseModel(const string &oldWeightName)
|
| 695 |
+
{
|
| 696 |
+
const PARAM_VEC *params;
|
| 697 |
+
|
| 698 |
+
// process input weights 1st
|
| 699 |
+
params = GetParam("weight-i");
|
| 700 |
+
if (params) {
|
| 701 |
+
vector<float> inputWeights = Scan<float>(*params);
|
| 702 |
+
PARAM_VEC &numInputScores = m_setting["input-scores"];
|
| 703 |
+
if (inputWeights.size() == 1) {
|
| 704 |
+
UTIL_THROW_IF2(numInputScores.size() != 0, "No [input-scores] section allowed");
|
| 705 |
+
numInputScores.push_back("1");
|
| 706 |
+
numInputScores.push_back("0");
|
| 707 |
+
} else if (inputWeights.size() == 2) {
|
| 708 |
+
UTIL_THROW_IF2(numInputScores.size() != 0, "No [input-scores] section allowed");
|
| 709 |
+
numInputScores.push_back("1");
|
| 710 |
+
numInputScores.push_back("1");
|
| 711 |
+
}
|
| 712 |
+
|
| 713 |
+
SetWeight("PhraseDictionaryBinary", 0, inputWeights);
|
| 714 |
+
}
|
| 715 |
+
|
| 716 |
+
// convert actually pt feature
|
| 717 |
+
VERBOSE(2,"Creating phrase table features" << endl);
|
| 718 |
+
|
| 719 |
+
size_t numInputScores = 0;
|
| 720 |
+
size_t numRealWordsInInput = 0;
|
| 721 |
+
map<string, size_t> ptIndices;
|
| 722 |
+
|
| 723 |
+
params = GetParam("input-scores");
|
| 724 |
+
if (params) {
|
| 725 |
+
numInputScores = Scan<size_t>(params->at(0));
|
| 726 |
+
|
| 727 |
+
if (params->size() > 1) {
|
| 728 |
+
numRealWordsInInput = Scan<size_t>(params->at(1));
|
| 729 |
+
}
|
| 730 |
+
}
|
| 731 |
+
|
| 732 |
+
// load phrase translation tables
|
| 733 |
+
params = GetParam("ttable-file");
|
| 734 |
+
if (params) {
|
| 735 |
+
// weights
|
| 736 |
+
const vector<string> translationVector = *params;
|
| 737 |
+
|
| 738 |
+
vector<size_t> maxTargetPhrase;
|
| 739 |
+
params = GetParam("ttable-limit");
|
| 740 |
+
if (params) {
|
| 741 |
+
maxTargetPhrase = Scan<size_t>(*params);
|
| 742 |
+
}
|
| 743 |
+
|
| 744 |
+
if(maxTargetPhrase.size() == 1 && translationVector.size() > 1) {
|
| 745 |
+
VERBOSE(1, "Using uniform ttable-limit of " << maxTargetPhrase[0] << " for all translation tables." << endl);
|
| 746 |
+
for(size_t i = 1; i < translationVector.size(); i++)
|
| 747 |
+
maxTargetPhrase.push_back(maxTargetPhrase[0]);
|
| 748 |
+
} else if(maxTargetPhrase.size() != 1 && maxTargetPhrase.size() < translationVector.size()) {
|
| 749 |
+
std::cerr << "You specified " << translationVector.size() << " translation tables, but only " << maxTargetPhrase.size() << " ttable-limits.";
|
| 750 |
+
return;
|
| 751 |
+
}
|
| 752 |
+
|
| 753 |
+
// MAIN LOOP
|
| 754 |
+
const PARAM_VEC &oldWeights = m_setting[oldWeightName];
|
| 755 |
+
|
| 756 |
+
size_t currOldInd = 0;
|
| 757 |
+
for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) {
|
| 758 |
+
util::StringStream ptLine;
|
| 759 |
+
|
| 760 |
+
vector<string> token = Tokenize(translationVector[currDict]);
|
| 761 |
+
|
| 762 |
+
if(currDict == 0 && token.size() == 4) {
|
| 763 |
+
std::cerr << "Phrase table specification in old 4-field format. No longer supported";
|
| 764 |
+
return;
|
| 765 |
+
}
|
| 766 |
+
UTIL_THROW_IF2(token.size() < 5, "Phrase table must have at least 5 scores");
|
| 767 |
+
|
| 768 |
+
int implementation = Scan<int>(token[0]);
|
| 769 |
+
|
| 770 |
+
string ptType;
|
| 771 |
+
switch (implementation) {
|
| 772 |
+
case 0: // Memory
|
| 773 |
+
ptType = "PhraseDictionaryMemory";
|
| 774 |
+
break;
|
| 775 |
+
case 1: // Binary
|
| 776 |
+
ptType = "PhraseDictionaryBinary";
|
| 777 |
+
break;
|
| 778 |
+
case 2: // OnDisk
|
| 779 |
+
ptType = "PhraseDictionaryOnDisk";
|
| 780 |
+
break;
|
| 781 |
+
case 6: // SCFG
|
| 782 |
+
ptType = "PhraseDictionaryMemory";
|
| 783 |
+
break;
|
| 784 |
+
case 12: // Compact
|
| 785 |
+
ptType = "PhraseDictionaryCompact";
|
| 786 |
+
break;
|
| 787 |
+
case 8: // SuffixArray
|
| 788 |
+
ptType = "PhraseDictionarySuffixArray";
|
| 789 |
+
break;
|
| 790 |
+
case 14: // DSuffixArray
|
| 791 |
+
ptType = "PhraseDictionaryDynSuffixArray";
|
| 792 |
+
break;
|
| 793 |
+
case 15: // DCacheBased:
|
| 794 |
+
ptType = "PhraseDictionaryDynamicCacheBased";
|
| 795 |
+
break;
|
| 796 |
+
case 16: // CachePT:
|
| 797 |
+
ptType = "PhraseDictionaryCache";
|
| 798 |
+
break;
|
| 799 |
+
default:
|
| 800 |
+
break;
|
| 801 |
+
}
|
| 802 |
+
|
| 803 |
+
size_t ptInd;
|
| 804 |
+
if (ptIndices.find(ptType) == ptIndices.end()) {
|
| 805 |
+
ptIndices[ptType] = 0;
|
| 806 |
+
ptInd = 0;
|
| 807 |
+
} else {
|
| 808 |
+
ptInd = ++ptIndices[ptType];
|
| 809 |
+
}
|
| 810 |
+
|
| 811 |
+
// weights
|
| 812 |
+
size_t numFFInd = (token.size() == 4) ? 2 : 3;
|
| 813 |
+
size_t numFF = Scan<size_t>(token[numFFInd]);
|
| 814 |
+
|
| 815 |
+
vector<float> weights(numFF);
|
| 816 |
+
for (size_t currFF = 0; currFF < numFF; ++currFF) {
|
| 817 |
+
UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
|
| 818 |
+
"Errors converting old phrase-table weights to new weights");
|
| 819 |
+
float weight = Scan<float>(oldWeights[currOldInd]);
|
| 820 |
+
weights[currFF] = weight;
|
| 821 |
+
|
| 822 |
+
++currOldInd;
|
| 823 |
+
}
|
| 824 |
+
|
| 825 |
+
// cerr << weights.size() << " PHRASE TABLE WEIGHTS "
|
| 826 |
+
// << __FILE__ << ":" << __LINE__ << endl;
|
| 827 |
+
AddWeight(ptType, ptInd, weights);
|
| 828 |
+
|
| 829 |
+
// actual pt
|
| 830 |
+
ptLine << ptType << " ";
|
| 831 |
+
ptLine << "input-factor=" << token[1] << " ";
|
| 832 |
+
ptLine << "output-factor=" << token[2] << " ";
|
| 833 |
+
ptLine << "path=" << token[4] << " ";
|
| 834 |
+
|
| 835 |
+
//characteristics of the phrase table
|
| 836 |
+
|
| 837 |
+
vector<FactorType> input = Tokenize<FactorType>(token[1], ",")
|
| 838 |
+
,output = Tokenize<FactorType>(token[2], ",");
|
| 839 |
+
size_t numScoreComponent = Scan<size_t>(token[3]);
|
| 840 |
+
string filePath= token[4];
|
| 841 |
+
|
| 842 |
+
if(currDict==0) {
|
| 843 |
+
// only the 1st pt. THis is shit
|
| 844 |
+
// TODO. find what the assumptions made by confusion network about phrase table output which makes
|
| 845 |
+
// it only work with binary file. This is a hack
|
| 846 |
+
numScoreComponent += numInputScores + numRealWordsInInput;
|
| 847 |
+
}
|
| 848 |
+
|
| 849 |
+
ptLine << "num-features=" << numScoreComponent << " ";
|
| 850 |
+
ptLine << "table-limit=" << maxTargetPhrase[currDict] << " ";
|
| 851 |
+
|
| 852 |
+
if (implementation == 8 || implementation == 14) {
|
| 853 |
+
ptLine << "target-path=" << token[5] << " ";
|
| 854 |
+
ptLine << "alignment-path=" << token[6] << " ";
|
| 855 |
+
}
|
| 856 |
+
|
| 857 |
+
AddFeature(ptLine.str());
|
| 858 |
+
} // for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) {
|
| 859 |
+
} // if (GetParam("ttable-file").size() > 0) {
|
| 860 |
+
|
| 861 |
+
m_setting.erase("weight-i");
|
| 862 |
+
m_setting.erase(oldWeightName);
|
| 863 |
+
m_setting.erase("ttable-file");
|
| 864 |
+
m_setting.erase("ttable-limit");
|
| 865 |
+
|
| 866 |
+
}
|
| 867 |
+
|
| 868 |
+
void
|
| 869 |
+
Parameter::
|
| 870 |
+
AddFeature(const std::string &line)
|
| 871 |
+
{
|
| 872 |
+
PARAM_VEC &features = m_setting["feature"];
|
| 873 |
+
features.push_back(line);
|
| 874 |
+
}
|
| 875 |
+
|
| 876 |
+
/** Convert the old-style distortion configuration ([weight-d] plus
 *  [distortion-file]) into new-style feature lines and weights.
 *  The first weight-d value becomes the distance-based Distortion weight
 *  (phrase-based search only); the remaining values are consumed, in order,
 *  by the lexicalized reordering tables. Both old sections are erased. */
void
Parameter::
ConvertWeightArgsDistortion()
{
  const string oldWeightName = "weight-d";
  const string oldLexReordingName = "distortion-file";

  // distortion / lex distortion
  const PARAM_VEC *oldWeights = GetParam(oldWeightName);

  if (oldWeights) {
    const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
    if (searchAlgo == NULL ||
        (searchAlgo->size() > 0
         && (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1")
        )
       ) {
      // phrase-based (search algorithm 0 or 1, or unspecified).
      // Add distance distortion to list of features
      AddFeature("Distortion");
      SetWeight("Distortion", 0, Scan<float>(oldWeights->at(0)));
    }

    // everything but the last is lex reordering model

    // currOldInd starts at 1: index 0 was the distance-distortion weight.
    size_t currOldInd = 1;
    const PARAM_VEC *lextable = GetParam(oldLexReordingName);

    for (size_t indTable = 0; lextable && indTable < lextable->size(); ++indTable) {
      // Old format per line: factors type num-features path
      const string &line = lextable->at(indTable);
      vector<string> toks = Tokenize(line);

      size_t numFF = Scan<size_t>(toks[2]);

      // Pull this table's weights from the shared old weight-d list.
      vector<float> weights(numFF);
      for (size_t currFF = 0; currFF < numFF; ++currFF) {
        UTIL_THROW_IF2(oldWeights && currOldInd >= oldWeights->size(),
                       "Errors converting old distortion weights to new weights");
        float weight = Scan<float>(oldWeights->at(currOldInd));
        weights[currFF] = weight;

        ++currOldInd;
      }
      SetWeight("LexicalReordering", indTable, weights);

      // Emit the new-style feature line for this reordering table.
      util::StringStream strme;
      strme << "LexicalReordering "
            << "type=" << toks[1] << " ";

      // Old factor spec is "input-output", e.g. "0-0".
      vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
      UTIL_THROW_IF2(factors.size() != 2,
                     "Error in old factor specification for lexicalized reordering model: "
                     << toks[0]);
      strme << "input-factor=" << factors[0]
            << " output-factor=" << factors[1] << " ";

      strme << "num-features=" << toks[2] << " ";
      strme << "path=" << toks[3];

      AddFeature(strme.str());
    }
  }

  // Old-format sections are consumed; remove them either way.
  m_setting.erase(oldWeightName);
  m_setting.erase(oldLexReordingName);

}
|
| 942 |
+
|
| 943 |
+
/** Convert old-style LM configuration ([weight-l] plus [lmodel-file])
 *  into new-style feature lines (SRILM/IRSTLM/KENLM) and weights.
 *  For chart decoding the old weights are run through UntransformLMScore
 *  (chart and phrase-based decoders historically used different log bases
 *  for LM scores — see UntransformLMScore). Old sections are erased. */
void
Parameter::
ConvertWeightArgsLM()
{
  const string oldWeightName = "weight-l";
  const string oldFeatureName = "lmodel-file";
  const PARAM_VEC *params;

  // Chart decoding is assumed unless search-algorithm is 0 or 1
  // (the phrase-based algorithms) or the parameter is absent.
  bool isChartDecoding = true;

  params = GetParam("search-algorithm");
  if (params == NULL ||
      (params->size() > 0
       && (Trim(params->at(0)) == "0" || Trim(params->at(0)) == "1")
      )
     ) {
    isChartDecoding = false;
  }

  // Per-LM count of extra OOV feature weights, if configured.
  vector<int> oovWeights;
  params = GetParam("lmodel-oov-feature");
  if (params) {
    oovWeights = Scan<int>(*params);
  }

  PARAM_MAP::iterator iterMap;

  iterMap = m_setting.find(oldWeightName);
  if (iterMap != m_setting.end()) {

    // currOldInd walks the flat old weight list across all LMs.
    size_t currOldInd = 0;
    const PARAM_VEC &weights = iterMap->second;
    const PARAM_VEC &models = m_setting[oldFeatureName];
    for (size_t lmIndex = 0; lmIndex < models.size(); ++lmIndex) {
      // Old format per line: type factor order path
      const string &line = models[lmIndex];
      vector<string> modelToks = Tokenize(line);

      int lmType = Scan<int>(modelToks[0]);

      // Map the old numeric LM type id to the new feature name.
      string newFeatureName;
      switch (lmType) {
      case 0:
        newFeatureName = "SRILM";
        break;
      case 1:
        newFeatureName = "IRSTLM";
        break;
      case 8:
      case 9:
        newFeatureName = "KENLM";
        break;
      default:
        UTIL_THROW2("Unkown language model type id:" << lmType);
      }

      // One weight per LM, plus any configured OOV feature weights.
      size_t numFF = 1;
      if (oovWeights.size() > lmIndex)
        numFF += oovWeights[lmIndex];

      vector<float> weightsLM(numFF);
      for (size_t currFF = 0; currFF < numFF; ++currFF) {
        UTIL_THROW_IF2(currOldInd >= weights.size(),
                       "Errors converting old LM weights to new weights");
        weightsLM[currFF] = Scan<float>(weights[currOldInd]);
        if (isChartDecoding) {
          weightsLM[currFF] = UntransformLMScore(weightsLM[currFF]);
        }

        ++currOldInd;
      }

      SetWeight(newFeatureName, lmIndex, weightsLM);

      // Emit the new-style feature line.
      string featureLine = newFeatureName + " "
                           + "factor=" + modelToks[1] + " " // factor
                           + "order=" + modelToks[2] + " " // order
                           + "num-features=" + SPrint(numFF) + " ";
      if (lmType == 9) {
        // Old type 9 was KENLM with lazy loading.
        featureLine += "load=lazy ";
      }

      if(oovWeights.size() > lmIndex)
        featureLine += "oov-feature=1 ";

      featureLine += "path=" + modelToks[3]; // file

      AddFeature(featureLine);
    } // for (size_t lmIndex = 0; lmIndex < models.size(); ++lmIndex) {

    m_setting.erase(iterMap);
  }

  m_setting.erase(oldFeatureName);
}
|
| 1037 |
+
|
| 1038 |
+
/** Convert old-style generation-model configuration ([generation-file]
 *  plus the given old weight section) into new-style "Generation" feature
 *  lines and weights under newWeightName. Both old sections are erased.
 *  @param oldWeightName old config section holding the flat weight list
 *  @param newWeightName feature name the converted weights are stored under */
void
Parameter::
ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string &newWeightName)
{
  string oldFeatureName = "generation-file";

  // NOTE: operator[] default-inserts an empty entry if the section is
  // absent; it is erased again at the end either way.
  PARAM_VEC &oldWeights = m_setting[oldWeightName];

  if (oldWeights.size() > 0) {
    // currOldInd walks the flat old weight list across all tables.
    size_t currOldInd = 0;
    PARAM_VEC &models = m_setting[oldFeatureName];

    for (size_t indTable = 0; indTable < models.size(); ++indTable) {
      // Old format per line: input-factor output-factor num-features path
      string &line = models[indTable];
      vector<string> modelToks = Tokenize(line);

      size_t numFF = Scan<size_t>(modelToks[2]);

      vector<float> weights(numFF);
      for (size_t currFF = 0; currFF < numFF; ++currFF) {
        UTIL_THROW_IF2(currOldInd >= oldWeights.size(),
                       "Errors converting old generation weights to new weights");
        float weight = Scan<float>(oldWeights[currOldInd]);
        weights[currFF] = weight;

        ++currOldInd;
      }
      SetWeight(newWeightName, indTable, weights);

      // Emit the new-style feature line for this generation table.
      util::StringStream strme;
      strme << "Generation "
            << "input-factor=" << modelToks[0] << " "
            << "output-factor=" << modelToks[1] << " "
            << "num-features=" << modelToks[2] << " "
            << "path=" << modelToks[3];
      AddFeature(strme.str());
    }
  }

  m_setting.erase(oldWeightName);
  m_setting.erase(oldFeatureName);
}
|
| 1081 |
+
|
| 1082 |
+
void
|
| 1083 |
+
Parameter::
|
| 1084 |
+
ConvertWeightArgsWordPenalty()
|
| 1085 |
+
{
|
| 1086 |
+
const std::string oldWeightName = "weight-w";
|
| 1087 |
+
const std::string newWeightName = "WordPenalty";
|
| 1088 |
+
|
| 1089 |
+
bool isChartDecoding = true;
|
| 1090 |
+
const PARAM_VEC *searchAlgo = GetParam("search-algorithm");
|
| 1091 |
+
if (searchAlgo == NULL ||
|
| 1092 |
+
(searchAlgo->size() > 0
|
| 1093 |
+
&& (Trim(searchAlgo->at(0)) == "0" || Trim(searchAlgo->at(0)) == "1")
|
| 1094 |
+
)
|
| 1095 |
+
) {
|
| 1096 |
+
isChartDecoding = false;
|
| 1097 |
+
}
|
| 1098 |
+
|
| 1099 |
+
PARAM_MAP::iterator iterMap;
|
| 1100 |
+
|
| 1101 |
+
iterMap = m_setting.find(oldWeightName);
|
| 1102 |
+
if (iterMap != m_setting.end()) {
|
| 1103 |
+
const PARAM_VEC &weights = iterMap->second;
|
| 1104 |
+
for (size_t i = 0; i < weights.size(); ++i) {
|
| 1105 |
+
float weight = Scan<float>(weights[i]);
|
| 1106 |
+
if (isChartDecoding) {
|
| 1107 |
+
weight *= 0.434294482;
|
| 1108 |
+
}
|
| 1109 |
+
SetWeight(newWeightName, i, weight);
|
| 1110 |
+
}
|
| 1111 |
+
|
| 1112 |
+
m_setting.erase(iterMap);
|
| 1113 |
+
}
|
| 1114 |
+
|
| 1115 |
+
}
|
| 1116 |
+
|
| 1117 |
+
void
|
| 1118 |
+
Parameter::
|
| 1119 |
+
ConvertPhrasePenalty()
|
| 1120 |
+
{
|
| 1121 |
+
string oldWeightName = "weight-p";
|
| 1122 |
+
const PARAM_VEC *params = GetParam(oldWeightName);
|
| 1123 |
+
if (params) {
|
| 1124 |
+
UTIL_THROW_IF2(params->size() != 1,
|
| 1125 |
+
"There should be only 1 phrase-penalty weight");
|
| 1126 |
+
float weight = Scan<float>(params->at(0));
|
| 1127 |
+
AddFeature("PhrasePenalty");
|
| 1128 |
+
SetWeight("PhrasePenalty", 0, weight);
|
| 1129 |
+
|
| 1130 |
+
m_setting.erase(oldWeightName);
|
| 1131 |
+
}
|
| 1132 |
+
}
|
| 1133 |
+
|
| 1134 |
+
/** Top-level driver that converts all old-style weight sections into the
 *  new feature/weight representation. Call order matters: each helper
 *  consumes and erases its own old sections, and the phrase-model
 *  conversion runs last. */
void
Parameter::
ConvertWeightArgs()
{
  // can't handle discr LM. must do it manually 'cos of bigram/n-gram split
  UTIL_THROW_IF2( m_setting.count("weight-dlm") != 0,
                  "Can't handle discr LM. must do it manually 'cos of bigram/n-gram split");

  // check that old & new format aren't mixed.
  // NOTE: this only warns on cerr; it does not abort.
  if (m_setting.count("weight") &&
      (m_setting.count("weight-i") || m_setting.count("weight-t") || m_setting.count("weight-w") ||
       m_setting.count("weight-l") || m_setting.count("weight-u") || m_setting.count("weight-lex") ||
       m_setting.count("weight-generation") || m_setting.count("weight-lr") || m_setting.count("weight-d")
      )) {
    cerr << "Do not mix old and new format for specify weights";
  }

  ConvertWeightArgsWordPenalty();
  ConvertWeightArgsLM();
  ConvertWeightArgsSingleWeight("weight-slm", "SyntacticLM");
  ConvertWeightArgsSingleWeight("weight-u", "UnknownWordPenalty");
  ConvertWeightArgsGeneration("weight-generation", "Generation");
  ConvertWeightArgsDistortion();

  // don't know or can't be bothered converting these weights
  ConvertWeightArgsSingleWeight("weight-lr", "LexicalReordering");
  ConvertWeightArgsSingleWeight("weight-bl", "BleuScoreFeature");
  ConvertWeightArgsSingleWeight("weight-glm", "GlobalLexicalModel");
  ConvertWeightArgsSingleWeight("weight-wt", "WordTranslationFeature");
  ConvertWeightArgsSingleWeight("weight-pp", "PhrasePairFeature");
  ConvertWeightArgsSingleWeight("weight-pb", "PhraseBoundaryFeature");

  ConvertWeightArgsSingleWeight("weight-e", "WordDeletion"); // TODO Can't find real name
  ConvertWeightArgsSingleWeight("weight-lex", "GlobalLexicalReordering"); // TODO Can't find real name

  ConvertPhrasePenalty();

  // These two features are always present, regardless of the old config.
  AddFeature("WordPenalty");
  AddFeature("UnknownWordPenalty");

  ConvertWeightArgsPhraseModel("weight-t");

}
|
| 1177 |
+
|
| 1178 |
+
void
|
| 1179 |
+
Parameter::
|
| 1180 |
+
CreateWeightsMap()
|
| 1181 |
+
{
|
| 1182 |
+
CreateWeightsMap(m_setting["weight-add"]);
|
| 1183 |
+
CreateWeightsMap(m_setting["weight"]);
|
| 1184 |
+
}
|
| 1185 |
+
|
| 1186 |
+
void
|
| 1187 |
+
Parameter::
|
| 1188 |
+
CreateWeightsMap(const PARAM_VEC &vec)
|
| 1189 |
+
{
|
| 1190 |
+
for (size_t i = 0; i < vec.size(); ++i) {
|
| 1191 |
+
const string &line = vec[i];
|
| 1192 |
+
vector<string> toks = Tokenize(line);
|
| 1193 |
+
UTIL_THROW_IF2(toks.size() < 2,
|
| 1194 |
+
"Error in format of weights: " << line);
|
| 1195 |
+
|
| 1196 |
+
string name = toks[0];
|
| 1197 |
+
name = name.substr(0, name.size() - 1);
|
| 1198 |
+
|
| 1199 |
+
vector<float> weights(toks.size() - 1);
|
| 1200 |
+
for (size_t i = 1; i < toks.size(); ++i) {
|
| 1201 |
+
float weight = Scan<float>(toks[i]);
|
| 1202 |
+
weights[i - 1] = weight;
|
| 1203 |
+
}
|
| 1204 |
+
m_weights[name] = weights;
|
| 1205 |
+
}
|
| 1206 |
+
}
|
| 1207 |
+
|
| 1208 |
+
/** Apply [weight-overwrite]: a single line of the form
 *  "Feat1= w ... FeatN= w ...", where a token ending in '=' starts a new
 *  feature and subsequent numeric tokens are its weights. The special
 *  token "x" keeps the existing weight at that position. Overwrites
 *  entries in m_weights in place. */
void
Parameter::
WeightOverwrite()
{
  PARAM_VEC &vec = m_setting["weight-overwrite"];

  if (vec.size() == 0)
    return;

  // should only be on 1 line
  UTIL_THROW_IF2(vec.size() != 1,
                 "weight-overwrite should only be on 1 line");

  // Parser state: the feature currently being filled, its collected
  // weights, the position within it, and its pre-existing weights
  // (needed to honor "x" placeholders).
  string name("");
  vector<float> weights;
  vector<string> toks = Tokenize(vec[0]);
  size_t cnt = 0;
  const std::vector<float>* oldWeights = NULL;
  for (size_t i = 0; i < toks.size(); ++i) {
    const string &tok = toks[i];

    if (ends_with(tok, "=")) {
      // start of new feature

      if (name != "") {
        // save previous ff
        m_weights[name] = weights;
        weights.clear();
      }

      name = tok.substr(0, tok.size() - 1);
      // Look up existing weights so "x" tokens can copy them.
      std::map<std::string, std::vector<float> >::const_iterator found = m_weights.find(name);
      if (found!=m_weights.end()) {
        oldWeights = &(found->second);
      } else {
        oldWeights = NULL;
      }
      cnt = 0;
    } else {
      // a weight for curr ff
      if (toks[i] == "x") {
        // "x" = keep the previous value at this position.
        UTIL_THROW_IF2(!oldWeights || cnt>=oldWeights->size(),
                       "Keeping previous weight failed in weight-overwrite");
        weights.push_back(oldWeights->at(cnt));
      } else {
        float weight = Scan<float>(toks[i]);
        weights.push_back(weight);
      }
      ++cnt;
    }
  }

  // Flush the final feature (the loop only saves on seeing the next one).
  if (name != "") {
    m_weights[name] = weights;
  }

}
|
| 1265 |
+
|
| 1266 |
+
/** check that parameter settings make sense */
// Returns true when all checks pass: every set parameter is known,
// lmodel-dub (if given) matches lmodel-file in count, and the referenced
// input/generation/distortion files exist. Checks short-circuit: once a
// check fails, later file checks are skipped.
bool
Parameter::
Validate()
{
  bool noErrorFlag = true;

  // All configured parameters must be registered in m_valid.
  PARAM_MAP::const_iterator iterParams;
  for (iterParams = m_setting.begin(); iterParams != m_setting.end(); ++iterParams) {
    const std::string &key = iterParams->first;

    if (m_valid.find(key) == m_valid.end()) {
      std::cerr << "Unknown parameter " << key;
      noErrorFlag = false;
    }
  }

  // NOTE: operator[] default-inserts empty entries for the keys probed
  // below; that is harmless but does mutate m_setting.
  if (m_setting["lmodel-dub"].size() > 0) {
    if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()) {
      std::cerr << "Config and parameters specify "
                << static_cast<int>(m_setting["lmodel-file"].size())
                << " language model files (lmodel-file), but "
                << static_cast<int>(m_setting["lmodel-dub"].size())
                << " LM upperbounds (lmodel-dub)"
                << endl;
      noErrorFlag = false;
    }
  }

  // do files exist?

  // input file
  if (noErrorFlag && m_setting["input-file"].size() == 1) {
    noErrorFlag = FileExists(m_setting["input-file"][0]);
    if (!noErrorFlag) {
      std::cerr << endl << "Input file " << m_setting["input-file"][0] << " does not exist";
    }
  }
  // generation tables (path is field 3 of each entry)
  if (noErrorFlag) {
    std::vector<std::string> ext;
    //raw tables in either un compressed or compressed form
    ext.push_back("");
    ext.push_back(".gz");
    noErrorFlag = FilesExist("generation-file", 3, ext);
  }
  // distortion (lexicalized reordering) tables (path is field 3)
  if (noErrorFlag) {
    std::vector<std::string> ext;
    //raw tables in either un compressed or compressed form
    ext.push_back("");
    ext.push_back(".gz");
    //prefix tree format
    ext.push_back(".binlexr.idx");
    //prefix tree format
    ext.push_back(".minlexr");
    noErrorFlag = FilesExist("distortion-file", 3, ext);
  }
  return noErrorFlag;
}
|
| 1326 |
+
|
| 1327 |
+
/** check whether a file exists */
|
| 1328 |
+
bool
|
| 1329 |
+
Parameter::
|
| 1330 |
+
FilesExist(const string ¶mName, int fieldNo,
|
| 1331 |
+
std::vector<std::string> const& extensions)
|
| 1332 |
+
{
|
| 1333 |
+
typedef std::vector<std::string> StringVec;
|
| 1334 |
+
StringVec::const_iterator iter;
|
| 1335 |
+
|
| 1336 |
+
PARAM_MAP::const_iterator iterParam = m_setting.find(paramName);
|
| 1337 |
+
if (iterParam == m_setting.end()) {
|
| 1338 |
+
// no param. therefore nothing to check
|
| 1339 |
+
return true;
|
| 1340 |
+
}
|
| 1341 |
+
const StringVec &pathVec = (*iterParam).second;
|
| 1342 |
+
for (iter = pathVec.begin() ; iter != pathVec.end() ; ++iter) {
|
| 1343 |
+
StringVec vec = Tokenize(*iter);
|
| 1344 |
+
|
| 1345 |
+
size_t tokenizeIndex;
|
| 1346 |
+
if (fieldNo == -1)
|
| 1347 |
+
tokenizeIndex = vec.size() - 1;
|
| 1348 |
+
else
|
| 1349 |
+
tokenizeIndex = static_cast<size_t>(fieldNo);
|
| 1350 |
+
|
| 1351 |
+
if (tokenizeIndex >= vec.size()) {
|
| 1352 |
+
std::cerr << "Expected at least " << (tokenizeIndex+1) << " tokens per entry in '"
|
| 1353 |
+
<< paramName << "', but only found "
|
| 1354 |
+
<< vec.size();
|
| 1355 |
+
return false;
|
| 1356 |
+
}
|
| 1357 |
+
const string &pathStr = vec[tokenizeIndex];
|
| 1358 |
+
|
| 1359 |
+
bool fileFound=0;
|
| 1360 |
+
for(size_t i=0; i<extensions.size() && !fileFound; ++i) {
|
| 1361 |
+
fileFound|=FileExists(pathStr + extensions[i]);
|
| 1362 |
+
}
|
| 1363 |
+
if(!fileFound) {
|
| 1364 |
+
std::cerr << "File " << pathStr << " does not exist";
|
| 1365 |
+
return false;
|
| 1366 |
+
}
|
| 1367 |
+
}
|
| 1368 |
+
return true;
|
| 1369 |
+
}
|
| 1370 |
+
|
| 1371 |
+
/** look for a switch in arg, update parameter */
// TODO arg parsing like this does not belong in the library, it belongs
// in moses-cmd
// Returns the argument following the first occurrence of paramSwitch in
// argv, or "" if the switch is absent. NOTE(review): if the switch is the
// last argument, an error is printed but "" is still returned — callers
// cannot distinguish "absent" from "missing value" (see TODO below).
string
Parameter::
FindParam(const string &paramSwitch, int argc, char const* argv[])
{
  for (int i = 0 ; i < argc ; i++) {
    if (string(argv[i]) == paramSwitch) {
      if (i+1 < argc) {
        return argv[i+1];
      } else {
        std::cerr << "Option " << paramSwitch << " requires a parameter!";
        // TODO return some sort of error, not the empty string
      }
    }
  }
  return "";
}
|
| 1390 |
+
|
| 1391 |
+
/** update parameter settings with command line switches
 * \param paramSwitch (potentially short) name of switch
 * \param paramName full name of parameter
 * \param argc number of arguments on command line
 * \param argv values of paramters on command line */
// All values following the switch (up to the next option token) replace
// the parameter's existing values positionally; extra values are appended.
// If the switch is absent, the parameter is left untouched.
void
Parameter::
OverwriteParam(const string &paramSwitch, const string &paramName,
               int argc, char const* argv[])
{
  // Find the first value position after the switch, if present.
  int startPos = -1;
  for (int i = 0 ; i < argc ; i++) {
    if (string(argv[i]) == paramSwitch) {
      startPos = i+1;
      break;
    }
  }
  if (startPos < 0)
    return;

  int index = 0;
  m_setting[paramName]; // defines the parameter, important for boolean switches
  // Consume values until the next option token or end of argv.
  while (startPos < argc && (!isOption(argv[startPos]))) {
    if (m_setting[paramName].size() > (size_t)index)
      m_setting[paramName][index] = argv[startPos];
    else
      m_setting[paramName].push_back(argv[startPos]);
    index++;
    startPos++;
  }
}
|
| 1422 |
+
|
| 1423 |
+
|
| 1424 |
+
/** read parameters from a configuration file */
// INI-like format: "[section]" lines start a new parameter; every other
// non-blank line is appended as a value of the current section. '#'
// starts a comment; lines are trimmed.
// NOTE(review): always returns true — open failures are presumably
// handled by InputFileStream itself; verify against its implementation.
// NOTE(review): a '[' line with no closing ']' leaves the previous
// section name in effect silently.
bool
Parameter::
ReadConfigFile(const string &filePath )
{
  InputFileStream inFile(filePath);
  string line, paramName;
  while(getline(inFile, line)) {
    // comments
    size_t comPos = line.find_first_of("#");
    if (comPos != string::npos)
      line = line.substr(0, comPos);
    // trim leading and trailing spaces/tabs
    line = Trim(line);

    if (line.size() == 0) {
      // blank line. do nothing.
    } else if (line[0]=='[') {
      // new parameter: take the text between '[' and the first ']'
      for (size_t currPos = 0 ; currPos < line.size() ; currPos++) {
        if (line[currPos] == ']') {
          paramName = line.substr(1, currPos - 1);
          break;
        }
      }
    } else {
      // add value to parameter
      m_setting[paramName].push_back(line);
    }
  }
  return true;
}
|
| 1456 |
+
|
| 1457 |
+
struct Credit {
|
| 1458 |
+
string name, contact, currentPursuits, areaResponsibility;
|
| 1459 |
+
int sortId;
|
| 1460 |
+
|
| 1461 |
+
Credit(string name, string contact, string currentPursuits, string areaResponsibility) {
|
| 1462 |
+
this->name = name ;
|
| 1463 |
+
this->contact = contact ;
|
| 1464 |
+
this->currentPursuits = currentPursuits ;
|
| 1465 |
+
this->areaResponsibility = areaResponsibility;
|
| 1466 |
+
this->sortId = util::rand_excl(1000);
|
| 1467 |
+
}
|
| 1468 |
+
|
| 1469 |
+
bool operator<(const Credit &other) const {
|
| 1470 |
+
/*
|
| 1471 |
+
if (areaResponsibility.size() != 0 && other.areaResponsibility.size() ==0)
|
| 1472 |
+
return true;
|
| 1473 |
+
if (areaResponsibility.size() == 0 && other.areaResponsibility.size() !=0)
|
| 1474 |
+
return false;
|
| 1475 |
+
|
| 1476 |
+
return name < other.name;
|
| 1477 |
+
*/
|
| 1478 |
+
return sortId < other.sortId;
|
| 1479 |
+
}
|
| 1480 |
+
|
| 1481 |
+
};
|
| 1482 |
+
|
| 1483 |
+
std::ostream& operator<<(std::ostream &os, const Credit &credit)
|
| 1484 |
+
{
|
| 1485 |
+
os << credit.name;
|
| 1486 |
+
if (credit.contact != "")
|
| 1487 |
+
os << "\t contact: " << credit.contact;
|
| 1488 |
+
if (credit.currentPursuits != "")
|
| 1489 |
+
os << " " << credit.currentPursuits;
|
| 1490 |
+
if (credit.areaResponsibility != "")
|
| 1491 |
+
os << " I'll answer question on: " << credit.areaResponsibility;
|
| 1492 |
+
return os;
|
| 1493 |
+
}
|
| 1494 |
+
|
| 1495 |
+
/** Print the license banner and a randomly shuffled list of contributor
 *  credits to stderr. Purely cosmetic; called from the command line. */
void
Parameter::
PrintCredit()
{
  vector<Credit> everyone;
  // Seed the RNG so Credit's random sortId differs between runs.
  srand ( time(NULL) );

  everyone.push_back(Credit("Nicola Bertoldi"
                            , "911"
                            , ""
                            , "scripts & other stuff"));
  everyone.push_back(Credit("Ondrej Bojar"
                            , ""
                            , "czech this out!"
                            , ""));
  everyone.push_back(Credit("Chris Callison-Burch"
                            , "anytime, anywhere"
                            , "international playboy"
                            , ""));
  everyone.push_back(Credit("Alexandra Constantin"
                            , ""
                            , "eu sunt varza"
                            , ""));
  everyone.push_back(Credit("Brooke Cowan"
                            , "brooke@csail.mit.edu"
                            , "if you're going to san francisco, be sure to wear a flower in your hair"
                            , ""));
  everyone.push_back(Credit("Chris Dyer"
                            , "can't. i'll be out driving my mustang"
                            , "driving my mustang"
                            , ""));
  everyone.push_back(Credit("Marcello Federico"
                            , "federico at itc at it"
                            , "Researcher at ITC-irst, Trento, Italy"
                            , "IRST language model"));
  everyone.push_back(Credit("Evan Herbst"
                            , "Small college in upstate New York"
                            , ""
                            , ""));
  everyone.push_back(Credit("Philipp Koehn"
                            , "only between 2 and 4am"
                            , ""
                            , "Nothing fazes this dude"));
  everyone.push_back(Credit("Christine Moran"
                            , "weird building at MIT"
                            , ""
                            , ""));
  everyone.push_back(Credit("Wade Shen"
                            , "via morse code"
                            , "buying another laptop"
                            , ""));
  everyone.push_back(Credit("Richard Zens"
                            , "richard at aachen dot de"
                            , ""
                            , "ambiguous source input, confusion networks, confusing source code"));
  everyone.push_back(Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/"
                            , "phd student at Edinburgh Uni. Original Moses developer"
                            , "general queries/ flames on Moses."));

  // Credit::operator< compares the random sortId, so this shuffles.
  sort(everyone.begin(), everyone.end());


  cerr << "Moses - A beam search decoder for phrase-based statistical machine translation models" << endl
       << "Copyright (C) 2006 University of Edinburgh" << endl << endl

       << "This library is free software; you can redistribute it and/or" << endl
       << "modify it under the terms of the GNU Lesser General Public" << endl
       << "License as published by the Free Software Foundation; either" << endl
       << "version 2.1 of the License, or (at your option) any later version." << endl << endl

       << "This library is distributed in the hope that it will be useful," << endl
       << "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
       << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU" << endl
       << "Lesser General Public License for more details." << endl << endl

       << "You should have received a copy of the GNU Lesser General Public" << endl
       << "License along with this library; if not, write to the Free Software" << endl
       << "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA" << endl << endl
       << "***********************************************************************" << endl << endl
       << "Built on " << __DATE__ << " at " __TIME__ << endl << endl
       << "WHO'S FAULT IS THIS GODDAM SOFTWARE:" << endl;

  ostream_iterator<Credit> out(cerr, "\n");
  copy(everyone.begin(), everyone.end(), out);
  cerr << endl << endl;
}
|
| 1581 |
+
|
| 1582 |
+
/** update parameter settings with command line switches
 * \param paramName full name of parameter
 * \param values new values for paramName */
// If the parameter already has more than one value, the replacement must
// have exactly the same count; otherwise the parameter is resized to fit.
void
Parameter::
OverwriteParam(const string &paramName, PARAM_VEC values)
{
  VERBOSE(2,"Overwriting parameter " << paramName);

  m_setting[paramName]; // defines the parameter, important for boolean switches
  if (m_setting[paramName].size() > 1) {
    VERBOSE(2," (the parameter had " << m_setting[paramName].size() << " previous values)");
    UTIL_THROW_IF2(m_setting[paramName].size() != values.size(),
                   "Number of weight override for " << paramName
                   << " is not the same as the original number of weights");
  } else {
    VERBOSE(2," (the parameter does not have previous values)");
    m_setting[paramName].resize(values.size());
  }
  VERBOSE(2," with the following values:");
  // Copy the new values positionally.
  int i=0;
  for (PARAM_VEC::iterator iter = values.begin(); iter != values.end() ; iter++, i++) {
    m_setting[paramName][i] = *iter;
    VERBOSE(2, " " << *iter);
  }
  VERBOSE(2, std::endl);
}
|
| 1609 |
+
|
| 1610 |
+
void
|
| 1611 |
+
Parameter::
|
| 1612 |
+
PrintFF() const
|
| 1613 |
+
{
|
| 1614 |
+
StaticData::Instance().GetFeatureRegistry().PrintFF();
|
| 1615 |
+
}
|
| 1616 |
+
|
| 1617 |
+
std::set<std::string>
|
| 1618 |
+
Parameter::
|
| 1619 |
+
GetWeightNames() const
|
| 1620 |
+
{
|
| 1621 |
+
std::set<std::string> ret;
|
| 1622 |
+
std::map<std::string, std::vector<float> >::const_iterator iter;
|
| 1623 |
+
for (iter = m_weights.begin(); iter != m_weights.end(); ++iter) {
|
| 1624 |
+
const string &key = iter->first;
|
| 1625 |
+
ret.insert(key);
|
| 1626 |
+
}
|
| 1627 |
+
return ret;
|
| 1628 |
+
}
|
| 1629 |
+
|
| 1630 |
+
void
|
| 1631 |
+
Parameter::
|
| 1632 |
+
Save(const std::string path)
|
| 1633 |
+
{
|
| 1634 |
+
ofstream file;
|
| 1635 |
+
file.open(path.c_str());
|
| 1636 |
+
|
| 1637 |
+
PARAM_MAP::const_iterator iterOuter;
|
| 1638 |
+
for (iterOuter = m_setting.begin(); iterOuter != m_setting.end(); ++iterOuter) {
|
| 1639 |
+
const std::string §ionName = iterOuter->first;
|
| 1640 |
+
file << "[" << sectionName << "]" << endl;
|
| 1641 |
+
|
| 1642 |
+
const PARAM_VEC &values = iterOuter->second;
|
| 1643 |
+
|
| 1644 |
+
PARAM_VEC::const_iterator iterInner;
|
| 1645 |
+
for (iterInner = values.begin(); iterInner != values.end(); ++iterInner) {
|
| 1646 |
+
const std::string &value = *iterInner;
|
| 1647 |
+
file << value << endl;
|
| 1648 |
+
}
|
| 1649 |
+
|
| 1650 |
+
file << endl;
|
| 1651 |
+
}
|
| 1652 |
+
|
| 1653 |
+
|
| 1654 |
+
file.close();
|
| 1655 |
+
}
|
| 1656 |
+
|
| 1657 |
+
template<>
|
| 1658 |
+
void
|
| 1659 |
+
Parameter::
|
| 1660 |
+
SetParameter<bool>(bool ¶meter, std::string const& parameterName,
|
| 1661 |
+
bool const& defaultValue) const
|
| 1662 |
+
{
|
| 1663 |
+
const PARAM_VEC *params = GetParam(parameterName);
|
| 1664 |
+
|
| 1665 |
+
// default value if nothing is specified
|
| 1666 |
+
parameter = defaultValue;
|
| 1667 |
+
if (params == NULL) {
|
| 1668 |
+
return;
|
| 1669 |
+
}
|
| 1670 |
+
|
| 1671 |
+
// if parameter is just specified as, e.g. "-parameter" set it true
|
| 1672 |
+
if (params->size() == 0) {
|
| 1673 |
+
parameter = true;
|
| 1674 |
+
}
|
| 1675 |
+
// if paramter is specified "-parameter true" or "-parameter false"
|
| 1676 |
+
else if (params->size() == 1) {
|
| 1677 |
+
parameter = Scan<bool>( params->at(0));
|
| 1678 |
+
}
|
| 1679 |
+
}
|
| 1680 |
+
|
| 1681 |
+
void
|
| 1682 |
+
Parameter::
|
| 1683 |
+
SetParameter(bool& var, std::string const& name)
|
| 1684 |
+
{
|
| 1685 |
+
SetParameter(var,name,false);
|
| 1686 |
+
}
|
| 1687 |
+
|
| 1688 |
+
} // namespace
|
| 1689 |
+
|
| 1690 |
+
|
mosesdecoder/moses/Parameter.h
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_Parameter_h
|
| 23 |
+
#define moses_Parameter_h
|
| 24 |
+
|
| 25 |
+
#include <string>
|
| 26 |
+
#include <set>
|
| 27 |
+
#include <map>
|
| 28 |
+
#include <vector>
|
| 29 |
+
#include "TypeDef.h"
|
| 30 |
+
#include "Util.h"
|
| 31 |
+
#include <boost/program_options.hpp>
|
| 32 |
+
|
| 33 |
+
namespace Moses
|
| 34 |
+
{
|
| 35 |
+
typedef std::vector<std::string> PARAM_VEC;
|
| 36 |
+
typedef std::map<std::string, PARAM_VEC > PARAM_MAP;
|
| 37 |
+
typedef std::map<std::string, bool> PARAM_BOOL;
|
| 38 |
+
typedef std::map<std::string, std::string > PARAM_STRING;
|
| 39 |
+
|
| 40 |
+
/** Handles parameter values set in config file or on command line.
|
| 41 |
+
* Process raw parameter data (names and values as strings) for StaticData
|
| 42 |
+
* to parse; to get useful values, see StaticData.
|
| 43 |
+
*/
|
| 44 |
+
class Parameter
|
| 45 |
+
{
|
| 46 |
+
typedef boost::program_options::options_description options_description;
|
| 47 |
+
typedef boost::program_options::value_semantic value_semantic;
|
| 48 |
+
protected:
|
| 49 |
+
PARAM_MAP m_setting;
|
| 50 |
+
PARAM_BOOL m_valid;
|
| 51 |
+
PARAM_STRING m_abbreviation;
|
| 52 |
+
PARAM_STRING m_description;
|
| 53 |
+
PARAM_STRING m_fullname;
|
| 54 |
+
// std::map<char,std::set<std::string> > m_confusable;
|
| 55 |
+
// stores long parameter names that start with a letter that is also a short option.
|
| 56 |
+
options_description m_options;
|
| 57 |
+
|
| 58 |
+
std::map<std::string, std::vector<float> > m_weights;
|
| 59 |
+
|
| 60 |
+
std::string FindParam(const std::string ¶mSwitch, int argc, char const* argv[]);
|
| 61 |
+
void OverwriteParam(const std::string ¶mSwitch, const std::string ¶mName,
|
| 62 |
+
int argc, char const* argv[]);
|
| 63 |
+
bool ReadConfigFile(const std::string &filePath );
|
| 64 |
+
bool FilesExist(const std::string ¶mName, int fieldNo, std::vector<std::string> const& fileExtension=std::vector<std::string>(1,""));
|
| 65 |
+
bool isOption(const char* token);
|
| 66 |
+
bool Validate();
|
| 67 |
+
|
| 68 |
+
void
|
| 69 |
+
AddParam(options_description& optgroup,
|
| 70 |
+
value_semantic const* optvalue,
|
| 71 |
+
std::string const& paramName,
|
| 72 |
+
std::string const& description);
|
| 73 |
+
|
| 74 |
+
void
|
| 75 |
+
AddParam(options_description& optgroup,
|
| 76 |
+
std::string const ¶mName,
|
| 77 |
+
std::string const &description);
|
| 78 |
+
|
| 79 |
+
void
|
| 80 |
+
AddParam(options_description& optgroup,
|
| 81 |
+
value_semantic const* optvalue,
|
| 82 |
+
std::string const& paramName,
|
| 83 |
+
std::string const& abbrevName,
|
| 84 |
+
std::string const& description);
|
| 85 |
+
|
| 86 |
+
void
|
| 87 |
+
AddParam(options_description& optgroup,
|
| 88 |
+
std::string const& paramName,
|
| 89 |
+
std::string const& abbrevName,
|
| 90 |
+
std::string const& description);
|
| 91 |
+
|
| 92 |
+
void PrintCredit();
|
| 93 |
+
void PrintFF() const;
|
| 94 |
+
|
| 95 |
+
void SetWeight(const std::string &name, size_t ind, float weight);
|
| 96 |
+
void SetWeight(const std::string &name, size_t ind, const std::vector<float> &weights);
|
| 97 |
+
void AddWeight(const std::string &name, size_t ind, const std::vector<float> &weights);
|
| 98 |
+
void ConvertWeightArgs();
|
| 99 |
+
void ConvertWeightArgsSingleWeight(const std::string &oldWeightName, const std::string &newWeightName);
|
| 100 |
+
void ConvertWeightArgsPhraseModel(const std::string &oldWeightName);
|
| 101 |
+
void ConvertWeightArgsLM();
|
| 102 |
+
void ConvertWeightArgsDistortion();
|
| 103 |
+
void ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string &newWeightName);
|
| 104 |
+
void ConvertWeightArgsPhrasePenalty();
|
| 105 |
+
void ConvertWeightArgsWordPenalty();
|
| 106 |
+
void ConvertPhrasePenalty();
|
| 107 |
+
void CreateWeightsMap();
|
| 108 |
+
void CreateWeightsMap(const PARAM_VEC &vec);
|
| 109 |
+
void WeightOverwrite();
|
| 110 |
+
void AddFeature(const std::string &line);
|
| 111 |
+
void AddFeaturesCmd();
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
public:
|
| 115 |
+
Parameter();
|
| 116 |
+
~Parameter();
|
| 117 |
+
bool LoadParam(int argc, char const* argv[]);
|
| 118 |
+
bool LoadParam(const std::string &filePath);
|
| 119 |
+
void Explain();
|
| 120 |
+
|
| 121 |
+
/** return a vector of strings holding the whitespace-delimited values on the ini-file line corresponding to the given parameter name */
|
| 122 |
+
const PARAM_VEC *GetParam(const std::string ¶mName) const;
|
| 123 |
+
|
| 124 |
+
/** check if parameter is defined (either in moses.ini or as switch) */
|
| 125 |
+
bool isParamSpecified(const std::string ¶mName) const {
|
| 126 |
+
return m_setting.find( paramName ) != m_setting.end();
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
void OverwriteParam(const std::string ¶mName, PARAM_VEC values);
|
| 130 |
+
|
| 131 |
+
std::vector<float> GetWeights(const std::string &name);
|
| 132 |
+
const std::map<std::string, std::vector<float> > &GetAllWeights() const {
|
| 133 |
+
return m_weights;
|
| 134 |
+
}
|
| 135 |
+
std::set<std::string> GetWeightNames() const;
|
| 136 |
+
|
| 137 |
+
const PARAM_MAP &GetParams() const {
|
| 138 |
+
return m_setting;
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
void Save(const std::string path);
|
| 142 |
+
|
| 143 |
+
template<typename T>
|
| 144 |
+
void SetParameter(T &var, const std::string &name, const T &defaultValue) const {
|
| 145 |
+
const PARAM_VEC *params = GetParam(name);
|
| 146 |
+
if (params && params->size()) {
|
| 147 |
+
var = Scan<T>( params->at(0));
|
| 148 |
+
} else {
|
| 149 |
+
var = defaultValue;
|
| 150 |
+
}
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
void SetParameter(bool& var, std::string const& name);
|
| 154 |
+
|
| 155 |
+
bool SetBooleanSwitch(bool& val, std::string const name) {
|
| 156 |
+
// issues a warning if format is wrong
|
| 157 |
+
const PARAM_VEC *params = GetParam(name);
|
| 158 |
+
val = (params && params->size());
|
| 159 |
+
if (val && params->size() != 1) {
|
| 160 |
+
TRACE_ERR("ERROR: wrong format for switch -" << name);
|
| 161 |
+
return false;
|
| 162 |
+
}
|
| 163 |
+
return true;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
};
|
| 167 |
+
|
| 168 |
+
template<>
|
| 169 |
+
void Parameter::SetParameter<bool>(bool &var, const std::string &name, const bool &defaultValue) const;
|
| 170 |
+
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
#endif
|
mosesdecoder/moses/Phrase.h
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2006 University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#ifndef moses_Phrase_h
|
| 24 |
+
#define moses_Phrase_h
|
| 25 |
+
|
| 26 |
+
#include <iostream>
|
| 27 |
+
#include <vector>
|
| 28 |
+
#include <list>
|
| 29 |
+
#include <string>
|
| 30 |
+
|
| 31 |
+
#include <boost/functional/hash.hpp>
|
| 32 |
+
|
| 33 |
+
#include "Word.h"
|
| 34 |
+
#include "Util.h"
|
| 35 |
+
|
| 36 |
+
#include "util/string_piece.hh"
|
| 37 |
+
#include "util/exception.hh"
|
| 38 |
+
#include "parameters/AllOptions.h"
|
| 39 |
+
|
| 40 |
+
namespace Moses
|
| 41 |
+
{
|
| 42 |
+
class FactorMask;
|
| 43 |
+
class Range;
|
| 44 |
+
class ContextScope;
|
| 45 |
+
|
| 46 |
+
/** Representation of a phrase, ie. a contiguous number of words.
|
| 47 |
+
* Wrapper for vector of words
|
| 48 |
+
*/
|
| 49 |
+
class Phrase
|
| 50 |
+
{
|
| 51 |
+
friend std::ostream& operator<<(std::ostream&, const Phrase&);
|
| 52 |
+
// private:
|
| 53 |
+
protected:
|
| 54 |
+
std::vector<Word> m_words;
|
| 55 |
+
|
| 56 |
+
public:
|
| 57 |
+
|
| 58 |
+
virtual bool HasScope() const {
|
| 59 |
+
return false;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
virtual SPTR<ContextScope> GetScope() const {
|
| 63 |
+
return SPTR<ContextScope>();
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
/** No longer does anything as not using mem pool for Phrase class anymore */
|
| 68 |
+
static void InitializeMemPool();
|
| 69 |
+
static void FinalizeMemPool();
|
| 70 |
+
|
| 71 |
+
/** create empty phrase
|
| 72 |
+
*/
|
| 73 |
+
Phrase();
|
| 74 |
+
explicit Phrase(size_t reserveSize);
|
| 75 |
+
/** create phrase from vectors of words */
|
| 76 |
+
explicit Phrase(const std::vector< const Word* > &mergeWords);
|
| 77 |
+
|
| 78 |
+
/* This isn't a swap function because classes inherit from Phrase and might
|
| 79 |
+
* not override swap, which would be bad.
|
| 80 |
+
*/
|
| 81 |
+
void SwapWords(Phrase &other) {
|
| 82 |
+
swap(m_words, other.m_words);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
/** destructor */
|
| 86 |
+
virtual ~Phrase();
|
| 87 |
+
|
| 88 |
+
/**
|
| 89 |
+
* Fills phrase with words from format string, typically from phrase table or sentence input
|
| 90 |
+
*
|
| 91 |
+
* \param factorOrder factor types of each element in 2D string vector
|
| 92 |
+
* \param phraseString formatted input string to parse
|
| 93 |
+
* \param lhs returns the non-terminal Word for the left-hand side of an SCFG rule, may be NULL for phrase-based
|
| 94 |
+
*/
|
| 95 |
+
void CreateFromString(FactorDirection direction,
|
| 96 |
+
const std::vector<FactorType> &factorOrder,
|
| 97 |
+
const StringPiece &phraseString,
|
| 98 |
+
Word **lhs);
|
| 99 |
+
|
| 100 |
+
/** copy factors from the other phrase to this phrase.
|
| 101 |
+
IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten
|
| 102 |
+
*/
|
| 103 |
+
void MergeFactors(const Phrase ©);
|
| 104 |
+
//! copy a single factor (specified by factorType)
|
| 105 |
+
void MergeFactors(const Phrase ©, FactorType factorType);
|
| 106 |
+
//! copy all factors specified in factorVec and none others
|
| 107 |
+
void MergeFactors(const Phrase ©, const std::vector<FactorType>& factorVec);
|
| 108 |
+
|
| 109 |
+
/** compare 2 phrases to ensure no factors are lost if the phrases are merged
|
| 110 |
+
* must run IsCompatible() to ensure incompatible factors aren't being overwritten
|
| 111 |
+
*/
|
| 112 |
+
bool IsCompatible(const Phrase &inputPhrase) const;
|
| 113 |
+
bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
|
| 114 |
+
bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
|
| 115 |
+
|
| 116 |
+
//! number of words
|
| 117 |
+
inline size_t GetSize() const {
|
| 118 |
+
return m_words.size();
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
//! word at a particular position
|
| 122 |
+
inline const Word &GetWord(size_t pos) const {
|
| 123 |
+
return m_words[pos];
|
| 124 |
+
}
|
| 125 |
+
inline Word &GetWord(size_t pos) {
|
| 126 |
+
return m_words[pos];
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
inline Word &Front() {
|
| 130 |
+
return m_words[0];
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
inline Word &Back() {
|
| 134 |
+
return m_words[GetSize() - 1];
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
inline const Word &Front() const {
|
| 138 |
+
return m_words[0];
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
inline const Word &Back() const {
|
| 142 |
+
return m_words[GetSize() - 1];
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
//! particular factor at a particular position
|
| 146 |
+
inline const Factor *GetFactor(size_t pos, FactorType factorType) const {
|
| 147 |
+
const Word &ptr = m_words[pos];
|
| 148 |
+
return ptr[factorType];
|
| 149 |
+
}
|
| 150 |
+
inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor) {
|
| 151 |
+
Word &ptr = m_words[pos];
|
| 152 |
+
ptr[factorType] = factor;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
size_t GetNumTerminals() const;
|
| 156 |
+
size_t GetNumNonTerminals() const {
|
| 157 |
+
return GetSize() - GetNumTerminals();
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
//! whether the 2D vector is a substring of this phrase
|
| 161 |
+
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
|
| 162 |
+
, const std::vector<FactorType> &inputFactor) const;
|
| 163 |
+
|
| 164 |
+
size_t Find(const Phrase &sought, int maxUnknown) const;
|
| 165 |
+
|
| 166 |
+
//! create an empty word at the end of the phrase
|
| 167 |
+
Word &AddWord();
|
| 168 |
+
//! create copy of input word at the end of the phrase
|
| 169 |
+
void AddWord(const Word &newWord) {
|
| 170 |
+
AddWord() = newWord;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
/** appends a phrase at the end of current phrase **/
|
| 174 |
+
void Append(const Phrase &endPhrase);
|
| 175 |
+
void PrependWord(const Word &newWord);
|
| 176 |
+
|
| 177 |
+
void Clear() {
|
| 178 |
+
m_words.clear();
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
void RemoveWord(size_t pos) {
|
| 182 |
+
UTIL_THROW_IF2(pos >= m_words.size(),
|
| 183 |
+
"Referencing position " << pos << " out of bound");
|
| 184 |
+
m_words.erase(m_words.begin() + pos);
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
void InitStartEndWord();
|
| 188 |
+
|
| 189 |
+
//! create new phrase class that is a substring of this phrase
|
| 190 |
+
Phrase GetSubString(const Range &range) const;
|
| 191 |
+
Phrase GetSubString(const Range &range, FactorType factorType) const;
|
| 192 |
+
|
| 193 |
+
//! return a string rep of the phrase;
|
| 194 |
+
// w/ factors delimited by FactorDelimiter
|
| 195 |
+
std::string
|
| 196 |
+
GetStringRep(std::vector<FactorType> const& factorsToPrint,
|
| 197 |
+
AllOptions const* opts=NULL) const;
|
| 198 |
+
|
| 199 |
+
TO_STRING();
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
int Compare(const Phrase &other) const;
|
| 203 |
+
|
| 204 |
+
/** transitive comparison between 2 phrases
|
| 205 |
+
* used to insert & find phrase in dictionary
|
| 206 |
+
*/
|
| 207 |
+
bool operator< (const Phrase &compare) const {
|
| 208 |
+
return Compare(compare) < 0;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
size_t hash() const;
|
| 212 |
+
|
| 213 |
+
bool operator==(const Phrase &compare) const;
|
| 214 |
+
bool operator!=(const Phrase &compare) const {
|
| 215 |
+
return ! (*this == compare);
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
void OnlyTheseFactors(const FactorMask &factors);
|
| 219 |
+
|
| 220 |
+
};
|
| 221 |
+
|
| 222 |
+
inline size_t hash_value(const Phrase& phrase)
|
| 223 |
+
{
|
| 224 |
+
return phrase.hash();
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
struct PhrasePtrComparator {
|
| 228 |
+
inline bool operator()(const Phrase* lhs, const Phrase* rhs) const {
|
| 229 |
+
return *lhs == *rhs;
|
| 230 |
+
}
|
| 231 |
+
};
|
| 232 |
+
|
| 233 |
+
struct PhrasePtrHasher {
|
| 234 |
+
inline size_t operator()(const Phrase* phrase) const {
|
| 235 |
+
size_t seed = 0;
|
| 236 |
+
boost::hash_combine(seed,*phrase);
|
| 237 |
+
return seed;
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
};
|
| 241 |
+
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
#endif
|
mosesdecoder/moses/PrefixTree.h
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/* ---------------------------------------------------------------- */
|
| 4 |
+
/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
|
| 5 |
+
/* Richard Zens */
|
| 6 |
+
/* ---------------------------------------------------------------- */
|
| 7 |
+
#ifndef moses_PrefixTree_h
|
| 8 |
+
#define moses_PrefixTree_h
|
| 9 |
+
|
| 10 |
+
#include <vector>
|
| 11 |
+
#include <algorithm>
|
| 12 |
+
#include <deque>
|
| 13 |
+
#include "Util.h"
|
| 14 |
+
#include "FilePtr.h"
|
| 15 |
+
#include "File.h"
|
| 16 |
+
|
| 17 |
+
namespace Moses
|
| 18 |
+
{
|
| 19 |
+
|
| 20 |
+
/** @todo How is this used in the pb binary phrase table?
|
| 21 |
+
*/
|
| 22 |
+
template<typename T,typename D>
|
| 23 |
+
class PrefixTreeSA
|
| 24 |
+
{
|
| 25 |
+
public:
|
| 26 |
+
typedef T Key;
|
| 27 |
+
typedef D Data;
|
| 28 |
+
|
| 29 |
+
typedef PrefixTreeSA<T,D> Self;
|
| 30 |
+
typedef std::vector<T> VT;
|
| 31 |
+
typedef std::vector<Self*> VP;
|
| 32 |
+
typedef std::vector<D> VD;
|
| 33 |
+
|
| 34 |
+
VT keys;
|
| 35 |
+
VP ptr;
|
| 36 |
+
VD data;
|
| 37 |
+
|
| 38 |
+
static Data def;
|
| 39 |
+
|
| 40 |
+
public:
|
| 41 |
+
PrefixTreeSA() {}
|
| 42 |
+
|
| 43 |
+
~PrefixTreeSA() {
|
| 44 |
+
for(size_t i=0; i<ptr.size(); ++i) delete ptr[i];
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
static const Data& getDefault() {
|
| 48 |
+
return def;
|
| 49 |
+
}
|
| 50 |
+
static void setDefault(const Data& x) {
|
| 51 |
+
def=x;
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
// insert sequence
|
| 56 |
+
template<typename fwiter> Data& insert(fwiter b,fwiter e) {
|
| 57 |
+
typename VT::iterator i=std::lower_bound(keys.begin(),keys.end(),*b);
|
| 58 |
+
typename VT::iterator kb=keys.begin();
|
| 59 |
+
size_t pos=std::distance(kb,i);
|
| 60 |
+
|
| 61 |
+
if(i==keys.end() || *i!=*b) {
|
| 62 |
+
keys.insert(i,*b);
|
| 63 |
+
data.insert(data.begin()+pos,def);
|
| 64 |
+
|
| 65 |
+
Self *self = NULL;
|
| 66 |
+
ptr.insert(ptr.begin()+pos, self);
|
| 67 |
+
}
|
| 68 |
+
if(++b!=e) {
|
| 69 |
+
if(!ptr[pos]) ptr[pos]=new Self;
|
| 70 |
+
return ptr[pos]->insert(b,e);
|
| 71 |
+
} else return data[pos];
|
| 72 |
+
}
|
| 73 |
+
// insert container
|
| 74 |
+
template<typename cont> Data& insert(const cont& c) {
|
| 75 |
+
return insert(c.begin(),c.end());
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
size_t size() const {
|
| 79 |
+
return keys.size();
|
| 80 |
+
}
|
| 81 |
+
const Key& getKey(size_t i) const {
|
| 82 |
+
return keys[i];
|
| 83 |
+
}
|
| 84 |
+
const Data& getData(size_t i) const {
|
| 85 |
+
return data[i];
|
| 86 |
+
}
|
| 87 |
+
const Self* getPtr(size_t i) const {
|
| 88 |
+
return ptr[i];
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
size_t findKey(const Key& k) const {
|
| 92 |
+
typename VT::const_iterator i=std::lower_bound(keys.begin(),keys.end(),k);
|
| 93 |
+
if(i==keys.end() || *i!=k) return keys.size();
|
| 94 |
+
return std::distance(keys.begin(),i);
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
// find sequence
|
| 98 |
+
template<typename fwiter> const Data* findPtr(fwiter b,fwiter e) const {
|
| 99 |
+
size_t pos=findKey(*b);
|
| 100 |
+
if(pos==keys.size()) return 0;
|
| 101 |
+
if(++b==e) return &data[pos];
|
| 102 |
+
if(ptr[pos]) return ptr[pos]->findPtr(b,e);
|
| 103 |
+
else return 0;
|
| 104 |
+
}
|
| 105 |
+
// find container
|
| 106 |
+
template<typename cont> const Data* findPtr(const cont& c) const {
|
| 107 |
+
return findPtr(c.begin(),c.end());
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
// find sequence
|
| 112 |
+
template<typename fwiter> const Data& find(fwiter b,fwiter e) const {
|
| 113 |
+
if(const Data* p=findPtr(b,e)) return *p;
|
| 114 |
+
else return def;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
// find container
|
| 118 |
+
template<typename cont> const Data& find(const cont& c) const {
|
| 119 |
+
return find(c.begin(),c.end());
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
void shrink() {
|
| 123 |
+
ShrinkToFit(keys);
|
| 124 |
+
ShrinkToFit(ptr);
|
| 125 |
+
ShrinkToFit(data);
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
};
|
| 129 |
+
template<typename T,typename D> D PrefixTreeSA<T,D>::def;
|
| 130 |
+
|
| 131 |
+
/////////////////////////////////////////////////////////////////////////////
|
| 132 |
+
|
| 133 |
+
/** @todo How is this used in the pb binary phrase table?
|
| 134 |
+
*/
|
| 135 |
+
template<typename T,typename D>
|
| 136 |
+
class PrefixTreeF
|
| 137 |
+
{
|
| 138 |
+
public:
|
| 139 |
+
typedef T Key;
|
| 140 |
+
typedef D Data;
|
| 141 |
+
private:
|
| 142 |
+
typedef PrefixTreeF<Key,Data> Self;
|
| 143 |
+
public:
|
| 144 |
+
typedef FilePtr<Self> Ptr;
|
| 145 |
+
private:
|
| 146 |
+
typedef std::vector<Key> VK;
|
| 147 |
+
typedef std::vector<Data> VD;
|
| 148 |
+
typedef std::vector<Ptr> VP;
|
| 149 |
+
|
| 150 |
+
VK keys;
|
| 151 |
+
VD data;
|
| 152 |
+
VP ptr;
|
| 153 |
+
|
| 154 |
+
static Data def;
|
| 155 |
+
|
| 156 |
+
OFF_T startPos;
|
| 157 |
+
FILE* f;
|
| 158 |
+
public:
|
| 159 |
+
|
| 160 |
+
PrefixTreeF(FILE* f_=0) : f(f_) {
|
| 161 |
+
if(f) read();
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
~PrefixTreeF() {
|
| 165 |
+
free();
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
void read() {
|
| 169 |
+
startPos=fTell(f);
|
| 170 |
+
fReadVector(f,keys);
|
| 171 |
+
fReadVector(f,data);
|
| 172 |
+
ptr.clear();
|
| 173 |
+
ptr.resize(keys.size());
|
| 174 |
+
std::vector<OFF_T> rawOffs(keys.size());
|
| 175 |
+
size_t bytes_read = fread(&rawOffs[0], sizeof(OFF_T), keys.size(), f);
|
| 176 |
+
UTIL_THROW_IF2(bytes_read != keys.size(), "Read error at " << HERE);
|
| 177 |
+
for(size_t i=0; i<ptr.size(); ++i)
|
| 178 |
+
if (rawOffs[i]) ptr[i].set(f, rawOffs[i]);
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
void free() {
|
| 182 |
+
for(typename VP::iterator i=ptr.begin(); i!=ptr.end(); ++i) i->free();
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
void reserve(size_t s) {
|
| 186 |
+
keys.reserve(s);
|
| 187 |
+
data.reserve(s);
|
| 188 |
+
ptr.reserve(s);
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
template<typename fwiter>
|
| 192 |
+
void changeData(fwiter b,fwiter e,const Data& d) {
|
| 193 |
+
typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),*b);
|
| 194 |
+
if(i==keys.end() || *i!=*b) {
|
| 195 |
+
TRACE_ERR("ERROR: key not found in changeData!\n");
|
| 196 |
+
return;
|
| 197 |
+
}
|
| 198 |
+
typename VK::const_iterator kb=keys.begin();
|
| 199 |
+
size_t pos=std::distance(kb,i);
|
| 200 |
+
if(++b==e) {
|
| 201 |
+
OFF_T p=startPos+keys.size()*sizeof(Key)+2*sizeof(unsigned)+pos*sizeof(Data);
|
| 202 |
+
TRACE_ERR("elem found at pos "<<p<<" old val: "<<data[pos]<<" startpos: "<<startPos<<"\n");
|
| 203 |
+
if(data[pos]!=d) {
|
| 204 |
+
data[pos]=d;
|
| 205 |
+
fSeek(f,p);
|
| 206 |
+
fWrite(f,d);
|
| 207 |
+
}
|
| 208 |
+
return;
|
| 209 |
+
}
|
| 210 |
+
if(ptr[pos]) ptr[pos]->changeData(b,e,d);
|
| 211 |
+
else {
|
| 212 |
+
TRACE_ERR("ERROR: seg not found!in changeData\n");
|
| 213 |
+
}
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
void create(const PrefixTreeSA<Key,Data>& psa,const std::string& fname) {
|
| 218 |
+
FILE* f=fOpen(fname.c_str(),"wb");
|
| 219 |
+
create(psa,f);
|
| 220 |
+
fclose(f);
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
// Serialize the in-memory tree psa to an already-open FILE*.
// Nodes are written depth-first (a deque used as a stack); child offsets
// are backpatched: each node first writes a zero placeholder per child,
// and when the child is later emitted we seek back to the recorded
// placeholder position and overwrite it with the child's real offset.
void create(const PrefixTreeSA<Key,Data>& psa,FILE* f,int verbose=0) {
  setDefault(psa.getDefault());

  // (node to write, file offset of the placeholder pointing at it)
  typedef std::pair<const PrefixTreeSA<Key,Data>*,OFF_T> P;
  typedef std::deque<P> Queue;

  Queue queue;

  queue.push_back(P(&psa,fTell(f)));
  bool isFirst=1;
  size_t ns=1;   // next power-of-two threshold for verbose progress logging
  while(queue.size()) {
    if(verbose && queue.size()>ns) {
      TRACE_ERR("stack size in PF create: "<<queue.size()<<"\n");
      while(ns<queue.size()) ns*=2;
    }
    // copy what we need out of the back element before popping it
    const P& pp=queue.back();
    const PrefixTreeSA<Key,Data>& p=*pp.first;
    OFF_T pos=pp.second;
    queue.pop_back();

    if(!isFirst) {
      // backpatch: write this node's real offset into the parent's
      // placeholder slot at pos, then return to the end of the file
      OFF_T curr=fTell(f);
      fSeek(f,pos);
      fWrite(f,curr);
      fSeek(f,curr);
    } else isFirst=0;   // root has no placeholder to patch

    size_t s=0;
    s+=fWriteVector(f,p.keys);
    s+=fWriteVector(f,p.data);

    // write one placeholder offset per child; non-null children are
    // queued together with the position of their placeholder
    for(size_t i=0; i<p.ptr.size(); ++i) {
      if(p.ptr[i])
        queue.push_back(P(p.ptr[i],fTell(f)));
      OFF_T ppos=0;
      s+=fWrite(f,ppos);
    }
  }
}
|
| 263 |
+
|
| 264 |
+
// Number of keys stored in this node.
size_t size() const {
  return keys.size();
}
|
| 267 |
+
// Key at slot i (no bounds checking).
const Key& getKey(size_t i) const {
  return keys[i];
}
|
| 270 |
+
// Payload at slot i (no bounds checking).
const Data& getData(size_t i) const {
  return data[i];
}
|
| 273 |
+
// Child subtree at slot i; may be null if no continuation exists.
const Self* getPtr(size_t i) const {
  return ptr[i];
}
|
| 276 |
+
|
| 277 |
+
size_t findKey(const Key& k) const {
|
| 278 |
+
typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),k);
|
| 279 |
+
if(i==keys.end() || *i!=k) return keys.size();
|
| 280 |
+
return std::distance(keys.begin(),i);
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
// Look up k and return the address of its child-pointer slot,
// or 0 when the key is not present.
Ptr const* findKeyPtr(const Key& k) const {
  const size_t slot=findKey(k);
  if(slot>=keys.size()) return 0;
  return &ptr[slot];
}
|
| 287 |
+
|
| 288 |
+
// find sequence
|
| 289 |
+
// Follow the key sequence [b,e) down the tree.
// Returns a pointer to the payload of the final symbol, or 0 if the
// sequence is not present.
template<typename fwiter> const Data* findPtr(fwiter b,fwiter e) const {
  typename VK::const_iterator hit=std::lower_bound(keys.begin(),keys.end(),*b);
  if(hit==keys.end() || *hit!=*b) return 0;        // first symbol missing
  const size_t slot=std::distance(keys.begin(),hit);
  ++b;
  if(b==e) return &data[slot];                     // sequence fully consumed
  return ptr[slot] ? ptr[slot]->findPtr(b,e) : 0;  // descend, if possible
}
|
| 297 |
+
// find container
|
| 298 |
+
// Container convenience overload: look up the whole sequence held in c.
template<typename cont> const Data* findPtr(const cont& c) const {
  return findPtr(c.begin(),c.end());
}
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
// find sequence
|
| 304 |
+
// Like findPtr, but returns the class-wide default value instead of a
// null pointer when the sequence is not present.
template<typename fwiter> const Data& find(fwiter b,fwiter e) const {
  const Data* hit=findPtr(b,e);
  return hit ? *hit : def;
}
|
| 308 |
+
|
| 309 |
+
// find container
|
| 310 |
+
// Container convenience overload of find; falls back to the default value.
template<typename cont> const Data& find(const cont& c) const {
  return find(c.begin(),c.end());
}
|
| 313 |
+
|
| 314 |
+
// Set the value returned by find() on lookup failure.
// NOTE: def is a static member, so this is shared by ALL trees with the
// same Key/Data instantiation — not per-instance.
static void setDefault(const Data& d) {
  def=d;
}
|
| 317 |
+
// The class-wide default value returned on failed lookups.
static const Data& getDefault() {
  return def;
}
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
void print(std::ostream& out,const std::string s="") const {
|
| 323 |
+
|
| 324 |
+
out<<s<<"startpos: "<<startPos<<" size: "<<keys.size()<<"\n";
|
| 325 |
+
for(size_t i=0; i<keys.size(); ++i) {
|
| 326 |
+
out<<s<<i<<" - "<<keys[i]<<" "<<data[i]<<"\n";
|
| 327 |
+
}
|
| 328 |
+
for(size_t i=0; i<ptr.size(); ++i)
|
| 329 |
+
if(ptr[i])
|
| 330 |
+
ptr[i]->print(out,s+" ");
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
};
|
| 335 |
+
template<typename T,typename D> D PrefixTreeF<T,D>::def;
|
| 336 |
+
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
#endif
|
mosesdecoder/moses/Range.h
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_WordsRange_h
|
| 23 |
+
#define moses_WordsRange_h
|
| 24 |
+
|
| 25 |
+
#include <iostream>
|
| 26 |
+
#include <boost/functional/hash.hpp>
|
| 27 |
+
#include "TypeDef.h"
|
| 28 |
+
#include "Util.h"
|
| 29 |
+
#include "util/exception.hh"
|
| 30 |
+
|
| 31 |
+
#ifdef WIN32
|
| 32 |
+
#undef max
|
| 33 |
+
#endif
|
| 34 |
+
|
| 35 |
+
namespace Moses
|
| 36 |
+
{
|
| 37 |
+
|
| 38 |
+
/***
|
| 39 |
+
* Efficient version of Bitmap for contiguous ranges
|
| 40 |
+
*/
|
| 41 |
+
class Range
|
| 42 |
+
{
|
| 43 |
+
friend std::ostream& operator << (std::ostream& out, const Range& range);
|
| 44 |
+
|
| 45 |
+
// m_endPos is inclusive
|
| 46 |
+
size_t m_startPos, m_endPos;
|
| 47 |
+
public:
|
| 48 |
+
inline explicit Range() {}
|
| 49 |
+
inline Range(size_t startPos, size_t endPos) : m_startPos(startPos), m_endPos(endPos) {}
|
| 50 |
+
inline Range(const Range ©)
|
| 51 |
+
: m_startPos(copy.GetStartPos())
|
| 52 |
+
, m_endPos(copy.GetEndPos()) {
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
inline size_t GetStartPos() const {
|
| 56 |
+
return m_startPos;
|
| 57 |
+
}
|
| 58 |
+
inline size_t GetEndPos() const {
|
| 59 |
+
return m_endPos;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
//! count of words translated
|
| 63 |
+
inline size_t GetNumWordsCovered() const {
|
| 64 |
+
return (m_startPos == NOT_FOUND) ? 0 : m_endPos - m_startPos + 1;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
//! transitive comparison
|
| 68 |
+
inline bool operator<(const Range& x) const {
|
| 69 |
+
return (m_startPos<x.m_startPos
|
| 70 |
+
|| (m_startPos==x.m_startPos && m_endPos<x.m_endPos));
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
// equality operator
|
| 74 |
+
inline bool operator==(const Range& x) const {
|
| 75 |
+
return (m_startPos==x.m_startPos && m_endPos==x.m_endPos);
|
| 76 |
+
}
|
| 77 |
+
// Whether two word ranges overlap or not
|
| 78 |
+
inline bool Overlap(const Range& x) const {
|
| 79 |
+
|
| 80 |
+
if ( x.m_endPos < m_startPos || x.m_startPos > m_endPos) return false;
|
| 81 |
+
|
| 82 |
+
return true;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
inline size_t GetNumWordsBetween(const Range& x) const {
|
| 86 |
+
UTIL_THROW_IF2(Overlap(x), "Overlapping ranges");
|
| 87 |
+
|
| 88 |
+
if (x.m_endPos < m_startPos) {
|
| 89 |
+
return m_startPos - x.m_endPos - 1;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
return x.m_startPos - m_endPos - 1;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
TO_STRING();
|
| 97 |
+
};
|
| 98 |
+
|
| 99 |
+
inline size_t hash_value(const Range& range)
|
| 100 |
+
{
|
| 101 |
+
size_t seed = range.GetStartPos();
|
| 102 |
+
boost::hash_combine(seed, range.GetEndPos());
|
| 103 |
+
return seed;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
}
|
| 107 |
+
#endif
|
mosesdecoder/moses/ReorderingConstraint.cpp
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2008 University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#include "ReorderingConstraint.h"
|
| 24 |
+
#include "InputType.h"
|
| 25 |
+
#include "StaticData.h"
|
| 26 |
+
#include "Bitmap.h"
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
|
| 31 |
+
//! allocate memory for reordering walls
|
| 32 |
+
void ReorderingConstraint::InitializeWalls(size_t size)
|
| 33 |
+
{
|
| 34 |
+
m_size = size;
|
| 35 |
+
m_wall = (bool*) malloc(sizeof(bool) * size);
|
| 36 |
+
m_localWall = (size_t*) malloc(sizeof(size_t) * size);
|
| 37 |
+
|
| 38 |
+
for (size_t pos = 0 ; pos < m_size ; pos++) {
|
| 39 |
+
m_wall[pos] = false;
|
| 40 |
+
m_localWall[pos] = NOT_A_ZONE;
|
| 41 |
+
}
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
//! set value at a particular position
|
| 46 |
+
//! Set or clear a reordering wall at position pos.
//! Any call marks the constraint object as active, even value == false.
void ReorderingConstraint::SetWall( size_t pos, bool value )
{
  VERBOSE(3,"SETTING reordering wall at position " << pos << std::endl);
  m_wall[pos] = value;
  m_active = true;
}
|
| 52 |
+
|
| 53 |
+
//! has to be called to localized walls
|
| 54 |
+
//! Convert global walls that fall inside a zone into zone-local walls.
//! Must be called after all SetWall/SetZone calls and before decoding.
//! Walls strictly inside a zone only constrain reordering within that
//! zone; a wall already assigned to an outer zone is reassigned when a
//! more deeply nested (innermost) zone is encountered.
void ReorderingConstraint::FinalizeWalls()
{
  for(size_t z = 0; z < m_zone.size(); z++ ) {
    const size_t startZone = m_zone[z].first;
    const size_t endZone = m_zone[z].second;// note: wall after endZone is not local
    for( size_t pos = startZone; pos < endZone; pos++ ) {
      if (m_wall[ pos ]) {
        // demote a global wall to a wall local to zone z
        m_localWall[ pos ] = z;
        m_wall[ pos ] = false;
        VERBOSE(3,"SETTING local wall " << pos << std::endl);
      }
      // enforce that local walls only apply to innermost zone
      else if (m_localWall[ pos ] != NOT_A_ZONE) {
        size_t assigned_z = m_localWall[ pos ];
        // if the currently assigned zone extends beyond this one,
        // this zone is more deeply nested -> reassign
        if ((m_zone[assigned_z].first < startZone) ||
            (m_zone[assigned_z].second > endZone)) {
          m_localWall[ pos ] = z;
        }
      }
    }
  }
}
|
| 76 |
+
|
| 77 |
+
//! set walls based on "-monotone-at-punctuation" flag
|
| 78 |
+
void ReorderingConstraint::SetMonotoneAtPunctuation( const Phrase &sentence )
|
| 79 |
+
{
|
| 80 |
+
for( size_t i=0; i<sentence.GetSize(); i++ ) {
|
| 81 |
+
const Word& word = sentence.GetWord(i);
|
| 82 |
+
if (word[0]->GetString() == "," ||
|
| 83 |
+
word[0]->GetString() == "." ||
|
| 84 |
+
word[0]->GetString() == "!" ||
|
| 85 |
+
word[0]->GetString() == "?" ||
|
| 86 |
+
word[0]->GetString() == ":" ||
|
| 87 |
+
word[0]->GetString() == ";" ||
|
| 88 |
+
word[0]->GetString() == "\"") {
|
| 89 |
+
// set wall before and after punc, but not at sentence start, end
|
| 90 |
+
if (i>0 && i<m_size-1) SetWall( i, true );
|
| 91 |
+
if (i>1) SetWall( i-1, true );
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
//! set a reordering zone (once entered, need to finish)
|
| 97 |
+
void ReorderingConstraint::SetZone( size_t startPos, size_t endPos )
|
| 98 |
+
{
|
| 99 |
+
VERBOSE(3,"SETTING zone " << startPos << "-" << endPos << std::endl);
|
| 100 |
+
std::pair<size_t,size_t> newZone;
|
| 101 |
+
newZone.first = startPos;
|
| 102 |
+
newZone.second = endPos;
|
| 103 |
+
m_zone.push_back( newZone );
|
| 104 |
+
m_active = true;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
//! check if the current hypothesis extension violates reordering constraints
|
| 108 |
+
//! Decide whether extending a hypothesis (coverage = bitmap) by the source
//! phrase [startPos, endPos] violates any wall or zone constraint.
//! Returns true when the extension is permitted. Also prunes extensions
//! that cannot lead to a complete translation under the distortion limit.
bool ReorderingConstraint::Check( const Bitmap &bitmap, size_t startPos, size_t endPos ) const
{
  // nothing to be checked, we are done
  if (! IsActive() ) return true;

  VERBOSE(3,"Check " << bitmap << " " << startPos << "-" << endPos);

  // check walls
  size_t firstGapPos = bitmap.GetFirstGapPos();
  // filling first gap -> no wall violation possible
  if (firstGapPos != startPos) {
    // if there is a wall before the last word,
    // we created a gap while moving through wall
    // -> violation
    for( size_t pos = firstGapPos; pos < endPos; pos++ ) {
      if( GetWall( pos ) ) {
        VERBOSE(3," hitting wall " << pos << std::endl);
        return false;
      }
    }
  }

  // monotone -> no violation possible
  size_t lastPos = bitmap.GetLastPos();
  if ((lastPos == NOT_FOUND && startPos == 0) || // nothing translated
      (firstGapPos > lastPos && // no gaps
       firstGapPos == startPos)) { // translating first empty word
    VERBOSE(3," montone, fine." << std::endl);
    return true;
  }

  // check zones
  for(size_t z = 0; z < m_zone.size(); z++ ) {
    const size_t startZone = m_zone[z].first;
    const size_t endZone = m_zone[z].second;

    // fine, if translation has not reached zone yet and phrase outside zone
    if (lastPos < startZone && ( endPos < startZone || startPos > endZone ) ) {
      continue;
    }

    // already completely translated zone, no violations possible
    if (firstGapPos > endZone) {
      continue;
    }

    // some words are translated beyond the start
    // let's look closer if some are in the zone
    size_t numWordsInZoneTranslated = 0;
    if (lastPos >= startZone) {
      for(size_t pos = startZone; pos <= endZone; pos++ ) {
        if( bitmap.GetValue( pos ) ) {
          numWordsInZoneTranslated++;
        }
      }
    }

    // all words in zone translated, no violation possible
    if (numWordsInZoneTranslated == endZone-startZone+1) {
      continue;
    }

    // flag if this is an active zone (partially translated)
    bool activeZone = (numWordsInZoneTranslated > 0);

    // fine, if zone completely untranslated and phrase outside zone
    if (!activeZone && ( endPos < startZone || startPos > endZone ) ) {
      continue;
    }

    // violation, if phrase completely outside active zone:
    // an entered zone must be finished before translating elsewhere
    if (activeZone && ( endPos < startZone || startPos > endZone ) ) {
      VERBOSE(3," outside active zone" << std::endl);
      return false;
    }

    // ok, this is what we know now:
    // * the phrase is in the zone (at least partially)
    // * either zone is already active, or it becomes active now


    // check, if we are setting us up for a dead end due to distortion limits:
    // jumping too far ahead would leave a gap we can never return to

    // size_t distortionLimit = (size_t)StaticData::Instance().GetMaxDistortion();
    size_t distortionLimit = m_max_distortion;
    if (startPos != firstGapPos && endZone-firstGapPos >= distortionLimit) {
      VERBOSE(3," dead end due to distortion limit" << std::endl);
      return false;
    }

    // let us check on phrases that are partially outside

    // phrase overlaps at the beginning, always ok
    if (startPos <= startZone) {
      continue;
    }

    // phrase goes beyond end, has to fill zone completely
    if (endPos > endZone) {
      if (endZone-startPos+1 < // num. words filled in by phrase
          endZone-startZone+1-numWordsInZoneTranslated) { // num. untranslated
        VERBOSE(3," overlap end, but not completing" << std::endl);
        return false;
      } else {
        continue;
      }
    }

    // now we are down to phrases that are completely inside the zone
    // we have to check local walls
    bool seenUntranslatedBeforeStartPos = false;
    for(size_t pos = startZone; pos < endZone && pos < endPos; pos++ ) {
      // be careful when there is a gap before phrase
      if( !bitmap.GetValue( pos ) // untranslated word
          && pos < startPos ) { // before startPos
        seenUntranslatedBeforeStartPos = true;
      }
      // crossing a local wall while leaving a gap behind -> violation
      if( seenUntranslatedBeforeStartPos && GetLocalWall( pos, z ) ) {
        VERBOSE(3," local wall violation" << std::endl);
        return false;
      }
    }

    // passed all checks for this zone, on to the next one
  }

  // passed all checks, no violations
  VERBOSE(3," fine." << std::endl);
  return true;
}
|
| 238 |
+
|
| 239 |
+
std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj)
|
| 240 |
+
{
|
| 241 |
+
out << "Zones:";
|
| 242 |
+
for (size_t i = 0; i < obj.m_zone.size(); ++i) {
|
| 243 |
+
const std::pair<size_t,size_t> &zone1 = obj.m_zone[i];
|
| 244 |
+
out << zone1.first << "-" << zone1.second << " ";
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
out << "Walls:";
|
| 248 |
+
for (size_t i = 0; i < obj.m_size; ++i) {
|
| 249 |
+
out << obj.m_wall[i];
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
out << " Local walls:";
|
| 253 |
+
for (size_t i = 0; i < obj.m_size; ++i) {
|
| 254 |
+
out << obj.m_localWall[i] << " ";
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
return out;
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
}
|
mosesdecoder/moses/ReorderingConstraint.h
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
| 2 |
+
// $Id$
|
| 3 |
+
// vim:tabstop=2
|
| 4 |
+
|
| 5 |
+
/***********************************************************************
|
| 6 |
+
Moses - factored phrase-based language decoder
|
| 7 |
+
Copyright (C) 2008 University of Edinburgh
|
| 8 |
+
|
| 9 |
+
This library is free software; you can redistribute it and/or
|
| 10 |
+
modify it under the terms of the GNU Lesser General Public
|
| 11 |
+
License as published by the Free Software Foundation; either
|
| 12 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 13 |
+
|
| 14 |
+
This library is distributed in the hope that it will be useful,
|
| 15 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 16 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 17 |
+
Lesser General Public License for more details.
|
| 18 |
+
|
| 19 |
+
You should have received a copy of the GNU Lesser General Public
|
| 20 |
+
License along with this library; if not, write to the Free Software
|
| 21 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 22 |
+
***********************************************************************/
|
| 23 |
+
|
| 24 |
+
#ifndef moses_ReorderingConstraint_h
|
| 25 |
+
#define moses_ReorderingConstraint_h
|
| 26 |
+
|
| 27 |
+
//#include <malloc.h>
|
| 28 |
+
#include <limits>
|
| 29 |
+
#include <vector>
|
| 30 |
+
#include <iostream>
|
| 31 |
+
#include <cstring>
|
| 32 |
+
#include <cmath>
|
| 33 |
+
#include "TypeDef.h"
|
| 34 |
+
#include "Word.h"
|
| 35 |
+
#include "Phrase.h"
|
| 36 |
+
|
| 37 |
+
namespace Moses
|
| 38 |
+
{
|
| 39 |
+
|
| 40 |
+
class InputType;
|
| 41 |
+
class Bitmap;
|
| 42 |
+
|
| 43 |
+
#define NOT_A_ZONE 999999999
|
| 44 |
+
/** A list of zones and walls to limit which reordering can occur
|
| 45 |
+
*/
|
| 46 |
+
class ReorderingConstraint
{
  friend std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj);
protected:
  // const size_t m_size; /**< number of words in sentence */
  size_t m_size; /**< number of words in sentence */
  bool *m_wall; /**< per-position flag: global reordering wall here */
  size_t *m_localWall; /**< per-position zone index of a local wall, or NOT_A_ZONE */
  std::vector< std::pair<size_t,size_t> > m_zone; /** zones that limit reordering */
  bool m_active; /**< flag indicating, if there are any active constraints */
  int m_max_distortion; /**< distortion limit used for dead-end pruning in Check() */
public:

  //! create ReorderingConstraint; arrays stay null until InitializeWalls()
  // NOTE(review): m_wall/m_localWall are raw malloc'd buffers and copy
  // construction/assignment are implicitly generated, so copying an
  // initialized object would double-free — confirm callers never copy.
  ReorderingConstraint(int max_distortion)
    : m_wall(NULL)
    , m_localWall(NULL)
    , m_active(false)
    , m_max_distortion(max_distortion)
  {}

  //! destructer
  ~ReorderingConstraint() {
    if (m_wall != NULL) free(m_wall);
    if (m_localWall != NULL) free(m_localWall);
  }

  //! allocate memory for memory for a sentence of a given size
  void InitializeWalls(size_t size);

  //! changes walls in zones into local walls
  void FinalizeWalls();

  //! set value at a particular position
  void SetWall( size_t pos, bool value );

  //! whether there is a global reordering wall at position pos
  // (original comment wrongly said "whether a word has been translated")
  bool GetWall(size_t pos) const {
    return m_wall[pos];
  }

  //! whether position pos carries a local wall belonging to the given zone
  bool GetLocalWall(size_t pos, size_t zone ) const {
    return (m_localWall[pos] == zone);
  }

  //! set a zone
  void SetZone( size_t startPos, size_t endPos );

  //! returns the vector of zones
  std::vector< std::pair<size_t,size_t> > & GetZones() {
    return m_zone;
  }

  //! set the reordering walls based on punctuation in the sentence
  void SetMonotoneAtPunctuation( const Phrase & sentence );

  //! check if all constraints are fulfilled -> all fine
  bool Check( const Bitmap &bitmap, size_t start, size_t end ) const;

  //! checks if reordering constraints will be enforced
  bool IsActive() const {
    return m_active;
  }
};
|
| 111 |
+
|
| 112 |
+
}
|
| 113 |
+
#endif
|
mosesdecoder/moses/ScoreComponentCollectionTest.cpp
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include <stdexcept>
|
| 21 |
+
|
| 22 |
+
#include <boost/test/unit_test.hpp>
|
| 23 |
+
|
| 24 |
+
#include "moses/FF/StatelessFeatureFunction.h"
|
| 25 |
+
#include "ScoreComponentCollection.h"
|
| 26 |
+
|
| 27 |
+
using namespace Moses;
|
| 28 |
+
using namespace std;
|
| 29 |
+
|
| 30 |
+
BOOST_AUTO_TEST_SUITE(scc)
|
| 31 |
+
|
| 32 |
+
// Base mock: a stateless feature function whose every evaluation hook is a
// no-op, so tests can exercise ScoreComponentCollection bookkeeping without
// any real scoring logic.
class MockStatelessFeatureFunction : public StatelessFeatureFunction
{
public:
  // n: number of dense score components; line: config-style name string
  MockStatelessFeatureFunction(size_t n, const string &line) :
    StatelessFeatureFunction(n, line) {}
  void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {}
  void EvaluateWhenApplied(const ChartHypothesis&, ScoreComponentCollection*) const {}
  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedScores) const {
  }

  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {
  }
  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedScores) const {
  }

};
|
| 57 |
+
|
| 58 |
+
// Mock feature with exactly one dense score component.
class MockSingleFeature : public MockStatelessFeatureFunction
{
public:
  MockSingleFeature(): MockStatelessFeatureFunction(1, "MockSingle") {}

  bool IsUseable(const FactorMask &mask) const {
    return true;
  }
};
|
| 67 |
+
|
| 68 |
+
// Mock feature with five dense score components.
class MockMultiFeature : public MockStatelessFeatureFunction
{
public:
  MockMultiFeature(): MockStatelessFeatureFunction(5, "MockMulti") {}

  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

};
|
| 78 |
+
|
| 79 |
+
// Mock feature with no dense components (0): scores are sparse, named.
class MockSparseFeature : public MockStatelessFeatureFunction
{
public:
  MockSparseFeature(): MockStatelessFeatureFunction(0, "MockSparse") {}

  bool IsUseable(const FactorMask &mask) const {
    return true;
  }
};
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
// Boost.Test fixture: registers one instance of each mock feature with the
// global FeatureFunction registry, so collections in the tests below can
// address scores by producer.
struct MockProducers {
  MockProducers() {
    FeatureFunction::Register(&single);
    FeatureFunction::Register(&multi);
    FeatureFunction::Register(&sparse);
  }

  MockSingleFeature single;   // one dense component
  MockMultiFeature multi;     // five dense components
  MockSparseFeature sparse;   // sparse (named) components only
};
|
| 102 |
+
|
| 103 |
+
// A freshly constructed collection scores every registered producer as zero.
BOOST_FIXTURE_TEST_CASE(ctor, MockProducers)
{
  ScoreComponentCollection scc;
  BOOST_CHECK_EQUAL(scc.GetScoreForProducer(&single),0);
  float expected[] = {0,0,0,0,0};
  std::vector<float> actual= scc.GetScoresForProducer(&multi);
  BOOST_CHECK_EQUAL_COLLECTIONS(expected, expected+5, actual.begin(), actual.begin()+5);
}
|
| 111 |
+
|
| 112 |
+
// PlusEquals accumulates per producer: adding vec1 twice yields vec2
// (its doubling), and the single-feature score is left untouched.
BOOST_FIXTURE_TEST_CASE(plusequals, MockProducers)
{
  float arr1[] = {1,2,3,4,5};
  float arr2[] = {2,4,6,8,10};
  std::vector<float> vec1(arr1,arr1+5);
  std::vector<float> vec2(arr2,arr2+5);

  ScoreComponentCollection scc;
  scc.PlusEquals(&single, 3.4f);
  BOOST_CHECK_EQUAL(scc.GetScoreForProducer(&single), 3.4f);
  scc.PlusEquals(&multi,vec1);
  std::vector<float> actual = scc.GetScoresForProducer(&multi);
  BOOST_CHECK_EQUAL_COLLECTIONS(vec1.begin(),vec1.end()
                                ,actual.begin(), actual.end());
  scc.PlusEquals(&multi,vec1);
  actual = scc.GetScoresForProducer(&multi);
  BOOST_CHECK_EQUAL_COLLECTIONS(vec2.begin(),vec2.end(),
                                actual.begin(), actual.end());

  // accumulating into multi must not disturb single's score
  BOOST_CHECK_EQUAL(scc.GetScoreForProducer(&single), 3.4f);
}
|
| 133 |
+
|
| 134 |
+
BOOST_FIXTURE_TEST_CASE(sparse_feature, MockProducers)
|
| 135 |
+
{
|
| 136 |
+
ScoreComponentCollection scc;
|
| 137 |
+
scc.Assign(&sparse, "first", 1.3f);
|
| 138 |
+
scc.Assign(&sparse, "second", 2.1f);
|
| 139 |
+
BOOST_CHECK_EQUAL( scc.GetScoreForProducer(&sparse,"first"), 1.3f);
|
| 140 |
+
BOOST_CHECK_EQUAL( scc.GetScoreForProducer(&sparse,"second"), 2.1f);
|
| 141 |
+
BOOST_CHECK_EQUAL( scc.GetScoreForProducer(&sparse,"third"), 0.0f);
|
| 142 |
+
scc.Assign(&sparse, "first", -1.9f);
|
| 143 |
+
BOOST_CHECK_EQUAL( scc.GetScoreForProducer(&sparse,"first"), -1.9f);
|
| 144 |
+
scc.PlusEquals(&sparse, StringPiece("first"), -1.9f);
|
| 145 |
+
BOOST_CHECK_EQUAL( scc.GetScoreForProducer(&sparse,"first"), -3.8f);
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
/*
|
| 149 |
+
Doesn't work because of the static registration of ScoreProducers
|
| 150 |
+
in ScoreComponentCollection.
|
| 151 |
+
BOOST_FIXTURE_TEST_CASE(save, MockProducers)
|
| 152 |
+
{
|
| 153 |
+
ScoreComponentCollection scc;
|
| 154 |
+
scc.Assign(&sparse, "first", 1.1f);
|
| 155 |
+
scc.Assign(&single, 0.25f);
|
| 156 |
+
float arr[] = {1,2.1,3,4,5};
|
| 157 |
+
std::vector<float> vec1(arr,arr+5);
|
| 158 |
+
scc.Assign(&multi,vec1);
|
| 159 |
+
ostringstream out;
|
| 160 |
+
scc.Save(out);
|
| 161 |
+
cerr << out.str() << endl;
|
| 162 |
+
istringstream in (out.str());
|
| 163 |
+
string line;
|
| 164 |
+
getline(in,line);
|
| 165 |
+
BOOST_CHECK_EQUAL(line, "MockSingle:4_1 0.25");
|
| 166 |
+
getline(in,line);
|
| 167 |
+
BOOST_CHECK_EQUAL(line, "MockMulti:4_1 1");
|
| 168 |
+
getline(in,line);
|
| 169 |
+
BOOST_CHECK_EQUAL(line, "MockMulti:4_2 2.1");
|
| 170 |
+
getline(in,line);
|
| 171 |
+
BOOST_CHECK_EQUAL(line, "MockMulti:4_3 3");
|
| 172 |
+
getline(in,line);
|
| 173 |
+
BOOST_CHECK_EQUAL(line, "MockMulti:4_4 4");
|
| 174 |
+
getline(in,line);
|
| 175 |
+
BOOST_CHECK_EQUAL(line, "MockMulti:4_5 5");
|
| 176 |
+
getline(in,line);
|
| 177 |
+
BOOST_CHECK_EQUAL(line,"MockSparse:4_first 1.1");
|
| 178 |
+
BOOST_CHECK(!getline(in,line));
|
| 179 |
+
}
|
| 180 |
+
*/
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
BOOST_AUTO_TEST_SUITE_END()
|
| 184 |
+
|
mosesdecoder/moses/Search.cpp
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "Manager.h"
|
| 2 |
+
#include "SearchCubePruning.h"
|
| 3 |
+
#include "SearchNormal.h"
|
| 4 |
+
#include "InputType.h"
|
| 5 |
+
#include "util/exception.hh"
|
| 6 |
+
|
| 7 |
+
namespace Moses
|
| 8 |
+
{
|
| 9 |
+
|
| 10 |
+
Search::Search(Manager& manager)
|
| 11 |
+
: m_manager(manager)
|
| 12 |
+
, m_source(manager.GetSource())
|
| 13 |
+
, m_options(*manager.options())
|
| 14 |
+
, m_inputPath()
|
| 15 |
+
, m_initialTransOpt()
|
| 16 |
+
, m_bitmaps(manager.GetSource().GetSize(), manager.GetSource().m_sourceCompleted)
|
| 17 |
+
, interrupted_flag(0)
|
| 18 |
+
{
|
| 19 |
+
m_initialTransOpt.SetInputPath(m_inputPath);
|
| 20 |
+
m_timer.start();
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
bool
|
| 24 |
+
Search::
|
| 25 |
+
out_of_time()
|
| 26 |
+
{
|
| 27 |
+
int const& timelimit = m_options.search.timeout;
|
| 28 |
+
if (timelimit > 0) {
|
| 29 |
+
double elapsed_time = GetUserTime();
|
| 30 |
+
if (elapsed_time > timelimit) {
|
| 31 |
+
VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
|
| 32 |
+
<< timelimit << ")" << std::endl);
|
| 33 |
+
interrupted_flag = 1;
|
| 34 |
+
return true;
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
int const& segment_timelimit = m_options.search.segment_timeout;
|
| 38 |
+
if (segment_timelimit > 0) {
|
| 39 |
+
double elapsed_time = m_timer.get_elapsed_time();
|
| 40 |
+
if (elapsed_time > segment_timelimit) {
|
| 41 |
+
VERBOSE(1,"Decoding for segment is out of time (" << elapsed_time << ","
|
| 42 |
+
<< segment_timelimit << ")" << std::endl);
|
| 43 |
+
interrupted_flag = 1;
|
| 44 |
+
return true;
|
| 45 |
+
}
|
| 46 |
+
}
|
| 47 |
+
return false;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
}
|
mosesdecoder/moses/Search.h
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef moses_Search_h
|
| 2 |
+
#define moses_Search_h
|
| 3 |
+
|
| 4 |
+
#include <vector>
|
| 5 |
+
#include "TypeDef.h"
|
| 6 |
+
#include "TranslationOption.h"
|
| 7 |
+
#include "Phrase.h"
|
| 8 |
+
#include "InputPath.h"
|
| 9 |
+
#include "Bitmaps.h"
|
| 10 |
+
#include "Timer.h"
|
| 11 |
+
|
| 12 |
+
namespace Moses
|
| 13 |
+
{
|
| 14 |
+
|
| 15 |
+
class HypothesisStack;
|
| 16 |
+
class Hypothesis;
|
| 17 |
+
class InputType;
|
| 18 |
+
class TranslationOptionCollection;
|
| 19 |
+
class Manager;
|
| 20 |
+
class Phrase;
|
| 21 |
+
|
| 22 |
+
/** Base search class used in the phrase-based decoder.
|
| 23 |
+
*
|
| 24 |
+
* Actual search class that implement the cube pruning algorithm (SearchCubePruning)
|
| 25 |
+
* or standard beam search (SearchNormal) should inherits from this class, and
|
| 26 |
+
* override pure virtual functions.
|
| 27 |
+
*/
|
| 28 |
+
class Search
|
| 29 |
+
{
|
| 30 |
+
public:
|
| 31 |
+
virtual const std::vector<HypothesisStack*>& GetHypothesisStacks() const = 0;
|
| 32 |
+
virtual const Hypothesis *GetBestHypothesis() const = 0;
|
| 33 |
+
|
| 34 |
+
//! Decode the sentence according to the specified search algorithm.
|
| 35 |
+
virtual void Decode() = 0;
|
| 36 |
+
|
| 37 |
+
explicit Search(Manager& manager);
|
| 38 |
+
virtual ~Search() {}
|
| 39 |
+
|
| 40 |
+
protected:
|
| 41 |
+
Manager& m_manager;
|
| 42 |
+
const InputType &m_source;
|
| 43 |
+
AllOptions const& m_options;
|
| 44 |
+
|
| 45 |
+
InputPath m_inputPath; // for initial hypo
|
| 46 |
+
TranslationOption m_initialTransOpt; /**< used to seed 1st hypo */
|
| 47 |
+
Bitmaps m_bitmaps;
|
| 48 |
+
|
| 49 |
+
/** flag indicating that decoder ran out of time (see switch -time-out) */
|
| 50 |
+
size_t interrupted_flag;
|
| 51 |
+
|
| 52 |
+
Timer m_timer;
|
| 53 |
+
bool out_of_time();
|
| 54 |
+
};
|
| 55 |
+
|
| 56 |
+
}
|
| 57 |
+
#endif
|
mosesdecoder/moses/SearchCubePruning.h
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef moses_SearchCubePruning_h
|
| 2 |
+
#define moses_SearchCubePruning_h
|
| 3 |
+
|
| 4 |
+
#include <vector>
|
| 5 |
+
#include "Search.h"
|
| 6 |
+
#include "HypothesisStackCubePruning.h"
|
| 7 |
+
#include "SentenceStats.h"
|
| 8 |
+
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
|
| 12 |
+
class InputType;
|
| 13 |
+
class TranslationOptionCollection;
|
| 14 |
+
|
| 15 |
+
/** Functions and variables you need to decoder an input using the phrase-based decoder with cube-pruning
|
| 16 |
+
* Instantiated by the Manager class
|
| 17 |
+
*/
|
| 18 |
+
class SearchCubePruning: public Search
|
| 19 |
+
{
|
| 20 |
+
protected:
|
| 21 |
+
std::vector < HypothesisStack* > m_hypoStackColl; /**< stacks to store hypotheses (partial translations) */
|
| 22 |
+
// no of elements = no of words in source + 1
|
| 23 |
+
const TranslationOptionCollection &m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
|
| 24 |
+
|
| 25 |
+
//! go thru all bitmaps in 1 stack & create backpointers to bitmaps in the stack
|
| 26 |
+
void CreateForwardTodos(HypothesisStackCubePruning &stack);
|
| 27 |
+
//! create a back pointer to this bitmap, with edge that has this words range translation
|
| 28 |
+
void CreateForwardTodos(const Bitmap &bitmap, const Range &range, BitmapContainer &bitmapContainer);
|
| 29 |
+
bool CheckDistortion(const Bitmap &bitmap, const Range &range) const;
|
| 30 |
+
|
| 31 |
+
void PrintBitmapContainerGraph();
|
| 32 |
+
|
| 33 |
+
public:
|
| 34 |
+
SearchCubePruning(Manager& manager, const TranslationOptionCollection &transOptColl);
|
| 35 |
+
~SearchCubePruning();
|
| 36 |
+
|
| 37 |
+
void Decode();
|
| 38 |
+
|
| 39 |
+
void OutputHypoStackSize();
|
| 40 |
+
void OutputHypoStack(int stack);
|
| 41 |
+
|
| 42 |
+
virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const;
|
| 43 |
+
virtual const Hypothesis *GetBestHypothesis() const;
|
| 44 |
+
};
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
}
|
| 48 |
+
#endif
|
mosesdecoder/moses/SearchNormal.cpp
ADDED
|
@@ -0,0 +1,423 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "Manager.h"
|
| 2 |
+
#include "Timer.h"
|
| 3 |
+
#include "SearchNormal.h"
|
| 4 |
+
#include "SentenceStats.h"
|
| 5 |
+
|
| 6 |
+
#include <boost/foreach.hpp>
|
| 7 |
+
|
| 8 |
+
using namespace std;
|
| 9 |
+
|
| 10 |
+
namespace Moses
|
| 11 |
+
{
|
| 12 |
+
/**
|
| 13 |
+
* Organizing main function
|
| 14 |
+
*
|
| 15 |
+
* /param source input sentence
|
| 16 |
+
* /param transOptColl collection of translation options to be used for this sentence
|
| 17 |
+
*/
|
| 18 |
+
SearchNormal::
|
| 19 |
+
SearchNormal(Manager& manager, const TranslationOptionCollection &transOptColl)
|
| 20 |
+
: Search(manager)
|
| 21 |
+
, m_hypoStackColl(manager.GetSource().GetSize() + 1)
|
| 22 |
+
, m_transOptColl(transOptColl)
|
| 23 |
+
{
|
| 24 |
+
VERBOSE(1, "Translating: " << m_source << endl);
|
| 25 |
+
|
| 26 |
+
// initialize the stacks: create data structure and set limits
|
| 27 |
+
std::vector < HypothesisStackNormal >::iterator iterStack;
|
| 28 |
+
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) {
|
| 29 |
+
HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(m_manager);
|
| 30 |
+
sourceHypoColl->SetMaxHypoStackSize(this->m_options.search.stack_size,
|
| 31 |
+
this->m_options.search.stack_diversity);
|
| 32 |
+
sourceHypoColl->SetBeamWidth(this->m_options.search.beam_width);
|
| 33 |
+
m_hypoStackColl[ind] = sourceHypoColl;
|
| 34 |
+
}
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
SearchNormal::~SearchNormal()
|
| 38 |
+
{
|
| 39 |
+
RemoveAllInColl(m_hypoStackColl);
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
bool
|
| 44 |
+
SearchNormal::
|
| 45 |
+
ProcessOneStack(HypothesisStack* hstack)
|
| 46 |
+
{
|
| 47 |
+
if (this->out_of_time()) return false;
|
| 48 |
+
SentenceStats &stats = m_manager.GetSentenceStats();
|
| 49 |
+
HypothesisStackNormal &sourceHypoColl
|
| 50 |
+
= *static_cast<HypothesisStackNormal*>(hstack);
|
| 51 |
+
|
| 52 |
+
// the stack is pruned before processing (lazy pruning):
|
| 53 |
+
VERBOSE(3,"processing hypothesis from next stack");
|
| 54 |
+
IFVERBOSE(2) stats.StartTimeStack();
|
| 55 |
+
sourceHypoColl.PruneToSize(m_options.search.stack_size);
|
| 56 |
+
VERBOSE(3,std::endl);
|
| 57 |
+
sourceHypoColl.CleanupArcList();
|
| 58 |
+
IFVERBOSE(2) stats.StopTimeStack();
|
| 59 |
+
|
| 60 |
+
// go through each hypothesis on the stack and try to expand it
|
| 61 |
+
// BOOST_FOREACH(Hypothesis* h, sourceHypoColl)
|
| 62 |
+
HypothesisStackNormal::const_iterator h;
|
| 63 |
+
for (h = sourceHypoColl.begin(); h != sourceHypoColl.end(); ++h)
|
| 64 |
+
ProcessOneHypothesis(**h);
|
| 65 |
+
return true;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
/**
|
| 70 |
+
* Main decoder loop that translates a sentence by expanding
|
| 71 |
+
* hypotheses stack by stack, until the end of the sentence.
|
| 72 |
+
*/
|
| 73 |
+
void SearchNormal::Decode()
|
| 74 |
+
{
|
| 75 |
+
// initial seed hypothesis: nothing translated, no words produced
|
| 76 |
+
const Bitmap &initBitmap = m_bitmaps.GetInitialBitmap();
|
| 77 |
+
Hypothesis *hypo = new Hypothesis(m_manager, m_source, m_initialTransOpt, initBitmap, m_manager.GetNextHypoId());
|
| 78 |
+
|
| 79 |
+
m_hypoStackColl[0]->AddPrune(hypo);
|
| 80 |
+
|
| 81 |
+
// go through each stack
|
| 82 |
+
BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {
|
| 83 |
+
if (!ProcessOneStack(hstack)) return;
|
| 84 |
+
IFVERBOSE(2) OutputHypoStackSize();
|
| 85 |
+
actual_hypoStack = static_cast<HypothesisStackNormal*>(hstack);
|
| 86 |
+
}
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
/** Find all translation options to expand one hypothesis, trigger expansion
|
| 91 |
+
* this is mostly a check for overlap with already covered words, and for
|
| 92 |
+
* violation of reordering limits.
|
| 93 |
+
* \param hypothesis hypothesis to be expanded upon
|
| 94 |
+
*/
|
| 95 |
+
void
|
| 96 |
+
SearchNormal::
|
| 97 |
+
ProcessOneHypothesis(const Hypothesis &hypothesis)
|
| 98 |
+
{
|
| 99 |
+
// since we check for reordering limits, its good to have that limit handy
|
| 100 |
+
bool isWordLattice = m_source.GetType() == WordLatticeInput;
|
| 101 |
+
|
| 102 |
+
const Bitmap &hypoBitmap = hypothesis.GetWordsBitmap();
|
| 103 |
+
const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
|
| 104 |
+
size_t const sourceSize = m_source.GetSize();
|
| 105 |
+
|
| 106 |
+
ReorderingConstraint const&
|
| 107 |
+
ReoConstraint = m_source.GetReorderingConstraint();
|
| 108 |
+
|
| 109 |
+
// no limit of reordering: only check for overlap
|
| 110 |
+
if (m_options.reordering.max_distortion < 0) {
|
| 111 |
+
|
| 112 |
+
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
|
| 113 |
+
TranslationOptionList const* tol;
|
| 114 |
+
size_t endPos = startPos;
|
| 115 |
+
for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
|
| 116 |
+
tol && endPos < sourceSize;
|
| 117 |
+
tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos)) {
|
| 118 |
+
if (tol->size() == 0
|
| 119 |
+
|| hypoBitmap.Overlap(Range(startPos, endPos))
|
| 120 |
+
|| !ReoConstraint.Check(hypoBitmap, startPos, endPos)) {
|
| 121 |
+
continue;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
//TODO: does this method include incompatible WordLattice hypotheses?
|
| 125 |
+
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
| 126 |
+
}
|
| 127 |
+
}
|
| 128 |
+
return; // done with special case (no reordering limit)
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
// There are reordering limits. Make sure they are not violated.
|
| 132 |
+
|
| 133 |
+
Range prevRange = hypothesis.GetCurrSourceWordsRange();
|
| 134 |
+
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
|
| 135 |
+
|
| 136 |
+
// don't bother expanding phrases if the first position is already taken
|
| 137 |
+
if(hypoBitmap.GetValue(startPos)) continue;
|
| 138 |
+
|
| 139 |
+
size_t maxSize = sourceSize - startPos;
|
| 140 |
+
size_t maxSizePhrase = m_options.search.max_phrase_length;
|
| 141 |
+
maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
|
| 142 |
+
size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
|
| 143 |
+
|
| 144 |
+
if (isWordLattice) {
|
| 145 |
+
// first question: is there a path from the closest translated word to the left
|
| 146 |
+
// of the hypothesized extension to the start of the hypothesized extension?
|
| 147 |
+
// long version:
|
| 148 |
+
// - is there anything to our left?
|
| 149 |
+
// - is it farther left than where we're starting anyway?
|
| 150 |
+
// - can we get to it?
|
| 151 |
+
|
| 152 |
+
// closestLeft is exclusive: a value of 3 means 2 is covered, our
|
| 153 |
+
// arc is currently ENDING at 3 and can start at 3 implicitly
|
| 154 |
+
if (closestLeft != 0 && closestLeft != startPos
|
| 155 |
+
&& !m_source.CanIGetFromAToB(closestLeft, startPos))
|
| 156 |
+
continue;
|
| 157 |
+
|
| 158 |
+
if (prevRange.GetStartPos() != NOT_FOUND &&
|
| 159 |
+
prevRange.GetStartPos() > startPos &&
|
| 160 |
+
!m_source.CanIGetFromAToB(startPos, prevRange.GetStartPos()))
|
| 161 |
+
continue;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
Range currentStartRange(startPos, startPos);
|
| 165 |
+
if(m_source.ComputeDistortionDistance(prevRange, currentStartRange)
|
| 166 |
+
> m_options.reordering.max_distortion)
|
| 167 |
+
continue;
|
| 168 |
+
|
| 169 |
+
TranslationOptionList const* tol;
|
| 170 |
+
size_t endPos = startPos;
|
| 171 |
+
for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
|
| 172 |
+
tol && endPos < sourceSize;
|
| 173 |
+
tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos)) {
|
| 174 |
+
Range extRange(startPos, endPos);
|
| 175 |
+
if (tol->size() == 0
|
| 176 |
+
|| hypoBitmap.Overlap(extRange)
|
| 177 |
+
|| !ReoConstraint.Check(hypoBitmap, startPos, endPos)
|
| 178 |
+
|| (isWordLattice && !m_source.IsCoveragePossible(extRange))) {
|
| 179 |
+
continue;
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
// ask second question here: we already know we can get to our
|
| 183 |
+
// starting point from the closest thing to the left. We now ask the
|
| 184 |
+
// follow up: can we get from our end to the closest thing on the
|
| 185 |
+
// right?
|
| 186 |
+
//
|
| 187 |
+
// long version: is anything to our right? is it farther
|
| 188 |
+
// right than our (inclusive) end? can our end reach it?
|
| 189 |
+
bool isLeftMostEdge = (hypoFirstGapPos == startPos);
|
| 190 |
+
|
| 191 |
+
size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
|
| 192 |
+
if (isWordLattice) {
|
| 193 |
+
if (closestRight != endPos
|
| 194 |
+
&& ((closestRight + 1) < sourceSize)
|
| 195 |
+
&& !m_source.CanIGetFromAToB(endPos + 1, closestRight + 1)) {
|
| 196 |
+
continue;
|
| 197 |
+
}
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
if (isLeftMostEdge) {
|
| 201 |
+
// any length extension is okay if starting at left-most edge
|
| 202 |
+
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
| 203 |
+
} else { // starting somewhere other than left-most edge, use caution
|
| 204 |
+
// the basic idea is this: we would like to translate a phrase
|
| 205 |
+
// starting from a position further right than the left-most
|
| 206 |
+
// open gap. The distortion penalty for the following phrase
|
| 207 |
+
// will be computed relative to the ending position of the
|
| 208 |
+
// current extension, so we ask now what its maximum value will
|
| 209 |
+
// be (which will always be the value of the hypothesis starting
|
| 210 |
+
// at the left-most edge). If this value is less than the
|
| 211 |
+
// distortion limit, we don't allow this extension to be made.
|
| 212 |
+
Range bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
|
| 213 |
+
|
| 214 |
+
if (m_source.ComputeDistortionDistance(extRange, bestNextExtension)
|
| 215 |
+
> m_options.reordering.max_distortion) continue;
|
| 216 |
+
|
| 217 |
+
// everything is fine, we're good to go
|
| 218 |
+
ExpandAllHypotheses(hypothesis, startPos, endPos);
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
}
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
/**
|
| 226 |
+
* Expand a hypothesis given a list of translation options
|
| 227 |
+
* \param hypothesis hypothesis to be expanded upon
|
| 228 |
+
* \param startPos first word position of span covered
|
| 229 |
+
* \param endPos last word position of span covered
|
| 230 |
+
*/
|
| 231 |
+
|
| 232 |
+
void
|
| 233 |
+
SearchNormal::
|
| 234 |
+
ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
|
| 235 |
+
{
|
| 236 |
+
// early discarding: check if hypothesis is too bad to build
|
| 237 |
+
// this idea is explained in (Moore&Quirk, MT Summit 2007)
|
| 238 |
+
float expectedScore = 0.0f;
|
| 239 |
+
|
| 240 |
+
const Bitmap &sourceCompleted = hypothesis.GetWordsBitmap();
|
| 241 |
+
float estimatedScore = m_transOptColl.GetEstimatedScores().CalcEstimatedScore( sourceCompleted, startPos, endPos );
|
| 242 |
+
|
| 243 |
+
const Range &hypoRange = hypothesis.GetCurrSourceWordsRange();
|
| 244 |
+
//cerr << "DOING " << sourceCompleted << " [" << hypoRange.GetStartPos() << " " << hypoRange.GetEndPos() << "]"
|
| 245 |
+
// " [" << startPos << " " << endPos << "]" << endl;
|
| 246 |
+
|
| 247 |
+
if (m_options.search.UseEarlyDiscarding()) {
|
| 248 |
+
// expected score is based on score of current hypothesis
|
| 249 |
+
expectedScore = hypothesis.GetScore();
|
| 250 |
+
|
| 251 |
+
// add new future score estimate
|
| 252 |
+
expectedScore += estimatedScore;
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
// loop through all translation options
|
| 256 |
+
const TranslationOptionList* tol
|
| 257 |
+
= m_transOptColl.GetTranslationOptionList(startPos, endPos);
|
| 258 |
+
if (!tol || tol->size() == 0) return;
|
| 259 |
+
|
| 260 |
+
// Create new bitmap
|
| 261 |
+
const TranslationOption &transOpt = **tol->begin();
|
| 262 |
+
const Range &nextRange = transOpt.GetSourceWordsRange();
|
| 263 |
+
const Bitmap &nextBitmap = m_bitmaps.GetBitmap(sourceCompleted, nextRange);
|
| 264 |
+
|
| 265 |
+
TranslationOptionList::const_iterator iter;
|
| 266 |
+
for (iter = tol->begin() ; iter != tol->end() ; ++iter) {
|
| 267 |
+
const TranslationOption &transOpt = **iter;
|
| 268 |
+
ExpandHypothesis(hypothesis, transOpt, expectedScore, estimatedScore, nextBitmap);
|
| 269 |
+
}
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
/**
|
| 273 |
+
* Expand one hypothesis with a translation option.
|
| 274 |
+
* this involves initial creation, scoring and adding it to the proper stack
|
| 275 |
+
* \param hypothesis hypothesis to be expanded upon
|
| 276 |
+
* \param transOpt translation option (phrase translation)
|
| 277 |
+
* that is applied to create the new hypothesis
|
| 278 |
+
* \param expectedScore base score for early discarding
|
| 279 |
+
* (base hypothesis score plus future score estimation)
|
| 280 |
+
*/
|
| 281 |
+
void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis,
|
| 282 |
+
const TranslationOption &transOpt,
|
| 283 |
+
float expectedScore,
|
| 284 |
+
float estimatedScore,
|
| 285 |
+
const Bitmap &bitmap)
|
| 286 |
+
{
|
| 287 |
+
SentenceStats &stats = m_manager.GetSentenceStats();
|
| 288 |
+
|
| 289 |
+
Hypothesis *newHypo;
|
| 290 |
+
if (! m_options.search.UseEarlyDiscarding()) {
|
| 291 |
+
// simple build, no questions asked
|
| 292 |
+
IFVERBOSE(2) {
|
| 293 |
+
stats.StartTimeBuildHyp();
|
| 294 |
+
}
|
| 295 |
+
newHypo = new Hypothesis(hypothesis, transOpt, bitmap, m_manager.GetNextHypoId());
|
| 296 |
+
IFVERBOSE(2) {
|
| 297 |
+
stats.StopTimeBuildHyp();
|
| 298 |
+
}
|
| 299 |
+
if (newHypo==NULL) return;
|
| 300 |
+
|
| 301 |
+
IFVERBOSE(2) {
|
| 302 |
+
m_manager.GetSentenceStats().StartTimeOtherScore();
|
| 303 |
+
}
|
| 304 |
+
newHypo->EvaluateWhenApplied(estimatedScore);
|
| 305 |
+
IFVERBOSE(2) {
|
| 306 |
+
m_manager.GetSentenceStats().StopTimeOtherScore();
|
| 307 |
+
|
| 308 |
+
// TODO: these have been meaningless for a while.
|
| 309 |
+
// At least since commit 67fb5c
|
| 310 |
+
// should now be measured in SearchNormal.cpp:254 instead, around CalcFutureScore2()
|
| 311 |
+
// CalcFutureScore2() also called in BackwardsEdge::Initialize().
|
| 312 |
+
//
|
| 313 |
+
// however, CalcFutureScore2() should be quick
|
| 314 |
+
// since it uses dynamic programming results in SquareMatrix
|
| 315 |
+
m_manager.GetSentenceStats().StartTimeEstimateScore();
|
| 316 |
+
m_manager.GetSentenceStats().StopTimeEstimateScore();
|
| 317 |
+
}
|
| 318 |
+
} else
|
| 319 |
+
// early discarding: check if hypothesis is too bad to build
|
| 320 |
+
{
|
| 321 |
+
// worst possible score may have changed -> recompute
|
| 322 |
+
size_t wordsTranslated = hypothesis.GetWordsBitmap().GetNumWordsCovered() + transOpt.GetSize();
|
| 323 |
+
float allowedScore = m_hypoStackColl[wordsTranslated]->GetWorstScore();
|
| 324 |
+
if (m_options.search.stack_diversity) {
|
| 325 |
+
WordsBitmapID id = hypothesis.GetWordsBitmap().GetIDPlus(transOpt.GetStartPos(), transOpt.GetEndPos());
|
| 326 |
+
float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id );
|
| 327 |
+
allowedScore = std::min( allowedScore, allowedScoreForBitmap );
|
| 328 |
+
}
|
| 329 |
+
allowedScore += m_options.search.early_discarding_threshold;
|
| 330 |
+
|
| 331 |
+
// add expected score of translation option
|
| 332 |
+
expectedScore += transOpt.GetFutureScore();
|
| 333 |
+
|
| 334 |
+
// check if transOpt score push it already below limit
|
| 335 |
+
if (expectedScore < allowedScore) {
|
| 336 |
+
IFVERBOSE(2) {
|
| 337 |
+
stats.AddNotBuilt();
|
| 338 |
+
}
|
| 339 |
+
return;
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
// build the hypothesis without scoring
|
| 343 |
+
IFVERBOSE(2) {
|
| 344 |
+
stats.StartTimeBuildHyp();
|
| 345 |
+
}
|
| 346 |
+
newHypo = new Hypothesis(hypothesis, transOpt, bitmap, m_manager.GetNextHypoId());
|
| 347 |
+
if (newHypo==NULL) return;
|
| 348 |
+
IFVERBOSE(2) {
|
| 349 |
+
stats.StopTimeBuildHyp();
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
// ... and check if that is below the limit
|
| 353 |
+
if (expectedScore < allowedScore) {
|
| 354 |
+
IFVERBOSE(2) {
|
| 355 |
+
stats.AddEarlyDiscarded();
|
| 356 |
+
}
|
| 357 |
+
delete newHypo;
|
| 358 |
+
return;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
// logging for the curious
|
| 364 |
+
IFVERBOSE(3) {
|
| 365 |
+
newHypo->PrintHypothesis();
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
// add to hypothesis stack
|
| 369 |
+
size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();
|
| 370 |
+
IFVERBOSE(2) {
|
| 371 |
+
stats.StartTimeStack();
|
| 372 |
+
}
|
| 373 |
+
m_hypoStackColl[wordsTranslated]->AddPrune(newHypo);
|
| 374 |
+
IFVERBOSE(2) {
|
| 375 |
+
stats.StopTimeStack();
|
| 376 |
+
}
|
| 377 |
+
}
|
| 378 |
+
|
| 379 |
+
const std::vector < HypothesisStack* >& SearchNormal::GetHypothesisStacks() const
|
| 380 |
+
{
|
| 381 |
+
return m_hypoStackColl;
|
| 382 |
+
}
|
| 383 |
+
|
| 384 |
+
/**
|
| 385 |
+
* Find best hypothesis on the last stack.
|
| 386 |
+
* This is the end point of the best translation, which can be traced back from here
|
| 387 |
+
*/
|
| 388 |
+
const Hypothesis *SearchNormal::GetBestHypothesis() const
|
| 389 |
+
{
|
| 390 |
+
if (interrupted_flag == 0) {
|
| 391 |
+
const HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(m_hypoStackColl.back());
|
| 392 |
+
return hypoColl.GetBestHypothesis();
|
| 393 |
+
} else {
|
| 394 |
+
const HypothesisStackNormal &hypoColl = *actual_hypoStack;
|
| 395 |
+
return hypoColl.GetBestHypothesis();
|
| 396 |
+
}
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
/**
|
| 400 |
+
* Logging of hypothesis stack sizes
|
| 401 |
+
*/
|
| 402 |
+
void SearchNormal::OutputHypoStackSize()
|
| 403 |
+
{
|
| 404 |
+
std::vector < HypothesisStack* >::const_iterator iterStack = m_hypoStackColl.begin();
|
| 405 |
+
TRACE_ERR( "Stack sizes: " << (int)(*iterStack)->size());
|
| 406 |
+
for (++iterStack; iterStack != m_hypoStackColl.end() ; ++iterStack) {
|
| 407 |
+
TRACE_ERR( ", " << (int)(*iterStack)->size());
|
| 408 |
+
}
|
| 409 |
+
TRACE_ERR( endl);
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
void SearchNormal::OutputHypoStack()
|
| 413 |
+
{
|
| 414 |
+
// all stacks
|
| 415 |
+
int i = 0;
|
| 416 |
+
vector < HypothesisStack* >::iterator iterStack;
|
| 417 |
+
for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
|
| 418 |
+
HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
|
| 419 |
+
TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);
|
| 420 |
+
}
|
| 421 |
+
}
|
| 422 |
+
|
| 423 |
+
}
|
mosesdecoder/moses/SquareMatrix.cpp
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2006 University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#include <string>
|
| 24 |
+
#include <iostream>
|
| 25 |
+
#include "SquareMatrix.h"
|
| 26 |
+
#include "TypeDef.h"
|
| 27 |
+
#include "Util.h"
|
| 28 |
+
|
| 29 |
+
using namespace std;
|
| 30 |
+
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
void SquareMatrix::InitTriangle(float val)
|
| 34 |
+
{
|
| 35 |
+
for(size_t row=0; row < m_size; row++) {
|
| 36 |
+
for(size_t col=row; col<m_size; col++) {
|
| 37 |
+
SetScore(row, col, -numeric_limits<float>::infinity());
|
| 38 |
+
}
|
| 39 |
+
}
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
/**
|
| 43 |
+
* Calculate future score estimate for a given coverage bitmap
|
| 44 |
+
*
|
| 45 |
+
* /param bitmap coverage bitmap
|
| 46 |
+
*/
|
| 47 |
+
|
| 48 |
+
float SquareMatrix::CalcEstimatedScore( Bitmap const &bitmap ) const
|
| 49 |
+
{
|
| 50 |
+
const size_t notInGap= numeric_limits<size_t>::max();
|
| 51 |
+
size_t startGap = notInGap;
|
| 52 |
+
float estimatedScore = 0.0f;
|
| 53 |
+
for(size_t currPos = 0 ; currPos < bitmap.GetSize() ; currPos++) {
|
| 54 |
+
// start of a new gap?
|
| 55 |
+
if(bitmap.GetValue(currPos) == false && startGap == notInGap) {
|
| 56 |
+
startGap = currPos;
|
| 57 |
+
}
|
| 58 |
+
// end of a gap?
|
| 59 |
+
else if(bitmap.GetValue(currPos) == true && startGap != notInGap) {
|
| 60 |
+
estimatedScore += GetScore(startGap, currPos - 1);
|
| 61 |
+
startGap = notInGap;
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
// coverage ending with gap?
|
| 65 |
+
if (startGap != notInGap) {
|
| 66 |
+
estimatedScore += GetScore(startGap, bitmap.GetSize() - 1);
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
return estimatedScore;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/**
|
| 73 |
+
* Calculare future score estimate for a given coverage bitmap
|
| 74 |
+
* and an additional span that is also covered. This function is used
|
| 75 |
+
* to compute future score estimates for hypotheses that we may want
|
| 76 |
+
* build, but first want to check.
|
| 77 |
+
*
|
| 78 |
+
* Note: this function is implemented a bit more complex than
|
| 79 |
+
* the basic one (w/o additional phrase) for speed reasons,
|
| 80 |
+
* which is probably overkill.
|
| 81 |
+
*
|
| 82 |
+
* /param bitmap coverage bitmap
|
| 83 |
+
* /param startPos start of the span that is added to the coverage
|
| 84 |
+
* /param endPos end of the span that is added to the coverage
|
| 85 |
+
*/
|
| 86 |
+
|
| 87 |
+
float SquareMatrix::CalcEstimatedScore( Bitmap const &bitmap, size_t startPos, size_t endPos ) const
|
| 88 |
+
{
|
| 89 |
+
const size_t notInGap= numeric_limits<size_t>::max();
|
| 90 |
+
float estimatedScore = 0.0f;
|
| 91 |
+
size_t startGap = bitmap.GetFirstGapPos();
|
| 92 |
+
if (startGap == NOT_FOUND) return estimatedScore; // everything filled
|
| 93 |
+
|
| 94 |
+
// start loop at first gap
|
| 95 |
+
size_t startLoop = startGap+1;
|
| 96 |
+
if (startPos == startGap) { // unless covered by phrase
|
| 97 |
+
startGap = notInGap;
|
| 98 |
+
startLoop = endPos+1; // -> postpone start
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
size_t lastCovered = bitmap.GetLastPos();
|
| 102 |
+
if (endPos > lastCovered || lastCovered == NOT_FOUND) lastCovered = endPos;
|
| 103 |
+
|
| 104 |
+
for(size_t currPos = startLoop; currPos <= lastCovered ; currPos++) {
|
| 105 |
+
// start of a new gap?
|
| 106 |
+
if(startGap == notInGap && bitmap.GetValue(currPos) == false && (currPos < startPos || currPos > endPos)) {
|
| 107 |
+
startGap = currPos;
|
| 108 |
+
}
|
| 109 |
+
// end of a gap?
|
| 110 |
+
else if(startGap != notInGap && (bitmap.GetValue(currPos) == true || (startPos <= currPos && currPos <= endPos))) {
|
| 111 |
+
estimatedScore += GetScore(startGap, currPos - 1);
|
| 112 |
+
startGap = notInGap;
|
| 113 |
+
}
|
| 114 |
+
}
|
| 115 |
+
// coverage ending with gap?
|
| 116 |
+
if (lastCovered != bitmap.GetSize() - 1) {
|
| 117 |
+
estimatedScore += GetScore(lastCovered+1, bitmap.GetSize() - 1);
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
return estimatedScore;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
TO_STRING_BODY(SquareMatrix);
|
| 124 |
+
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
|
mosesdecoder/moses/StaticData.cpp
ADDED
|
@@ -0,0 +1,966 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
| 2 |
+
// $Id$
|
| 3 |
+
// vim:tabstop=2
|
| 4 |
+
|
| 5 |
+
/***********************************************************************
|
| 6 |
+
Moses - factored phrase-based language decoder
|
| 7 |
+
Copyright (C) 2006 University of Edinburgh
|
| 8 |
+
|
| 9 |
+
This library is free software; you can redistribute it and/or
|
| 10 |
+
modify it under the terms of the GNU Lesser General Public
|
| 11 |
+
License as published by the Free Software Foundation; either
|
| 12 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 13 |
+
|
| 14 |
+
This library is distributed in the hope that it will be useful,
|
| 15 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 16 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 17 |
+
Lesser General Public License for more details.
|
| 18 |
+
|
| 19 |
+
You should have received a copy of the GNU Lesser General Public
|
| 20 |
+
License along with this library; if not, write to the Free Software
|
| 21 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 22 |
+
***********************************************************************/
|
| 23 |
+
|
| 24 |
+
#include <string>
|
| 25 |
+
#include <boost/algorithm/string/predicate.hpp>
|
| 26 |
+
|
| 27 |
+
#include "moses/FF/Factory.h"
|
| 28 |
+
#include "TypeDef.h"
|
| 29 |
+
#include "moses/FF/WordPenaltyProducer.h"
|
| 30 |
+
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
| 31 |
+
#include "moses/FF/InputFeature.h"
|
| 32 |
+
#include "moses/FF/DynamicCacheBasedLanguageModel.h"
|
| 33 |
+
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
|
| 34 |
+
|
| 35 |
+
#include "DecodeStepTranslation.h"
|
| 36 |
+
#include "DecodeStepGeneration.h"
|
| 37 |
+
#include "GenerationDictionary.h"
|
| 38 |
+
#include "StaticData.h"
|
| 39 |
+
#include "Util.h"
|
| 40 |
+
#include "FactorCollection.h"
|
| 41 |
+
#include "Timer.h"
|
| 42 |
+
#include "TranslationOption.h"
|
| 43 |
+
#include "DecodeGraph.h"
|
| 44 |
+
#include "InputFileStream.h"
|
| 45 |
+
#include "ScoreComponentCollection.h"
|
| 46 |
+
#include "DecodeGraph.h"
|
| 47 |
+
#include "TranslationModel/PhraseDictionary.h"
|
| 48 |
+
#include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
| 49 |
+
|
| 50 |
+
#ifdef WITH_THREADS
|
| 51 |
+
#include <boost/thread.hpp>
|
| 52 |
+
#endif
|
| 53 |
+
#ifdef HAVE_CMPH
|
| 54 |
+
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
|
| 55 |
+
#endif
|
| 56 |
+
#if defined HAVE_CMPH
|
| 57 |
+
#include "moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
|
| 58 |
+
#endif
|
| 59 |
+
|
| 60 |
+
using namespace std;
|
| 61 |
+
using namespace boost::algorithm;
|
| 62 |
+
|
| 63 |
+
namespace Moses
|
| 64 |
+
{
|
| 65 |
+
StaticData StaticData::s_instance;
|
| 66 |
+
|
| 67 |
+
// Construct the singleton with default option values and allocate the
// phrase memory pool up front.
StaticData::StaticData()
  : m_options(new AllOptions)
  , m_requireSortingAfterSourceContext(false)
  , m_currentWeightSetting("default")
  , m_treeStructure(NULL)   // no tree-structure feature until registered
  , m_coordSpaceNextID(1)   // coordinate-space IDs start at 1
{
  Phrase::InitializeMemPool();
}
|
| 76 |
+
|
| 77 |
+
// Release owned decode graphs (which own their decode steps), then free
// the phrase memory pool.
StaticData::~StaticData()
{
  RemoveAllInColl(m_decodeGraphs);
  Phrase::FinalizeMemPool();
}
|
| 82 |
+
|
| 83 |
+
// Static entry point: record the executable path on the singleton, then
// delegate the full configuration load. Returns false on any load failure.
bool StaticData::LoadDataStatic(Parameter *parameter, const std::string &execPath)
{
  StaticData &self = s_instance;
  self.SetExecPath(execPath);
  return self.LoadData(parameter);
}
|
| 88 |
+
|
| 89 |
+
void
|
| 90 |
+
StaticData
|
| 91 |
+
::initialize_features()
|
| 92 |
+
{
|
| 93 |
+
std::map<std::string, std::string> featureNameOverride = OverrideFeatureNames();
|
| 94 |
+
// all features
|
| 95 |
+
map<string, int> featureIndexMap;
|
| 96 |
+
|
| 97 |
+
const PARAM_VEC* params = m_parameter->GetParam("feature");
|
| 98 |
+
for (size_t i = 0; params && i < params->size(); ++i) {
|
| 99 |
+
const string &line = Trim(params->at(i));
|
| 100 |
+
VERBOSE(1,"line=" << line << endl);
|
| 101 |
+
if (line.empty())
|
| 102 |
+
continue;
|
| 103 |
+
|
| 104 |
+
vector<string> toks = Tokenize(line);
|
| 105 |
+
|
| 106 |
+
string &feature = toks[0];
|
| 107 |
+
std::map<std::string, std::string>::const_iterator iter
|
| 108 |
+
= featureNameOverride.find(feature);
|
| 109 |
+
if (iter == featureNameOverride.end()) {
|
| 110 |
+
// feature name not override
|
| 111 |
+
m_registry.Construct(feature, line);
|
| 112 |
+
} else {
|
| 113 |
+
// replace feature name with new name
|
| 114 |
+
string newName = iter->second;
|
| 115 |
+
feature = newName;
|
| 116 |
+
string newLine = Join(" ", toks);
|
| 117 |
+
m_registry.Construct(newName, newLine);
|
| 118 |
+
}
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
NoCache();
|
| 122 |
+
OverrideFeatures();
|
| 123 |
+
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
bool
|
| 127 |
+
StaticData
|
| 128 |
+
::ini_output_options()
|
| 129 |
+
{
|
| 130 |
+
// verbose level
|
| 131 |
+
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
|
| 132 |
+
m_parameter->SetParameter<string>(m_outputUnknownsFile,
|
| 133 |
+
"output-unknowns", "");
|
| 134 |
+
return true;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
// threads, timeouts, etc.
|
| 138 |
+
bool
|
| 139 |
+
StaticData
|
| 140 |
+
::ini_performance_options()
|
| 141 |
+
{
|
| 142 |
+
const PARAM_VEC *params;
|
| 143 |
+
|
| 144 |
+
m_threadCount = 1;
|
| 145 |
+
params = m_parameter->GetParam("threads");
|
| 146 |
+
if (params && params->size()) {
|
| 147 |
+
if (params->at(0) == "all") {
|
| 148 |
+
#ifdef WITH_THREADS
|
| 149 |
+
m_threadCount = boost::thread::hardware_concurrency();
|
| 150 |
+
if (!m_threadCount) {
|
| 151 |
+
std::cerr << "-threads all specified but Boost doesn't know how many cores there are";
|
| 152 |
+
return false;
|
| 153 |
+
}
|
| 154 |
+
#else
|
| 155 |
+
std::cerr << "-threads all specified but moses not built with thread support";
|
| 156 |
+
return false;
|
| 157 |
+
#endif
|
| 158 |
+
} else {
|
| 159 |
+
m_threadCount = Scan<int>(params->at(0));
|
| 160 |
+
if (m_threadCount < 1) {
|
| 161 |
+
std::cerr << "Specify at least one thread.";
|
| 162 |
+
return false;
|
| 163 |
+
}
|
| 164 |
+
#ifndef WITH_THREADS
|
| 165 |
+
if (m_threadCount > 1) {
|
| 166 |
+
std::cerr << "Error: Thread count of " << params->at(0)
|
| 167 |
+
<< " but moses not built with thread support";
|
| 168 |
+
return false;
|
| 169 |
+
}
|
| 170 |
+
#endif
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
return true;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
// Load the complete decoder configuration from the given Parameter object.
// ORDER HERE MATTERS, SO DON'T CHANGE IT UNLESS YOU KNOW WHAT YOU ARE DOING!
// Returns false on the first unrecoverable configuration error.
bool StaticData::LoadData(Parameter *parameter)
{
  m_parameter = parameter;

  m_options->init(*parameter);
  if (is_syntax(m_options->search.algo)) {
    // chart-based search needs the non-terminal vocabulary and chart params
    m_options->syntax.LoadNonTerminals(*parameter, FactorCollection::Instance());
    LoadChartDecodingParameters();
  }

  // input / output settings
  m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
  m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);

  m_bookkeeping_options.init(*parameter);
  if (!ini_output_options()) return false;

  // threading etc.
  if (!ini_performance_options()) return false;

  // FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================

  // class-specific default parameters must be set before construction
#if defined HAVE_CMPH
  LexicalReorderingTableCompact::SetStaticDefaultParameters(*parameter);
  PhraseDictionaryCompact::SetStaticDefaultParameters(*parameter);
#endif

  initialize_features();

  // when only showing weights, skip the (expensive) model loading
  if (m_parameter->GetParam("show-weights") == NULL)
    LoadFeatureFunctions();

  LoadDecodeGraphs();

  // sanity check that there are no weights without an associated FF
  if (!CheckWeights()) return false;

  // extra feature weights from a separate file
  string extraWeightFile;
  m_parameter->SetParameter<string>(extraWeightFile, "weight-file", "");
  if (!extraWeightFile.empty()) {
    ScoreComponentCollection extraWeights;
    if (!extraWeights.Load(extraWeightFile)) {
      std::cerr << "Unable to load weights from " << extraWeightFile;
      return false;
    }
    m_allWeights.PlusEquals(extraWeights);
  }

  // sparse features from the config overrule the weight file
  LoadSparseWeightsFromConfig();

  // load alternate weight settings
  //
  // When and where are these used??? [UG]
  //
  // Update: Just checked the manual. The config file is NOT the right
  // place to do this. [UG]
  //
  // <TODO>
  // * Eliminate alternate-weight-setting. Alternate weight settings should
  //   be provided with the input, not in the config file.
  // </TODO>
  const PARAM_VEC *altWeights = m_parameter->GetParam("alternate-weight-setting");
  if (altWeights && altWeights->size() && !LoadAlternateWeightSettings())
    return false;

  return true;
}
|
| 251 |
+
|
| 252 |
+
void StaticData::SetWeight(const FeatureFunction* sp, float weight)
|
| 253 |
+
{
|
| 254 |
+
m_allWeights.Resize();
|
| 255 |
+
m_allWeights.Assign(sp,weight);
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
void StaticData::SetWeights(const FeatureFunction* sp,
|
| 259 |
+
const std::vector<float>& weights)
|
| 260 |
+
{
|
| 261 |
+
m_allWeights.Resize();
|
| 262 |
+
m_allWeights.Assign(sp,weights);
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
void StaticData::LoadNonTerminals()
|
| 266 |
+
{
|
| 267 |
+
string defaultNonTerminals;
|
| 268 |
+
m_parameter->SetParameter<string>(defaultNonTerminals, "non-terminals", "X");
|
| 269 |
+
|
| 270 |
+
FactorCollection &factorCollection = FactorCollection::Instance();
|
| 271 |
+
|
| 272 |
+
m_inputDefaultNonTerminal.SetIsNonTerminal(true);
|
| 273 |
+
const Factor *sourceFactor = factorCollection.AddFactor(Input, 0, defaultNonTerminals, true);
|
| 274 |
+
m_inputDefaultNonTerminal.SetFactor(0, sourceFactor);
|
| 275 |
+
|
| 276 |
+
m_outputDefaultNonTerminal.SetIsNonTerminal(true);
|
| 277 |
+
const Factor *targetFactor = factorCollection.AddFactor(Output, 0, defaultNonTerminals, true);
|
| 278 |
+
m_outputDefaultNonTerminal.SetFactor(0, targetFactor);
|
| 279 |
+
|
| 280 |
+
// for unknown words
|
| 281 |
+
const PARAM_VEC *params = m_parameter->GetParam("unknown-lhs");
|
| 282 |
+
if (params == NULL || params->size() == 0) {
|
| 283 |
+
UnknownLHSEntry entry(defaultNonTerminals, 0.0f);
|
| 284 |
+
m_unknownLHS.push_back(entry);
|
| 285 |
+
} else {
|
| 286 |
+
const string &filePath = params->at(0);
|
| 287 |
+
|
| 288 |
+
InputFileStream inStream(filePath);
|
| 289 |
+
string line;
|
| 290 |
+
while(getline(inStream, line)) {
|
| 291 |
+
vector<string> tokens = Tokenize(line);
|
| 292 |
+
UTIL_THROW_IF2(tokens.size() != 2,
|
| 293 |
+
"Incorrect unknown LHS format: " << line);
|
| 294 |
+
UnknownLHSEntry entry(tokens[0], Scan<float>(tokens[1]));
|
| 295 |
+
m_unknownLHS.push_back(entry);
|
| 296 |
+
// const Factor *targetFactor =
|
| 297 |
+
factorCollection.AddFactor(Output, 0, tokens[0], true);
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
void StaticData::LoadChartDecodingParameters()
|
| 305 |
+
{
|
| 306 |
+
LoadNonTerminals();
|
| 307 |
+
|
| 308 |
+
// source label overlap
|
| 309 |
+
m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
|
| 310 |
+
SourceLabelOverlapAdd);
|
| 311 |
+
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
void StaticData::LoadDecodeGraphs()
|
| 315 |
+
{
|
| 316 |
+
vector<string> mappingVector;
|
| 317 |
+
vector<size_t> maxChartSpans;
|
| 318 |
+
|
| 319 |
+
const PARAM_VEC *params;
|
| 320 |
+
|
| 321 |
+
params = m_parameter->GetParam("mapping");
|
| 322 |
+
if (params && params->size()) {
|
| 323 |
+
mappingVector = *params;
|
| 324 |
+
} else {
|
| 325 |
+
mappingVector.assign(1,"0 T 0");
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
params = m_parameter->GetParam("max-chart-span");
|
| 329 |
+
if (params && params->size()) {
|
| 330 |
+
maxChartSpans = Scan<size_t>(*params);
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
vector<string> toks = Tokenize(mappingVector[0]);
|
| 334 |
+
if (toks.size() == 3) {
|
| 335 |
+
// eg 0 T 0
|
| 336 |
+
LoadDecodeGraphsOld(mappingVector, maxChartSpans);
|
| 337 |
+
} else if (toks.size() == 2) {
|
| 338 |
+
if (toks[0] == "T" || toks[0] == "G") {
|
| 339 |
+
// eg. T 0
|
| 340 |
+
LoadDecodeGraphsOld(mappingVector, maxChartSpans);
|
| 341 |
+
} else {
|
| 342 |
+
// eg. 0 TM1
|
| 343 |
+
LoadDecodeGraphsNew(mappingVector, maxChartSpans);
|
| 344 |
+
}
|
| 345 |
+
} else {
|
| 346 |
+
UTIL_THROW(util::Exception, "Malformed mapping");
|
| 347 |
+
}
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
void
|
| 351 |
+
StaticData::
|
| 352 |
+
LoadDecodeGraphsOld(const vector<string> &mappingVector,
|
| 353 |
+
const vector<size_t> &maxChartSpans)
|
| 354 |
+
{
|
| 355 |
+
const vector<PhraseDictionary*>& pts = PhraseDictionary::GetColl();
|
| 356 |
+
const vector<GenerationDictionary*>& gens = GenerationDictionary::GetColl();
|
| 357 |
+
|
| 358 |
+
const std::vector<FeatureFunction*> *featuresRemaining
|
| 359 |
+
= &FeatureFunction::GetFeatureFunctions();
|
| 360 |
+
DecodeStep *prev = 0;
|
| 361 |
+
size_t prevDecodeGraphInd = 0;
|
| 362 |
+
|
| 363 |
+
for(size_t i=0; i<mappingVector.size(); i++) {
|
| 364 |
+
vector<string> token = Tokenize(mappingVector[i]);
|
| 365 |
+
size_t decodeGraphInd;
|
| 366 |
+
DecodeType decodeType;
|
| 367 |
+
size_t index;
|
| 368 |
+
if (token.size() == 2) {
|
| 369 |
+
// eg. T 0
|
| 370 |
+
decodeGraphInd = 0;
|
| 371 |
+
decodeType = token[0] == "T" ? Translate : Generate;
|
| 372 |
+
index = Scan<size_t>(token[1]);
|
| 373 |
+
} else if (token.size() == 3) {
|
| 374 |
+
// eg. 0 T 0
|
| 375 |
+
// For specifying multiple translation model
|
| 376 |
+
decodeGraphInd = Scan<size_t>(token[0]);
|
| 377 |
+
//the vectorList index can only increment by one
|
| 378 |
+
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
|
| 379 |
+
&& decodeGraphInd != prevDecodeGraphInd + 1,
|
| 380 |
+
"Malformed mapping");
|
| 381 |
+
if (decodeGraphInd > prevDecodeGraphInd) {
|
| 382 |
+
prev = NULL;
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
if (prevDecodeGraphInd < decodeGraphInd) {
|
| 386 |
+
featuresRemaining = &FeatureFunction::GetFeatureFunctions();
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
decodeType = token[1] == "T" ? Translate : Generate;
|
| 390 |
+
index = Scan<size_t>(token[2]);
|
| 391 |
+
} else {
|
| 392 |
+
UTIL_THROW(util::Exception, "Malformed mapping");
|
| 393 |
+
}
|
| 394 |
+
|
| 395 |
+
DecodeStep* decodeStep = NULL;
|
| 396 |
+
switch (decodeType) {
|
| 397 |
+
case Translate:
|
| 398 |
+
if(index>=pts.size()) {
|
| 399 |
+
util::StringStream strme;
|
| 400 |
+
strme << "No phrase dictionary with index "
|
| 401 |
+
<< index << " available!";
|
| 402 |
+
UTIL_THROW(util::Exception, strme.str());
|
| 403 |
+
}
|
| 404 |
+
decodeStep = new DecodeStepTranslation(pts[index], prev, *featuresRemaining);
|
| 405 |
+
break;
|
| 406 |
+
case Generate:
|
| 407 |
+
if(index>=gens.size()) {
|
| 408 |
+
util::StringStream strme;
|
| 409 |
+
strme << "No generation dictionary with index "
|
| 410 |
+
<< index << " available!";
|
| 411 |
+
UTIL_THROW(util::Exception, strme.str());
|
| 412 |
+
}
|
| 413 |
+
decodeStep = new DecodeStepGeneration(gens[index], prev, *featuresRemaining);
|
| 414 |
+
break;
|
| 415 |
+
default:
|
| 416 |
+
UTIL_THROW(util::Exception, "Unknown decode step");
|
| 417 |
+
break;
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
featuresRemaining = &decodeStep->GetFeaturesRemaining();
|
| 421 |
+
|
| 422 |
+
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
|
| 423 |
+
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
|
| 424 |
+
DecodeGraph *decodeGraph;
|
| 425 |
+
if (is_syntax(m_options->search.algo)) {
|
| 426 |
+
size_t maxChartSpan;
|
| 427 |
+
if (decodeGraphInd < maxChartSpans.size()) {
|
| 428 |
+
maxChartSpan = maxChartSpans[decodeGraphInd];
|
| 429 |
+
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
|
| 430 |
+
} else {
|
| 431 |
+
maxChartSpan = DEFAULT_MAX_CHART_SPAN;
|
| 432 |
+
}
|
| 433 |
+
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
|
| 434 |
+
} else {
|
| 435 |
+
decodeGraph = new DecodeGraph(m_decodeGraphs.size());
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
m_decodeGraphs.push_back(decodeGraph); // TODO max chart span
|
| 439 |
+
}
|
| 440 |
+
|
| 441 |
+
m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
|
| 442 |
+
prev = decodeStep;
|
| 443 |
+
prevDecodeGraphInd = decodeGraphInd;
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
+
// set maximum n-gram size for backoff approach to decoding paths
|
| 447 |
+
// default is always use subsequent paths (value = 0)
|
| 448 |
+
// if specified, record maxmimum unseen n-gram size
|
| 449 |
+
const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
|
| 450 |
+
for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
|
| 451 |
+
DecodeGraph &decodeGraph = *m_decodeGraphs[i];
|
| 452 |
+
|
| 453 |
+
if (i < backoffVector->size()) {
|
| 454 |
+
decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
|
| 455 |
+
}
|
| 456 |
+
}
|
| 457 |
+
}
|
| 458 |
+
|
| 459 |
+
void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans)
|
| 460 |
+
{
|
| 461 |
+
const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();
|
| 462 |
+
DecodeStep *prev = 0;
|
| 463 |
+
size_t prevDecodeGraphInd = 0;
|
| 464 |
+
|
| 465 |
+
for(size_t i=0; i<mappingVector.size(); i++) {
|
| 466 |
+
vector<string> token = Tokenize(mappingVector[i]);
|
| 467 |
+
size_t decodeGraphInd;
|
| 468 |
+
|
| 469 |
+
decodeGraphInd = Scan<size_t>(token[0]);
|
| 470 |
+
//the vectorList index can only increment by one
|
| 471 |
+
UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
|
| 472 |
+
&& decodeGraphInd != prevDecodeGraphInd + 1,
|
| 473 |
+
"Malformed mapping");
|
| 474 |
+
if (decodeGraphInd > prevDecodeGraphInd) {
|
| 475 |
+
prev = NULL;
|
| 476 |
+
}
|
| 477 |
+
|
| 478 |
+
if (prevDecodeGraphInd < decodeGraphInd) {
|
| 479 |
+
featuresRemaining = &FeatureFunction::GetFeatureFunctions();
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
FeatureFunction &ff = FeatureFunction::FindFeatureFunction(token[1]);
|
| 483 |
+
|
| 484 |
+
DecodeStep* decodeStep = NULL;
|
| 485 |
+
if (typeid(ff) == typeid(PhraseDictionary)) {
|
| 486 |
+
decodeStep = new DecodeStepTranslation(&static_cast<PhraseDictionary&>(ff), prev, *featuresRemaining);
|
| 487 |
+
} else if (typeid(ff) == typeid(GenerationDictionary)) {
|
| 488 |
+
decodeStep = new DecodeStepGeneration(&static_cast<GenerationDictionary&>(ff), prev, *featuresRemaining);
|
| 489 |
+
} else {
|
| 490 |
+
UTIL_THROW(util::Exception, "Unknown decode step");
|
| 491 |
+
}
|
| 492 |
+
|
| 493 |
+
featuresRemaining = &decodeStep->GetFeaturesRemaining();
|
| 494 |
+
|
| 495 |
+
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
|
| 496 |
+
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
|
| 497 |
+
DecodeGraph *decodeGraph;
|
| 498 |
+
if (is_syntax(m_options->search.algo)) {
|
| 499 |
+
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
|
| 500 |
+
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
|
| 501 |
+
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
|
| 502 |
+
} else {
|
| 503 |
+
decodeGraph = new DecodeGraph(m_decodeGraphs.size());
|
| 504 |
+
}
|
| 505 |
+
|
| 506 |
+
m_decodeGraphs.push_back(decodeGraph); // TODO max chart span
|
| 507 |
+
}
|
| 508 |
+
|
| 509 |
+
m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
|
| 510 |
+
prev = decodeStep;
|
| 511 |
+
prevDecodeGraphInd = decodeGraphInd;
|
| 512 |
+
}
|
| 513 |
+
|
| 514 |
+
// set maximum n-gram size for backoff approach to decoding paths
|
| 515 |
+
// default is always use subsequent paths (value = 0)
|
| 516 |
+
// if specified, record maxmimum unseen n-gram size
|
| 517 |
+
const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
|
| 518 |
+
for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
|
| 519 |
+
DecodeGraph &decodeGraph = *m_decodeGraphs[i];
|
| 520 |
+
|
| 521 |
+
if (i < backoffVector->size()) {
|
| 522 |
+
decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
|
| 523 |
+
}
|
| 524 |
+
}
|
| 525 |
+
|
| 526 |
+
}
|
| 527 |
+
|
| 528 |
+
void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
|
| 529 |
+
{
|
| 530 |
+
//loop over ScoreProducers to update weights of BleuScoreFeature
|
| 531 |
+
const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
|
| 532 |
+
for(size_t i=0; i<producers.size(); ++i) {
|
| 533 |
+
FeatureFunction *ff = producers[i];
|
| 534 |
+
std::string ffName = ff->GetScoreProducerDescription();
|
| 535 |
+
|
| 536 |
+
if (ffName == "BleuScoreFeature") {
|
| 537 |
+
SetWeight(ff, weight);
|
| 538 |
+
break;
|
| 539 |
+
}
|
| 540 |
+
}
|
| 541 |
+
}
|
| 542 |
+
|
| 543 |
+
// ScoreComponentCollection StaticData::GetAllWeightsScoreComponentCollection() const {}
|
| 544 |
+
// in ScoreComponentCollection.h
|
| 545 |
+
|
| 546 |
+
void StaticData::SetExecPath(const std::string &path)
|
| 547 |
+
{
|
| 548 |
+
// NOT TESTED
|
| 549 |
+
size_t pos = path.rfind("/");
|
| 550 |
+
if (pos != string::npos) {
|
| 551 |
+
m_binPath = path.substr(0, pos);
|
| 552 |
+
}
|
| 553 |
+
VERBOSE(1,m_binPath << endl);
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
// Directory containing the moses binary, as recorded by SetExecPath().
const string &StaticData::GetBinDirectory() const
{
  return m_binPath;
}
|
| 560 |
+
|
| 561 |
+
float StaticData::GetWeightWordPenalty() const
|
| 562 |
+
{
|
| 563 |
+
float weightWP = GetWeight(&WordPenaltyProducer::Instance());
|
| 564 |
+
return weightWP;
|
| 565 |
+
}
|
| 566 |
+
|
| 567 |
+
void
|
| 568 |
+
StaticData::
|
| 569 |
+
InitializeForInput(ttasksptr const& ttask) const
|
| 570 |
+
{
|
| 571 |
+
const std::vector<FeatureFunction*> &producers
|
| 572 |
+
= FeatureFunction::GetFeatureFunctions();
|
| 573 |
+
for(size_t i=0; i<producers.size(); ++i) {
|
| 574 |
+
FeatureFunction &ff = *producers[i];
|
| 575 |
+
if (! IsFeatureFunctionIgnored(ff)) {
|
| 576 |
+
Timer iTime;
|
| 577 |
+
iTime.start();
|
| 578 |
+
ff.InitializeForInput(ttask);
|
| 579 |
+
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription()
|
| 580 |
+
<< " )" << "= " << iTime << endl);
|
| 581 |
+
}
|
| 582 |
+
}
|
| 583 |
+
}
|
| 584 |
+
|
| 585 |
+
void
|
| 586 |
+
StaticData::
|
| 587 |
+
CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
|
| 588 |
+
{
|
| 589 |
+
const std::vector<FeatureFunction*> &producers
|
| 590 |
+
= FeatureFunction::GetFeatureFunctions();
|
| 591 |
+
for(size_t i=0; i<producers.size(); ++i) {
|
| 592 |
+
FeatureFunction &ff = *producers[i];
|
| 593 |
+
if (! IsFeatureFunctionIgnored(ff)) {
|
| 594 |
+
ff.CleanUpAfterSentenceProcessing(ttask);
|
| 595 |
+
}
|
| 596 |
+
}
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
void StaticData::LoadFeatureFunctions()
|
| 600 |
+
{
|
| 601 |
+
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
| 602 |
+
std::vector<FeatureFunction*>::const_iterator iter;
|
| 603 |
+
for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
|
| 604 |
+
FeatureFunction *ff = *iter;
|
| 605 |
+
bool doLoad = true;
|
| 606 |
+
|
| 607 |
+
if (ff->RequireSortingAfterSourceContext()) {
|
| 608 |
+
m_requireSortingAfterSourceContext = true;
|
| 609 |
+
}
|
| 610 |
+
|
| 611 |
+
if (dynamic_cast<PhraseDictionary*>(ff)) {
|
| 612 |
+
doLoad = false;
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
if (doLoad) {
|
| 616 |
+
VERBOSE(1, "Loading " << ff->GetScoreProducerDescription() << endl);
|
| 617 |
+
ff->Load(options());
|
| 618 |
+
}
|
| 619 |
+
}
|
| 620 |
+
|
| 621 |
+
const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
|
| 622 |
+
for (size_t i = 0; i < pts.size(); ++i) {
|
| 623 |
+
PhraseDictionary *pt = pts[i];
|
| 624 |
+
VERBOSE(1, "Loading " << pt->GetScoreProducerDescription() << endl);
|
| 625 |
+
pt->Load(options());
|
| 626 |
+
}
|
| 627 |
+
|
| 628 |
+
CheckLEGACYPT();
|
| 629 |
+
}
|
| 630 |
+
|
| 631 |
+
bool StaticData::CheckWeights() const
|
| 632 |
+
{
|
| 633 |
+
set<string> weightNames = m_parameter->GetWeightNames();
|
| 634 |
+
set<string> featureNames;
|
| 635 |
+
|
| 636 |
+
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
| 637 |
+
for (size_t i = 0; i < ffs.size(); ++i) {
|
| 638 |
+
const FeatureFunction &ff = *ffs[i];
|
| 639 |
+
const string &descr = ff.GetScoreProducerDescription();
|
| 640 |
+
featureNames.insert(descr);
|
| 641 |
+
|
| 642 |
+
set<string>::iterator iter = weightNames.find(descr);
|
| 643 |
+
if (iter == weightNames.end()) {
|
| 644 |
+
cerr << "Can't find weights for feature function " << descr << endl;
|
| 645 |
+
} else {
|
| 646 |
+
weightNames.erase(iter);
|
| 647 |
+
}
|
| 648 |
+
}
|
| 649 |
+
|
| 650 |
+
//sparse features
|
| 651 |
+
if (!weightNames.empty()) {
|
| 652 |
+
set<string>::iterator iter;
|
| 653 |
+
for (iter = weightNames.begin(); iter != weightNames.end(); ) {
|
| 654 |
+
string fname = (*iter).substr(0, (*iter).find("_"));
|
| 655 |
+
VERBOSE(1,fname << "\n");
|
| 656 |
+
if (featureNames.find(fname) != featureNames.end()) {
|
| 657 |
+
weightNames.erase(iter++);
|
| 658 |
+
} else {
|
| 659 |
+
++iter;
|
| 660 |
+
}
|
| 661 |
+
}
|
| 662 |
+
}
|
| 663 |
+
|
| 664 |
+
if (!weightNames.empty()) {
|
| 665 |
+
cerr << "The following weights have no feature function. "
|
| 666 |
+
<< "Maybe incorrectly spelt weights: ";
|
| 667 |
+
set<string>::iterator iter;
|
| 668 |
+
for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) {
|
| 669 |
+
cerr << *iter << ",";
|
| 670 |
+
}
|
| 671 |
+
return false;
|
| 672 |
+
}
|
| 673 |
+
|
| 674 |
+
return true;
|
| 675 |
+
}
|
| 676 |
+
|
| 677 |
+
|
| 678 |
+
void StaticData::LoadSparseWeightsFromConfig()
|
| 679 |
+
{
|
| 680 |
+
set<string> featureNames;
|
| 681 |
+
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
| 682 |
+
for (size_t i = 0; i < ffs.size(); ++i) {
|
| 683 |
+
const FeatureFunction &ff = *ffs[i];
|
| 684 |
+
const string &descr = ff.GetScoreProducerDescription();
|
| 685 |
+
featureNames.insert(descr);
|
| 686 |
+
}
|
| 687 |
+
|
| 688 |
+
const std::map<std::string, std::vector<float> > &weights = m_parameter->GetAllWeights();
|
| 689 |
+
std::map<std::string, std::vector<float> >::const_iterator iter;
|
| 690 |
+
for (iter = weights.begin(); iter != weights.end(); ++iter) {
|
| 691 |
+
// this indicates that it is sparse feature
|
| 692 |
+
if (featureNames.find(iter->first) == featureNames.end()) {
|
| 693 |
+
UTIL_THROW_IF2(iter->second.size() != 1, "ERROR: only one weight per sparse feature allowed: " << iter->first);
|
| 694 |
+
m_allWeights.Assign(iter->first, iter->second[0]);
|
| 695 |
+
}
|
| 696 |
+
}
|
| 697 |
+
|
| 698 |
+
}
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
/**! Read in settings for alternative weights */
|
| 702 |
+
bool StaticData::LoadAlternateWeightSettings()
|
| 703 |
+
{
|
| 704 |
+
if (m_threadCount > 1) {
|
| 705 |
+
cerr << "ERROR: alternative weight settings currently not supported with multi-threading.";
|
| 706 |
+
return false;
|
| 707 |
+
}
|
| 708 |
+
|
| 709 |
+
vector<string> weightSpecification;
|
| 710 |
+
const PARAM_VEC *params = m_parameter->GetParam("alternate-weight-setting");
|
| 711 |
+
if (params && params->size()) {
|
| 712 |
+
weightSpecification = *params;
|
| 713 |
+
}
|
| 714 |
+
|
| 715 |
+
// get mapping from feature names to feature functions
|
| 716 |
+
map<string,FeatureFunction*> nameToFF;
|
| 717 |
+
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
| 718 |
+
for (size_t i = 0; i < ffs.size(); ++i) {
|
| 719 |
+
nameToFF[ ffs[i]->GetScoreProducerDescription() ] = ffs[i];
|
| 720 |
+
}
|
| 721 |
+
|
| 722 |
+
// copy main weight setting as default
|
| 723 |
+
m_weightSetting["default"] = new ScoreComponentCollection( m_allWeights );
|
| 724 |
+
|
| 725 |
+
// go through specification in config file
|
| 726 |
+
string currentId = "";
|
| 727 |
+
bool hasErrors = false;
|
| 728 |
+
for (size_t i=0; i<weightSpecification.size(); ++i) {
|
| 729 |
+
|
| 730 |
+
// identifier line (with optional additional specifications)
|
| 731 |
+
if (weightSpecification[i].find("id=") == 0) {
|
| 732 |
+
vector<string> tokens = Tokenize(weightSpecification[i]);
|
| 733 |
+
vector<string> args = Tokenize(tokens[0], "=");
|
| 734 |
+
currentId = args[1];
|
| 735 |
+
VERBOSE(1,"alternate weight setting " << currentId << endl);
|
| 736 |
+
UTIL_THROW_IF2(m_weightSetting.find(currentId) != m_weightSetting.end(),
|
| 737 |
+
"Duplicate alternate weight id: " << currentId);
|
| 738 |
+
m_weightSetting[ currentId ] = new ScoreComponentCollection;
|
| 739 |
+
|
| 740 |
+
// other specifications
|
| 741 |
+
for(size_t j=1; j<tokens.size(); j++) {
|
| 742 |
+
vector<string> args = Tokenize(tokens[j], "=");
|
| 743 |
+
// sparse weights
|
| 744 |
+
if (args[0] == "weight-file") {
|
| 745 |
+
if (args.size() != 2) {
|
| 746 |
+
std::cerr << "One argument should be supplied for weight-file";
|
| 747 |
+
return false;
|
| 748 |
+
}
|
| 749 |
+
ScoreComponentCollection extraWeights;
|
| 750 |
+
if (!extraWeights.Load(args[1])) {
|
| 751 |
+
std::cerr << "Unable to load weights from " << args[1];
|
| 752 |
+
return false;
|
| 753 |
+
}
|
| 754 |
+
m_weightSetting[ currentId ]->PlusEquals(extraWeights);
|
| 755 |
+
}
|
| 756 |
+
// ignore feature functions
|
| 757 |
+
else if (args[0] == "ignore-ff") {
|
| 758 |
+
set< string > *ffNameSet = new set< string >;
|
| 759 |
+
m_weightSettingIgnoreFF[ currentId ] = *ffNameSet;
|
| 760 |
+
vector<string> featureFunctionName = Tokenize(args[1], ",");
|
| 761 |
+
for(size_t k=0; k<featureFunctionName.size(); k++) {
|
| 762 |
+
// check if a valid nane
|
| 763 |
+
map<string,FeatureFunction*>::iterator ffLookUp = nameToFF.find(featureFunctionName[k]);
|
| 764 |
+
if (ffLookUp == nameToFF.end()) {
|
| 765 |
+
cerr << "ERROR: alternate weight setting " << currentId
|
| 766 |
+
<< " specifies to ignore feature function " << featureFunctionName[k]
|
| 767 |
+
<< " but there is no such feature function" << endl;
|
| 768 |
+
hasErrors = true;
|
| 769 |
+
} else {
|
| 770 |
+
m_weightSettingIgnoreFF[ currentId ].insert( featureFunctionName[k] );
|
| 771 |
+
}
|
| 772 |
+
}
|
| 773 |
+
}
|
| 774 |
+
}
|
| 775 |
+
}
|
| 776 |
+
|
| 777 |
+
// weight lines
|
| 778 |
+
else {
|
| 779 |
+
UTIL_THROW_IF2(currentId.empty(), "No alternative weights specified");
|
| 780 |
+
vector<string> tokens = Tokenize(weightSpecification[i]);
|
| 781 |
+
UTIL_THROW_IF2(tokens.size() < 2
|
| 782 |
+
, "Incorrect format for alternate weights: " << weightSpecification[i]);
|
| 783 |
+
|
| 784 |
+
// get name and weight values
|
| 785 |
+
string name = tokens[0];
|
| 786 |
+
name = name.substr(0, name.size() - 1); // remove trailing "="
|
| 787 |
+
vector<float> weights(tokens.size() - 1);
|
| 788 |
+
for (size_t i = 1; i < tokens.size(); ++i) {
|
| 789 |
+
float weight = Scan<float>(tokens[i]);
|
| 790 |
+
weights[i - 1] = weight;
|
| 791 |
+
}
|
| 792 |
+
|
| 793 |
+
// check if a valid nane
|
| 794 |
+
map<string,FeatureFunction*>::iterator ffLookUp = nameToFF.find(name);
|
| 795 |
+
if (ffLookUp == nameToFF.end()) {
|
| 796 |
+
cerr << "ERROR: alternate weight setting " << currentId
|
| 797 |
+
<< " specifies weight(s) for " << name
|
| 798 |
+
<< " but there is no such feature function" << endl;
|
| 799 |
+
hasErrors = true;
|
| 800 |
+
} else {
|
| 801 |
+
m_weightSetting[ currentId ]->Assign( nameToFF[name], weights);
|
| 802 |
+
}
|
| 803 |
+
}
|
| 804 |
+
}
|
| 805 |
+
UTIL_THROW_IF2(hasErrors, "Errors loading alternate weights");
|
| 806 |
+
return true;
|
| 807 |
+
}
|
| 808 |
+
|
| 809 |
+
void StaticData::NoCache()
|
| 810 |
+
{
|
| 811 |
+
bool noCache;
|
| 812 |
+
m_parameter->SetParameter(noCache, "no-cache", false );
|
| 813 |
+
|
| 814 |
+
if (noCache) {
|
| 815 |
+
const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
|
| 816 |
+
for (size_t i = 0; i < pts.size(); ++i) {
|
| 817 |
+
PhraseDictionary &pt = *pts[i];
|
| 818 |
+
pt.SetParameter("cache-size", "0");
|
| 819 |
+
}
|
| 820 |
+
}
|
| 821 |
+
}
|
| 822 |
+
|
| 823 |
+
std::map<std::string, std::string>
|
| 824 |
+
StaticData
|
| 825 |
+
::OverrideFeatureNames()
|
| 826 |
+
{
|
| 827 |
+
std::map<std::string, std::string> ret;
|
| 828 |
+
|
| 829 |
+
const PARAM_VEC *params = m_parameter->GetParam("feature-name-overwrite");
|
| 830 |
+
if (params && params->size()) {
|
| 831 |
+
UTIL_THROW_IF2(params->size() != 1, "Only provide 1 line in the section [feature-name-overwrite]");
|
| 832 |
+
vector<string> toks = Tokenize(params->at(0));
|
| 833 |
+
UTIL_THROW_IF2(toks.size() % 2 != 0, "Format of -feature-name-overwrite must be [old-name new-name]*");
|
| 834 |
+
|
| 835 |
+
for (size_t i = 0; i < toks.size(); i += 2) {
|
| 836 |
+
const string &oldName = toks[i];
|
| 837 |
+
const string &newName = toks[i+1];
|
| 838 |
+
ret[oldName] = newName;
|
| 839 |
+
}
|
| 840 |
+
}
|
| 841 |
+
|
| 842 |
+
// FIXME Does this make sense for F2S? Perhaps it should be changed once
|
| 843 |
+
// FIXME the pipeline uses RuleTable consistently.
|
| 844 |
+
SearchAlgorithm algo = m_options->search.algo;
|
| 845 |
+
if (algo == SyntaxS2T || algo == SyntaxT2S ||
|
| 846 |
+
algo == SyntaxT2S_SCFG || algo == SyntaxF2S) {
|
| 847 |
+
// Automatically override PhraseDictionary{Memory,Scope3}. This will
|
| 848 |
+
// have to change if the FF parameters diverge too much in the future,
|
| 849 |
+
// but for now it makes switching between the old and new decoders much
|
| 850 |
+
// more convenient.
|
| 851 |
+
ret["PhraseDictionaryMemory"] = "RuleTable";
|
| 852 |
+
ret["PhraseDictionaryScope3"] = "RuleTable";
|
| 853 |
+
}
|
| 854 |
+
|
| 855 |
+
return ret;
|
| 856 |
+
}
|
| 857 |
+
|
| 858 |
+
void StaticData::OverrideFeatures()
|
| 859 |
+
{
|
| 860 |
+
const PARAM_VEC *params = m_parameter->GetParam("feature-overwrite");
|
| 861 |
+
for (size_t i = 0; params && i < params->size(); ++i) {
|
| 862 |
+
const string &str = params->at(i);
|
| 863 |
+
vector<string> toks = Tokenize(str);
|
| 864 |
+
UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
|
| 865 |
+
|
| 866 |
+
FeatureFunction &ff = FeatureFunction::FindFeatureFunction(toks[0]);
|
| 867 |
+
|
| 868 |
+
for (size_t j = 1; j < toks.size(); ++j) {
|
| 869 |
+
const string &keyValStr = toks[j];
|
| 870 |
+
vector<string> keyVal = Tokenize(keyValStr, "=");
|
| 871 |
+
UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
|
| 872 |
+
|
| 873 |
+
VERBOSE(1, "Override " << ff.GetScoreProducerDescription() << " "
|
| 874 |
+
<< keyVal[0] << "=" << keyVal[1] << endl);
|
| 875 |
+
|
| 876 |
+
ff.SetParameter(keyVal[0], keyVal[1]);
|
| 877 |
+
|
| 878 |
+
}
|
| 879 |
+
}
|
| 880 |
+
|
| 881 |
+
}
|
| 882 |
+
|
| 883 |
+
void StaticData::CheckLEGACYPT()
|
| 884 |
+
{
|
| 885 |
+
const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
|
| 886 |
+
for (size_t i = 0; i < pts.size(); ++i) {
|
| 887 |
+
const PhraseDictionary *phraseDictionary = pts[i];
|
| 888 |
+
if (dynamic_cast<const PhraseDictionaryTreeAdaptor*>(phraseDictionary) != NULL) {
|
| 889 |
+
m_useLegacyPT = true;
|
| 890 |
+
return;
|
| 891 |
+
}
|
| 892 |
+
}
|
| 893 |
+
|
| 894 |
+
m_useLegacyPT = false;
|
| 895 |
+
}
|
| 896 |
+
|
| 897 |
+
|
| 898 |
+
void StaticData::ResetWeights(const std::string &denseWeights, const std::string &sparseFile)
|
| 899 |
+
{
|
| 900 |
+
m_allWeights = ScoreComponentCollection();
|
| 901 |
+
|
| 902 |
+
// dense weights
|
| 903 |
+
string name("");
|
| 904 |
+
vector<float> weights;
|
| 905 |
+
vector<string> toks = Tokenize(denseWeights);
|
| 906 |
+
for (size_t i = 0; i < toks.size(); ++i) {
|
| 907 |
+
const string &tok = toks[i];
|
| 908 |
+
|
| 909 |
+
if (ends_with(tok, "=")) {
|
| 910 |
+
// start of new feature
|
| 911 |
+
|
| 912 |
+
if (name != "") {
|
| 913 |
+
// save previous ff
|
| 914 |
+
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
|
| 915 |
+
m_allWeights.Assign(&ff, weights);
|
| 916 |
+
weights.clear();
|
| 917 |
+
}
|
| 918 |
+
|
| 919 |
+
name = tok.substr(0, tok.size() - 1);
|
| 920 |
+
} else {
|
| 921 |
+
// a weight for curr ff
|
| 922 |
+
float weight = Scan<float>(toks[i]);
|
| 923 |
+
weights.push_back(weight);
|
| 924 |
+
}
|
| 925 |
+
}
|
| 926 |
+
|
| 927 |
+
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
|
| 928 |
+
m_allWeights.Assign(&ff, weights);
|
| 929 |
+
|
| 930 |
+
// sparse weights
|
| 931 |
+
InputFileStream sparseStrme(sparseFile);
|
| 932 |
+
string line;
|
| 933 |
+
while (getline(sparseStrme, line)) {
|
| 934 |
+
vector<string> toks = Tokenize(line);
|
| 935 |
+
UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight");
|
| 936 |
+
|
| 937 |
+
vector<string> names = Tokenize(toks[0], "_");
|
| 938 |
+
UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName");
|
| 939 |
+
|
| 940 |
+
const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]);
|
| 941 |
+
m_allWeights.Assign(&ff, names[1], Scan<float>(toks[1]));
|
| 942 |
+
}
|
| 943 |
+
}
|
| 944 |
+
|
| 945 |
+
size_t StaticData::GetCoordSpace(string space) const
{
  // Return the numeric id of a named coordinate space, or 0 if the space
  // has not been registered via MapCoordSpace().
  map<string const, size_t>::const_iterator found = m_coordSpaceMap.find(space);
  return (found == m_coordSpaceMap.end()) ? 0 : found->second;
}
|
| 953 |
+
|
| 954 |
+
size_t StaticData::MapCoordSpace(string space)
{
  // Return the id previously assigned to this coordinate space, allocating
  // the next free id (m_coordSpaceNextID) on first use.
  map<string const, size_t>::const_iterator found = m_coordSpaceMap.find(space);
  if (found != m_coordSpaceMap.end()) {
    return found->second;
  }
  const size_t id = m_coordSpaceNextID;
  ++m_coordSpaceNextID;
  m_coordSpaceMap[space] = id;
  return id;
}
|
| 965 |
+
|
| 966 |
+
} // namespace
|