Upload 575 files
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- .gitattributes +3 -0
- mosesdecoder/moses/AlignmentInfo.cpp +170 -0
- mosesdecoder/moses/AlignmentInfo.h +143 -0
- mosesdecoder/moses/AlignmentInfoCollection.cpp +60 -0
- mosesdecoder/moses/AlignmentInfoCollection.h +81 -0
- mosesdecoder/moses/AlignmentInfoTest.cpp +71 -0
- mosesdecoder/moses/BaseManager.cpp +160 -0
- mosesdecoder/moses/BaseManager.h +78 -0
- mosesdecoder/moses/Bitmap.cpp +96 -0
- mosesdecoder/moses/Bitmap.h +249 -0
- mosesdecoder/moses/BitmapContainer.cpp +498 -0
- mosesdecoder/moses/BitmapContainer.h +275 -0
- mosesdecoder/moses/Bitmaps.cpp +58 -0
- mosesdecoder/moses/Bitmaps.h +32 -0
- mosesdecoder/moses/CMakeLists.txt +13 -0
- mosesdecoder/moses/ChartCell.cpp +233 -0
- mosesdecoder/moses/ChartCell.h +128 -0
- mosesdecoder/moses/ChartCellCollection.cpp +58 -0
- mosesdecoder/moses/ChartCellCollection.h +102 -0
- mosesdecoder/moses/ChartCellLabel.h +89 -0
- mosesdecoder/moses/ChartCellLabelSet.h +147 -0
- mosesdecoder/moses/ChartHypothesis.cpp +360 -0
- mosesdecoder/moses/ChartHypothesis.h +204 -0
- mosesdecoder/moses/ChartHypothesisCollection.cpp +301 -0
- mosesdecoder/moses/ChartHypothesisCollection.h +108 -0
- mosesdecoder/moses/ChartKBestExtractor.cpp +332 -0
- mosesdecoder/moses/ChartKBestExtractor.h +132 -0
- mosesdecoder/moses/ChartManager.cpp +867 -0
- mosesdecoder/moses/ChartManager.h +162 -0
- mosesdecoder/moses/ChartParser.cpp +313 -0
- mosesdecoder/moses/ChartParser.h +99 -0
- mosesdecoder/moses/ChartParserCallback.h +35 -0
- mosesdecoder/moses/ChartRuleLookupManager.cpp +9 -0
- mosesdecoder/moses/ChartRuleLookupManager.h +84 -0
- mosesdecoder/moses/ChartTranslationOption.cpp +33 -0
- mosesdecoder/moses/ChartTranslationOption.h +54 -0
- mosesdecoder/moses/ChartTranslationOptionList.cpp +219 -0
- mosesdecoder/moses/ChartTranslationOptionList.h +90 -0
- mosesdecoder/moses/ChartTranslationOptions.cpp +168 -0
- mosesdecoder/moses/ChartTranslationOptions.h +104 -0
- mosesdecoder/moses/ConfusionNet.cpp +294 -0
- mosesdecoder/moses/ConfusionNet.h +92 -0
- mosesdecoder/moses/ContextScope.h +124 -0
- mosesdecoder/moses/DecodeGraph.cpp +43 -0
- mosesdecoder/moses/DecodeGraph.h +101 -0
- mosesdecoder/moses/DecodeStep.cpp +90 -0
- mosesdecoder/moses/DecodeStep.h +118 -0
- mosesdecoder/moses/DecodeStepGeneration.cpp +169 -0
- mosesdecoder/moses/DecodeStepGeneration.h +54 -0
- mosesdecoder/moses/DecodeStepTranslation.cpp +280 -0
.gitattributes
CHANGED
|
@@ -95,3 +95,6 @@ mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/processPhraseTab
|
|
| 95 |
mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/prunePhraseTable filter=lfs diff=lfs merge=lfs -text
|
| 96 |
mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryLexicalTable filter=lfs diff=lfs merge=lfs -text
|
| 97 |
mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryPhraseTableMin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/prunePhraseTable filter=lfs diff=lfs merge=lfs -text
|
| 96 |
mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryLexicalTable filter=lfs diff=lfs merge=lfs -text
|
| 97 |
mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryPhraseTableMin filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/libmoses.a filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/moses_test filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
mosesdecoder/moses/LM/bin/BackwardTest.test/gcc-9/release/link-static/threading-multi/BackwardTest filter=lfs diff=lfs merge=lfs -text
|
mosesdecoder/moses/AlignmentInfo.cpp
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
#include <algorithm>
|
| 20 |
+
#include <set>
|
| 21 |
+
#include "AlignmentInfo.h"
|
| 22 |
+
#include "TypeDef.h"
|
| 23 |
+
#include "StaticData.h"
|
| 24 |
+
#include "Util.h"
|
| 25 |
+
#include "util/exception.hh"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
|
| 30 |
+
// Construct from a ready-made set of (source, target) alignment pairs.
// The pairs are copied into m_collection, after which the non-terminal
// index maps are derived from them.
AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
  : m_collection(pairs)
{
  this->BuildNonTermIndexMaps();
}
|
| 35 |
+
|
| 36 |
+
// Construct from a flat byte vector in which consecutive elements form one
// alignment point: aln[2k] is inserted as the first (source) member of the
// pair and aln[2k+1] as the second (target) member.
//
// The vector is expected to have even length; this is checked by assert in
// debug builds.
AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
{
  assert(aln.size()%2==0);
  // Bound the loop on i + 1 so that, when assertions are compiled out, an
  // odd-sized vector can never cause a read past the end of the buffer; a
  // trailing unpaired element is simply ignored.
  for (size_t i = 0; i + 1 < aln.size(); i += 2)
    m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1])));
  BuildNonTermIndexMaps();
}
|
| 43 |
+
|
| 44 |
+
// Construct from a textual alignment: space-separated points, each of the
// form "<source>-<target>". Throws (via UTIL_THROW_IF2) if a point does not
// split into exactly two numbers.
// Note: this constructor does not call BuildNonTermIndexMaps(), so both
// non-terminal index maps stay empty.
AlignmentInfo::AlignmentInfo(const std::string &str)
{
  const std::vector<std::string> tokens = Tokenize(str, " ");
  for (size_t t = 0; t < tokens.size(); ++t) {
    const std::string &token = tokens[t];
    const std::vector<size_t> coords = Tokenize<size_t>(token, "-");
    UTIL_THROW_IF2(coords.size() != 2, "Bad format of word alignment point: " << token);
    Add(coords[0], coords[1]);
  }
}
|
| 54 |
+
|
| 55 |
+
void AlignmentInfo::BuildNonTermIndexMaps()
|
| 56 |
+
{
|
| 57 |
+
if (m_collection.empty()) {
|
| 58 |
+
return;
|
| 59 |
+
}
|
| 60 |
+
const_iterator p = begin();
|
| 61 |
+
size_t maxIndex = p->second;
|
| 62 |
+
for (++p; p != end(); ++p) {
|
| 63 |
+
if (p->second > maxIndex) {
|
| 64 |
+
maxIndex = p->second;
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
|
| 68 |
+
m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
|
| 69 |
+
size_t i = 0;
|
| 70 |
+
for (p = begin(); p != end(); ++p) {
|
| 71 |
+
if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
|
| 72 |
+
// 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
|
| 73 |
+
m_nonTermIndexMap.clear();
|
| 74 |
+
m_nonTermIndexMap2.clear();
|
| 75 |
+
return;
|
| 76 |
+
}
|
| 77 |
+
m_nonTermIndexMap[p->second] = i++;
|
| 78 |
+
m_nonTermIndexMap2[p->second] = p->first;
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
|
| 83 |
+
{
|
| 84 |
+
std::set<size_t> ret;
|
| 85 |
+
CollType::const_iterator iter;
|
| 86 |
+
for (iter = begin(); iter != end(); ++iter) {
|
| 87 |
+
// const std::pair<size_t,size_t> &align = *iter;
|
| 88 |
+
if (iter->first == sourcePos) {
|
| 89 |
+
ret.insert(iter->second);
|
| 90 |
+
}
|
| 91 |
+
}
|
| 92 |
+
return ret;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
|
| 96 |
+
{
|
| 97 |
+
std::set<size_t> ret;
|
| 98 |
+
CollType::const_iterator iter;
|
| 99 |
+
for (iter = begin(); iter != end(); ++iter) {
|
| 100 |
+
// const std::pair<size_t,size_t> &align = *iter;
|
| 101 |
+
if (iter->second == targetPos) {
|
| 102 |
+
ret.insert(iter->first);
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
return ret;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
// Strict ordering of alignment points by target position (pair.second),
// with ties broken by source position (pair.first).
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if (a->second != b->second) {
    return a->second < b->second;
  }
  return a->first < b->first;
}
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
std::vector< const std::pair<size_t,size_t>* >
|
| 120 |
+
AlignmentInfo::
|
| 121 |
+
GetSortedAlignments(WordAlignmentSort SortOrder) const
|
| 122 |
+
{
|
| 123 |
+
std::vector< const std::pair<size_t,size_t>* > ret;
|
| 124 |
+
|
| 125 |
+
CollType::const_iterator iter;
|
| 126 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 127 |
+
const std::pair<size_t,size_t> &alignPair = *iter;
|
| 128 |
+
ret.push_back(&alignPair);
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
switch (SortOrder) {
|
| 132 |
+
case NoSort:
|
| 133 |
+
break;
|
| 134 |
+
|
| 135 |
+
case TargetOrder:
|
| 136 |
+
std::sort(ret.begin(), ret.end(), compare_target);
|
| 137 |
+
break;
|
| 138 |
+
|
| 139 |
+
default:
|
| 140 |
+
UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
|
| 141 |
+
<< SortOrder);
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
return ret;
|
| 145 |
+
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
|
| 149 |
+
{
|
| 150 |
+
std::set<size_t> sourcePoses;
|
| 151 |
+
|
| 152 |
+
CollType::const_iterator iter;
|
| 153 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 154 |
+
size_t sourcePos = iter->first;
|
| 155 |
+
sourcePoses.insert(sourcePos);
|
| 156 |
+
}
|
| 157 |
+
std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
|
| 158 |
+
return ret;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
|
| 162 |
+
{
|
| 163 |
+
AlignmentInfo::const_iterator iter;
|
| 164 |
+
for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter) {
|
| 165 |
+
out << iter->first << "-" << iter->second << " ";
|
| 166 |
+
}
|
| 167 |
+
return out;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
}
|
mosesdecoder/moses/AlignmentInfo.h
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include <iostream>
|
| 23 |
+
#include <ostream>
|
| 24 |
+
#include <set>
|
| 25 |
+
#include <vector>
|
| 26 |
+
#include <cstdlib>
|
| 27 |
+
|
| 28 |
+
#include <boost/functional/hash.hpp>
|
| 29 |
+
#include "TypeDef.h"
|
| 30 |
+
namespace Moses
|
| 31 |
+
{
|
| 32 |
+
|
| 33 |
+
class AlignmentInfoCollection;
|
| 34 |
+
|
| 35 |
+
/** Collection of non-terminal alignment pairs, ordered by source index.
|
| 36 |
+
* Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
|
| 37 |
+
*/
|
| 38 |
+
class AlignmentInfo
|
| 39 |
+
{
|
| 40 |
+
friend std::ostream& operator<<(std::ostream &, const AlignmentInfo &);
|
| 41 |
+
friend struct AlignmentInfoOrderer;
|
| 42 |
+
friend struct AlignmentInfoHasher;
|
| 43 |
+
friend class AlignmentInfoCollection;
|
| 44 |
+
friend class VW;
|
| 45 |
+
|
| 46 |
+
public:
|
| 47 |
+
typedef std::set<std::pair<size_t,size_t> > CollType;
|
| 48 |
+
typedef std::vector<size_t> NonTermIndexMap;
|
| 49 |
+
typedef CollType::const_iterator const_iterator;
|
| 50 |
+
|
| 51 |
+
const_iterator begin() const {
|
| 52 |
+
return m_collection.begin();
|
| 53 |
+
}
|
| 54 |
+
const_iterator end() const {
|
| 55 |
+
return m_collection.end();
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
void Add(size_t sourcePos, size_t targetPos) {
|
| 59 |
+
m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
|
| 60 |
+
}
|
| 61 |
+
/** Provides a map from target-side to source-side non-terminal indices.
|
| 62 |
+
* The target-side index should be the rule symbol index (COUNTING terminals).
|
| 63 |
+
* The index returned is the rule non-terminal index (IGNORING terminals).
|
| 64 |
+
*/
|
| 65 |
+
const NonTermIndexMap &GetNonTermIndexMap() const {
|
| 66 |
+
return m_nonTermIndexMap;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
/** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
|
| 70 |
+
* the index counting both terminals and non-terminals) */
|
| 71 |
+
const NonTermIndexMap &GetNonTermIndexMap2() const {
|
| 72 |
+
return m_nonTermIndexMap2;
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
const CollType &GetAlignments() const {
|
| 76 |
+
return m_collection;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
|
| 80 |
+
std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
|
| 81 |
+
|
| 82 |
+
size_t GetSize() const {
|
| 83 |
+
return m_collection.size();
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
std::vector< const std::pair<size_t,size_t>* >
|
| 87 |
+
GetSortedAlignments(WordAlignmentSort SortOrder) const;
|
| 88 |
+
|
| 89 |
+
std::vector<size_t> GetSourceIndex2PosMap() const;
|
| 90 |
+
|
| 91 |
+
bool operator==(const AlignmentInfo& rhs) const {
|
| 92 |
+
return m_collection == rhs.m_collection &&
|
| 93 |
+
m_nonTermIndexMap == rhs.m_nonTermIndexMap;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
private:
|
| 97 |
+
//! AlignmentInfo objects should only be created by an AlignmentInfoCollection
|
| 98 |
+
explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
|
| 99 |
+
explicit AlignmentInfo(const std::vector<unsigned char> &aln);
|
| 100 |
+
|
| 101 |
+
// used only by VW to load word alignment between sentences
|
| 102 |
+
explicit AlignmentInfo(const std::string &str);
|
| 103 |
+
|
| 104 |
+
void BuildNonTermIndexMaps();
|
| 105 |
+
|
| 106 |
+
CollType m_collection;
|
| 107 |
+
NonTermIndexMap m_nonTermIndexMap;
|
| 108 |
+
NonTermIndexMap m_nonTermIndexMap2;
|
| 109 |
+
};
|
| 110 |
+
|
| 111 |
+
/** Define an arbitrary strict weak ordering between AlignmentInfo objects
|
| 112 |
+
* for use by AlignmentInfoCollection.
|
| 113 |
+
*/
|
| 114 |
+
struct AlignmentInfoOrderer {
|
| 115 |
+
bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
|
| 116 |
+
if (a.m_collection == b.m_collection) {
|
| 117 |
+
return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
|
| 118 |
+
} else {
|
| 119 |
+
return a.m_collection < b.m_collection;
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
};
|
| 123 |
+
|
| 124 |
+
/**
|
| 125 |
+
* Hashing functoid
|
| 126 |
+
**/
|
| 127 |
+
struct AlignmentInfoHasher {
|
| 128 |
+
size_t operator()(const AlignmentInfo& a) const {
|
| 129 |
+
size_t seed = 0;
|
| 130 |
+
boost::hash_combine(seed,a.m_collection);
|
| 131 |
+
boost::hash_combine(seed,a.m_nonTermIndexMap);
|
| 132 |
+
return seed;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
};
|
| 136 |
+
|
| 137 |
+
inline size_t hash_value(const AlignmentInfo& a)
|
| 138 |
+
{
|
| 139 |
+
static AlignmentInfoHasher hasher;
|
| 140 |
+
return hasher(a);
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
}
|
mosesdecoder/moses/AlignmentInfoCollection.cpp
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include "AlignmentInfoCollection.h"
|
| 21 |
+
|
| 22 |
+
namespace Moses
|
| 23 |
+
{
|
| 24 |
+
|
| 25 |
+
// Definition of the single static collection object declared in the class.
AlignmentInfoCollection AlignmentInfoCollection::s_instance;
|
| 26 |
+
|
| 27 |
+
// Seed the collection with an alignment built from an empty pair set and
// remember its address, so GetEmptyAlignmentInfo() can return it later.
AlignmentInfoCollection::AlignmentInfoCollection()
{
  const std::set<std::pair<size_t,size_t> > noPairs;
  m_emptyAlignmentInfo = Add(noPairs);
}
|
| 32 |
+
|
| 33 |
+
// No explicit clean-up is performed here.
AlignmentInfoCollection::~AlignmentInfoCollection()
{
}
|
| 35 |
+
|
| 36 |
+
// Return (by reference) the empty AlignmentInfo registered by the
// constructor.
const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
{
  const AlignmentInfo *cachedEmpty = m_emptyAlignmentInfo;
  return *cachedEmpty;
}
|
| 40 |
+
|
| 41 |
+
// Return the address of the element of m_collection that is equivalent to
// ainfo, inserting a copy of ainfo first if no such element exists.
//
// With WITH_THREADS defined, the lookup runs under a shared lock and only the
// insertion path takes the exclusive lock.
AlignmentInfo const *
AlignmentInfoCollection::
Add(AlignmentInfo const& ainfo)
{
#ifdef WITH_THREADS
  {
    // Look for an existing element while holding only the shared lock.
    boost::shared_lock<boost::shared_mutex> readGuard(m_accessLock);
    AlignmentInfoSet::const_iterator hit = m_collection.find(ainfo);
    if (hit != m_collection.end()) {
      return &*hit;
    }
  }
  // Not found: the shared lock has been released, now take the exclusive one.
  boost::unique_lock<boost::shared_mutex> writeGuard(m_accessLock);
#endif
  // insert() yields the already-present element if an equivalent one exists,
  // so the returned address is valid either way.
  return &*m_collection.insert(ainfo).first;
}
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
}
|
mosesdecoder/moses/AlignmentInfoCollection.h
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "AlignmentInfo.h"
|
| 23 |
+
|
| 24 |
+
#include <set>
|
| 25 |
+
|
| 26 |
+
#ifdef WITH_THREADS
|
| 27 |
+
#include <boost/thread/shared_mutex.hpp>
|
| 28 |
+
#include <boost/thread/locks.hpp>
|
| 29 |
+
#endif
|
| 30 |
+
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
/** Singleton collection of all AlignmentInfo objects.
|
| 35 |
+
* Used as a cache of all alignment info to save space.
|
| 36 |
+
*/
|
| 37 |
+
class AlignmentInfoCollection
|
| 38 |
+
{
|
| 39 |
+
public:
|
| 40 |
+
static AlignmentInfoCollection &Instance() {
|
| 41 |
+
return s_instance;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
/** Returns a pointer to an AlignmentInfo object with the same source-target
|
| 45 |
+
* alignment pairs as given in the argument. If the collection already
|
| 46 |
+
* contains such an object then returns a pointer to it; otherwise a new
|
| 47 |
+
* one is inserted.
|
| 48 |
+
*/
|
| 49 |
+
private:
|
| 50 |
+
const AlignmentInfo* Add(AlignmentInfo const& ainfo);
|
| 51 |
+
|
| 52 |
+
public:
|
| 53 |
+
template<typename ALNREP>
|
| 54 |
+
AlignmentInfo const *
|
| 55 |
+
Add(ALNREP const & aln) {
|
| 56 |
+
return this->Add(AlignmentInfo(aln));
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
//! Returns a pointer to an empty AlignmentInfo object.
|
| 60 |
+
const AlignmentInfo &GetEmptyAlignmentInfo() const;
|
| 61 |
+
|
| 62 |
+
private:
|
| 63 |
+
typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
//! Only a single static variable should be created.
|
| 67 |
+
AlignmentInfoCollection();
|
| 68 |
+
~AlignmentInfoCollection();
|
| 69 |
+
|
| 70 |
+
static AlignmentInfoCollection s_instance;
|
| 71 |
+
|
| 72 |
+
#ifdef WITH_THREADS
|
| 73 |
+
//reader-writer lock
|
| 74 |
+
mutable boost::shared_mutex m_accessLock;
|
| 75 |
+
#endif
|
| 76 |
+
|
| 77 |
+
AlignmentInfoSet m_collection;
|
| 78 |
+
const AlignmentInfo *m_emptyAlignmentInfo;
|
| 79 |
+
};
|
| 80 |
+
|
| 81 |
+
}
|
mosesdecoder/moses/AlignmentInfoTest.cpp
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010- University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include <boost/test/unit_test.hpp>
|
| 21 |
+
|
| 22 |
+
#include "AlignmentInfo.h"
|
| 23 |
+
#include "AlignmentInfoCollection.h"
|
| 24 |
+
|
| 25 |
+
using namespace Moses;
|
| 26 |
+
using namespace std;
|
| 27 |
+
|
| 28 |
+
BOOST_AUTO_TEST_SUITE(alignment_info)
|
| 29 |
+
|
| 30 |
+
typedef pair<size_t,size_t> IndexPair;
|
| 31 |
+
typedef set<pair<size_t,size_t> > IndexSet;
|
| 32 |
+
|
| 33 |
+
struct AlignmentInfoFixture {
|
| 34 |
+
const AlignmentInfo* ai1;
|
| 35 |
+
const AlignmentInfo* ai2;
|
| 36 |
+
const AlignmentInfo* ai3;
|
| 37 |
+
|
| 38 |
+
AlignmentInfoFixture() {
|
| 39 |
+
AlignmentInfoCollection& collection = AlignmentInfoCollection::Instance();
|
| 40 |
+
IndexSet aligns1,aligns2,aligns3;
|
| 41 |
+
aligns1.insert(IndexPair(1,1));
|
| 42 |
+
aligns1.insert(IndexPair(2,1));
|
| 43 |
+
aligns2.insert(IndexPair(1,1));
|
| 44 |
+
aligns2.insert(IndexPair(2,1));
|
| 45 |
+
aligns3.insert(IndexPair(1,2));
|
| 46 |
+
aligns3.insert(IndexPair(2,1));
|
| 47 |
+
ai1 = collection.Add(aligns1);
|
| 48 |
+
ai2 = collection.Add(aligns2);
|
| 49 |
+
ai3 = collection.Add(aligns3);
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
};
|
| 53 |
+
|
| 54 |
+
// operator== must treat identical pair sets as equal and different pair
// sets as unequal.
BOOST_FIXTURE_TEST_CASE(comparator, AlignmentInfoFixture)
{
  const AlignmentInfo &a1 = *ai1;
  const AlignmentInfo &a2 = *ai2;
  const AlignmentInfo &a3 = *ai3;

  // same content, and every object equals itself
  BOOST_CHECK(a1 == a2);
  BOOST_CHECK(a1 == a1);
  BOOST_CHECK(a2 == a2);
  BOOST_CHECK(a3 == a3);

  // different content
  BOOST_CHECK(!(a2 == a3));
  BOOST_CHECK(!(a1 == a3));
}
|
| 63 |
+
|
| 64 |
+
// Simple check that alignments built from identical pair sets hash alike.
BOOST_FIXTURE_TEST_CASE(hasher, AlignmentInfoFixture)
{
  AlignmentInfoHasher hasher;
  BOOST_CHECK_EQUAL(hasher(*ai1), hasher(*ai2));
}
|
| 70 |
+
|
| 71 |
+
BOOST_AUTO_TEST_SUITE_END()
|
mosesdecoder/moses/BaseManager.cpp
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "BaseManager.h"
|
| 2 |
+
#include "StaticData.h"
|
| 3 |
+
#include "moses/FF/StatelessFeatureFunction.h"
|
| 4 |
+
#include "moses/FF/StatefulFeatureFunction.h"
|
| 5 |
+
#include "moses/TranslationTask.h"
|
| 6 |
+
|
| 7 |
+
#include <vector>
|
| 8 |
+
#include <boost/algorithm/string/predicate.hpp>
|
| 9 |
+
#include <boost/iostreams/device/file.hpp>
|
| 10 |
+
#include <boost/iostreams/filter/bzip2.hpp>
|
| 11 |
+
#include <boost/iostreams/filter/gzip.hpp>
|
| 12 |
+
#include <boost/iostreams/filtering_stream.hpp>
|
| 13 |
+
#include <boost/filesystem.hpp>
|
| 14 |
+
|
| 15 |
+
using namespace std;
|
| 16 |
+
|
| 17 |
+
namespace Moses
|
| 18 |
+
{
|
| 19 |
+
|
| 20 |
+
// Keep a handle to the translation task and bind m_source to the input
// object that the task's GetSource() points at.
BaseManager::BaseManager(ttasksptr const& ttask)
  : m_ttask(ttask),
    m_source(*(ttask->GetSource().get()))
{
}
|
| 23 |
+
|
| 24 |
+
// Accessor for the input bound at construction time.
const InputType&
BaseManager::GetSource() const
{
  const InputType &input = m_source;
  return input;
}
|
| 29 |
+
|
| 30 |
+
// Return the result of m_ttask.lock().
// NOTE(review): m_ttask looks like a weak pointer, in which case the result
// would be empty once the task is gone - confirm against BaseManager.h.
const ttasksptr
BaseManager::GetTtask() const
{
  ttasksptr task = m_ttask.lock();
  return task;
}
|
| 35 |
+
|
| 36 |
+
void
|
| 37 |
+
BaseManager::
|
| 38 |
+
OutputSearchGraphAsHypergraph(std::ostream& out) const
|
| 39 |
+
{
|
| 40 |
+
// This virtual function that may not be implemented everywhere, but it should for
|
| 41 |
+
// derived classes that use it
|
| 42 |
+
UTIL_THROW2("Not implemented.");
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
void
|
| 46 |
+
BaseManager::
|
| 47 |
+
OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision) const
|
| 48 |
+
{
|
| 49 |
+
std::string odir = boost::filesystem::path(fname).parent_path().string();
|
| 50 |
+
if (! boost::filesystem::exists(odir))
|
| 51 |
+
boost::filesystem::create_directory(odir);
|
| 52 |
+
UTIL_THROW_IF2(!boost::filesystem::is_directory(odir),
|
| 53 |
+
"Cannot output hypergraphs to " << odir
|
| 54 |
+
<< " because that path exists but is not a directory.");
|
| 55 |
+
|
| 56 |
+
// not clear why we need to output the weights every time we dump a search
|
| 57 |
+
// graph into a file again, but that's what the old code did.
|
| 58 |
+
|
| 59 |
+
string weightsFile = odir + "/weights";
|
| 60 |
+
TRACE_ERR("The weights file is " << weightsFile << "\n");
|
| 61 |
+
ofstream weightsOut;
|
| 62 |
+
weightsOut.open(weightsFile.c_str());
|
| 63 |
+
weightsOut.setf(std::ios::fixed);
|
| 64 |
+
weightsOut.precision(6);
|
| 65 |
+
// just temporarily, till we've implemented weight scoring in the manager
|
| 66 |
+
// (or the translation task)
|
| 67 |
+
StaticData::Instance().GetAllWeights().Save(weightsOut);
|
| 68 |
+
weightsOut.close();
|
| 69 |
+
|
| 70 |
+
boost::iostreams::filtering_ostream file;
|
| 71 |
+
if (boost::ends_with(fname, ".gz"))
|
| 72 |
+
file.push(boost::iostreams::gzip_compressor());
|
| 73 |
+
else if (boost::ends_with(fname, ".bz2"))
|
| 74 |
+
file.push( boost::iostreams::bzip2_compressor() );
|
| 75 |
+
file.push( boost::iostreams::file_sink(fname, ios_base::out) );
|
| 76 |
+
if (file.is_complete() && file.good()) {
|
| 77 |
+
file.setf(std::ios::fixed);
|
| 78 |
+
file.precision(precision);
|
| 79 |
+
this->OutputSearchGraphAsHypergraph(file);
|
| 80 |
+
file.flush();
|
| 81 |
+
} else {
|
| 82 |
+
TRACE_ERR("Cannot output hypergraph for line "
|
| 83 |
+
<< this->GetSource().GetTranslationId()
|
| 84 |
+
<< " because the output file " << fname
|
| 85 |
+
<< " is not open or not ready for writing"
|
| 86 |
+
<< std::endl);
|
| 87 |
+
}
|
| 88 |
+
file.pop();
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
/***
|
| 95 |
+
* print surface factor only for the given phrase
|
| 96 |
+
*/
|
| 97 |
+
void
|
| 98 |
+
BaseManager::
|
| 99 |
+
OutputSurface(std::ostream &out, Phrase const& phrase) const
|
| 100 |
+
{
|
| 101 |
+
std::vector<FactorType> const& factor_order = options()->output.factor_order;
|
| 102 |
+
|
| 103 |
+
bool markUnknown = options()->unk.mark;
|
| 104 |
+
std::string const& fd = options()->output.factor_delimiter;
|
| 105 |
+
|
| 106 |
+
size_t size = phrase.GetSize();
|
| 107 |
+
for (size_t pos = 0 ; pos < size ; pos++) {
|
| 108 |
+
const Factor *factor = phrase.GetFactor(pos, factor_order[0]);
|
| 109 |
+
UTIL_THROW_IF2(factor == NULL, "Empty factor 0 at position " << pos);
|
| 110 |
+
|
| 111 |
+
const Word &word = phrase.GetWord(pos);
|
| 112 |
+
if(markUnknown && word.IsOOV()) {
|
| 113 |
+
out << options()->unk.prefix;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
out << *factor;
|
| 117 |
+
|
| 118 |
+
for (size_t i = 1 ; i < factor_order.size() ; i++) {
|
| 119 |
+
const Factor *factor = phrase.GetFactor(pos, factor_order[i]);
|
| 120 |
+
UTIL_THROW_IF2(!factor, "Empty factor " << i << " at position " << pos);
|
| 121 |
+
out << fd << *factor;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
if(markUnknown && word.IsOOV()) {
|
| 125 |
+
out << options()->unk.suffix;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
out << " ";
|
| 129 |
+
}
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
// Emulates the old operator<<(ostream &, const DottedRule &) function. The
|
| 133 |
+
// output format is a bit odd (reverse order and double spacing between symbols)
|
| 134 |
+
// but there are scripts and tools that expect the output of -T to look like
|
| 135 |
+
// that.
|
| 136 |
+
void BaseManager::WriteApplicationContext(std::ostream &out,
|
| 137 |
+
const ApplicationContext &context) const
|
| 138 |
+
{
|
| 139 |
+
assert(!context.empty());
|
| 140 |
+
ApplicationContext::const_reverse_iterator p = context.rbegin();
|
| 141 |
+
while (true) {
|
| 142 |
+
out << p->second << "=" << p->first << " ";
|
| 143 |
+
if (++p == context.rend()) {
|
| 144 |
+
break;
|
| 145 |
+
}
|
| 146 |
+
out << " ";
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
// Returns the options of the translation task this manager belongs to.
// Throws via UTIL_THROW_IF2 when the task can no longer be obtained, instead
// of dereferencing an empty pointer.
// NOTE(review): the reference returned is whatever ttask->options() yields;
// presumably it refers to data owned by the task and stays valid only while
// the task is alive -- confirm against the task class.
AllOptions::ptr const&
BaseManager::
options() const
{
  ttasksptr const task = GetTtask();
  UTIL_THROW_IF2(!task, "Translation task is no longer available.");
  return task->options();
}
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
} // namespace
|
| 159 |
+
|
| 160 |
+
|
mosesdecoder/moses/BaseManager.h
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- c++ -*-
|
| 2 |
+
#pragma once
|
| 3 |
+
|
| 4 |
+
#include <iostream>
|
| 5 |
+
#include <string>
|
| 6 |
+
#include "ScoreComponentCollection.h"
|
| 7 |
+
#include "InputType.h"
|
| 8 |
+
#include "moses/parameters/AllOptions.h"
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
class ScoreComponentCollection;
|
| 12 |
+
class FeatureFunction;
|
| 13 |
+
class OutputCollector;
|
| 14 |
+
|
| 15 |
+
class BaseManager
|
| 16 |
+
{
|
| 17 |
+
protected:
|
| 18 |
+
// const InputType &m_source; /**< source sentence to be translated */
|
| 19 |
+
ttaskwptr m_ttask;
|
| 20 |
+
InputType const& m_source;
|
| 21 |
+
|
| 22 |
+
BaseManager(ttasksptr const& ttask);
|
| 23 |
+
|
| 24 |
+
// output
|
| 25 |
+
typedef std::vector<std::pair<Moses::Word, Moses::Range> > ApplicationContext;
|
| 26 |
+
typedef std::set< std::pair<size_t, size_t> > Alignments;
|
| 27 |
+
|
| 28 |
+
void OutputSurface(std::ostream &out, Phrase const& phrase) const;
|
| 29 |
+
|
| 30 |
+
void WriteApplicationContext(std::ostream &out,
|
| 31 |
+
const ApplicationContext &context) const;
|
| 32 |
+
|
| 33 |
+
template <class T>
|
| 34 |
+
void ShiftOffsets(std::vector<T> &offsets, T shift) const {
|
| 35 |
+
T currPos = shift;
|
| 36 |
+
for (size_t i = 0; i < offsets.size(); ++i) {
|
| 37 |
+
if (offsets[i] == 0) {
|
| 38 |
+
offsets[i] = currPos;
|
| 39 |
+
++currPos;
|
| 40 |
+
} else {
|
| 41 |
+
currPos += offsets[i];
|
| 42 |
+
}
|
| 43 |
+
}
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
public:
|
| 47 |
+
virtual ~BaseManager() { }
|
| 48 |
+
|
| 49 |
+
//! the input sentence being decoded
|
| 50 |
+
const InputType& GetSource() const;
|
| 51 |
+
const ttasksptr GetTtask() const;
|
| 52 |
+
AllOptions::ptr const& options() const;
|
| 53 |
+
|
| 54 |
+
virtual void Decode() = 0;
|
| 55 |
+
// outputs
|
| 56 |
+
virtual void OutputBest(OutputCollector *collector) const = 0;
|
| 57 |
+
virtual void OutputNBest(OutputCollector *collector) const = 0;
|
| 58 |
+
virtual void OutputLatticeSamples(OutputCollector *collector) const = 0;
|
| 59 |
+
virtual void OutputAlignment(OutputCollector *collector) const = 0;
|
| 60 |
+
virtual void OutputDetailedTranslationReport(OutputCollector *collector) const = 0;
|
| 61 |
+
virtual void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const = 0;
|
| 62 |
+
virtual void OutputWordGraph(OutputCollector *collector) const = 0;
|
| 63 |
+
virtual void OutputSearchGraph(OutputCollector *collector) const = 0;
|
| 64 |
+
virtual void OutputUnknowns(OutputCollector *collector) const = 0;
|
| 65 |
+
virtual void OutputSearchGraphSLF() const = 0;
|
| 66 |
+
// virtual void OutputSearchGraphHypergraph() const = 0;
|
| 67 |
+
|
| 68 |
+
virtual void OutputSearchGraphAsHypergraph(std::ostream& out) const;
|
| 69 |
+
virtual void OutputSearchGraphAsHypergraph(std::string const& fname,
|
| 70 |
+
size_t const precision) const;
|
| 71 |
+
/***
|
| 72 |
+
* to be called after processing a sentence
|
| 73 |
+
*/
|
| 74 |
+
virtual void CalcDecoderStatistics() const = 0;
|
| 75 |
+
|
| 76 |
+
};
|
| 77 |
+
|
| 78 |
+
}
|
mosesdecoder/moses/Bitmap.cpp
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <boost/functional/hash.hpp>
|
| 23 |
+
#include "Bitmap.h"
|
| 24 |
+
|
| 25 |
+
namespace Moses
|
| 26 |
+
{
|
| 27 |
+
|
| 28 |
+
TO_STRING_BODY(Bitmap);
|
| 29 |
+
|
| 30 |
+
// Builds a bitmap of `size` positions from `initializer`. The initializer is
// copied and then resized to `size` (extra positions become false), after
// which the cached covered-word count and first-gap position are computed.
Bitmap::Bitmap(size_t size, const std::vector<bool>& initializer)
  :m_bitmap(initializer.begin(), initializer.end())
{
  // The initializer may be shorter or longer than requested.
  m_bitmap.resize(size, false);

  // One pass fills both caches: count the set positions and remember the
  // first position that is not set.
  size_t covered = 0;
  size_t firstGap = 0;
  bool gapSeen = false;
  for (size_t pos = 0; pos < m_bitmap.size(); ++pos) {
    if (m_bitmap[pos]) {
      ++covered;
    } else if (!gapSeen) {
      firstGap = pos;
      gapSeen = true;
    }
  }

  m_numWordsCovered = covered;
  if (gapSeen) {
    m_firstGap = firstGap;
  } else {
    m_firstGap = NOT_FOUND;
  }
}
|
| 47 |
+
|
| 48 |
+
//! Create Bitmap of length size and initialise.
|
| 49 |
+
Bitmap::Bitmap(size_t size)
  :m_bitmap(size, false)
   // An empty bitmap has no gap at all; report NOT_FOUND, as the
   // initializer-based constructor does for the same input.
  ,m_firstGap(size > 0 ? 0 : NOT_FOUND)
  ,m_numWordsCovered(0)
{
}
|
| 56 |
+
|
| 57 |
+
//! Deep copy.
|
| 58 |
+
Bitmap::Bitmap(const Bitmap &copy)
  // Copies the bit vector together with both cached values.
  :m_bitmap(copy.m_bitmap)
  ,m_firstGap(copy.m_firstGap)
  ,m_numWordsCovered(copy.m_numWordsCovered)
{
}
|
| 64 |
+
|
| 65 |
+
// Copies `copy` and then marks every position of `range` via
// SetValueNonOverlap().
Bitmap::Bitmap(const Bitmap &copy, const Range &range)
  :m_bitmap(copy.m_bitmap)
  ,m_firstGap(copy.m_firstGap)
  ,m_numWordsCovered(copy.m_numWordsCovered)
{
  SetValueNonOverlap(range);
}
|
| 72 |
+
|
| 73 |
+
// for unordered_set in stack
|
| 74 |
+
size_t Bitmap::hash() const
|
| 75 |
+
{
|
| 76 |
+
size_t ret = boost::hash_value(m_bitmap);
|
| 77 |
+
return ret;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
bool Bitmap::operator==(const Bitmap& other) const
|
| 81 |
+
{
|
| 82 |
+
return m_bitmap == other.m_bitmap;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
// friend
|
| 86 |
+
std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap)
|
| 87 |
+
{
|
| 88 |
+
for (size_t i = 0 ; i < bitmap.m_bitmap.size() ; i++) {
|
| 89 |
+
out << int(bitmap.GetValue(i));
|
| 90 |
+
}
|
| 91 |
+
return out;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
} // namespace
|
| 95 |
+
|
| 96 |
+
|
mosesdecoder/moses/Bitmap.h
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_WordsBitmap_h
|
| 23 |
+
#define moses_WordsBitmap_h
|
| 24 |
+
|
| 25 |
+
#include <algorithm>
|
| 26 |
+
#include <limits>
|
| 27 |
+
#include <vector>
|
| 28 |
+
#include <iostream>
|
| 29 |
+
#include <cstring>
|
| 30 |
+
#include <cmath>
|
| 31 |
+
#include <cstdlib>
|
| 32 |
+
#include "TypeDef.h"
|
| 33 |
+
#include "Range.h"
|
| 34 |
+
|
| 35 |
+
namespace Moses
|
| 36 |
+
{
|
| 37 |
+
typedef unsigned long WordsBitmapID;
|
| 38 |
+
|
| 39 |
+
/** Vector of boolean to represent whether a word has been translated or not.
|
| 40 |
+
*
|
| 41 |
+
* Implemented using a vector of char, which is usually the same representation
|
| 42 |
+
* for the elements that a C array of bool would use. A vector of bool, or a
|
| 43 |
+
* Boost dynamic_bitset, could be much more efficient in theory. Unfortunately
|
| 44 |
+
* algorithms like std::find() are not optimized for vector<bool> on gcc or
|
| 45 |
+
* clang, and dynamic_bitset lacks all the optimized search operations we want.
|
| 46 |
+
* Only benchmarking will tell what works best. Perhaps dynamic_bitset could
|
| 47 |
+
* still be a dramatic improvement, if we flip the meaning of the bits around
|
| 48 |
+
* so we can use its find_first() and find_next() for the most common searches.
|
| 49 |
+
*/
|
| 50 |
+
class Bitmap
|
| 51 |
+
{
|
| 52 |
+
friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
|
| 53 |
+
private:
|
| 54 |
+
std::vector<char> m_bitmap; //! Ticks of words in sentence that have been done.
|
| 55 |
+
size_t m_firstGap; //! Cached position of first gap, or NOT_FOUND.
|
| 56 |
+
size_t m_numWordsCovered;
|
| 57 |
+
|
| 58 |
+
Bitmap(); // not implemented
|
| 59 |
+
Bitmap& operator= (const Bitmap& other);
|
| 60 |
+
|
| 61 |
+
/** Update the first gap, when bits are flipped */
|
| 62 |
+
void UpdateFirstGap(size_t startPos, size_t endPos, bool value) {
|
| 63 |
+
if (value) {
|
| 64 |
+
//may remove gap
|
| 65 |
+
if (startPos <= m_firstGap && m_firstGap <= endPos) {
|
| 66 |
+
m_firstGap = NOT_FOUND;
|
| 67 |
+
for (size_t i = endPos + 1 ; i < m_bitmap.size(); ++i) {
|
| 68 |
+
if (!m_bitmap[i]) {
|
| 69 |
+
m_firstGap = i;
|
| 70 |
+
break;
|
| 71 |
+
}
|
| 72 |
+
}
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
} else {
|
| 76 |
+
//setting positions to false, may add new gap
|
| 77 |
+
if (startPos < m_firstGap) {
|
| 78 |
+
m_firstGap = startPos;
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
//! set value between 2 positions, inclusive
|
| 84 |
+
void
|
| 85 |
+
SetValueNonOverlap(Range const& range) {
|
| 86 |
+
size_t startPos = range.GetStartPos();
|
| 87 |
+
size_t endPos = range.GetEndPos();
|
| 88 |
+
|
| 89 |
+
for(size_t pos = startPos ; pos <= endPos ; pos++) {
|
| 90 |
+
m_bitmap[pos] = true;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
m_numWordsCovered += range.GetNumWordsCovered();
|
| 94 |
+
UpdateFirstGap(startPos, endPos, true);
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
public:
|
| 98 |
+
//! Create Bitmap of length size, and initialise with vector.
|
| 99 |
+
explicit Bitmap(size_t size, const std::vector<bool>& initializer);
|
| 100 |
+
|
| 101 |
+
//! Create Bitmap of length size and initialise.
|
| 102 |
+
explicit Bitmap(size_t size);
|
| 103 |
+
|
| 104 |
+
//! Deep copy.
|
| 105 |
+
explicit Bitmap(const Bitmap ©);
|
| 106 |
+
|
| 107 |
+
explicit Bitmap(const Bitmap ©, const Range &range);
|
| 108 |
+
|
| 109 |
+
//! Count of words translated.
|
| 110 |
+
size_t GetNumWordsCovered() const {
|
| 111 |
+
return m_numWordsCovered;
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
//! position of 1st word not yet translated, or NOT_FOUND if everything already translated
|
| 115 |
+
size_t GetFirstGapPos() const {
|
| 116 |
+
return m_firstGap;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
//! position of last word not yet translated, or NOT_FOUND if everything already translated
|
| 121 |
+
size_t GetLastGapPos() const {
|
| 122 |
+
for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
|
| 123 |
+
if (!m_bitmap[pos]) {
|
| 124 |
+
return pos;
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
// no starting pos
|
| 128 |
+
return NOT_FOUND;
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
//! position of last translated word
|
| 133 |
+
size_t GetLastPos() const {
|
| 134 |
+
for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
|
| 135 |
+
if (m_bitmap[pos]) {
|
| 136 |
+
return pos;
|
| 137 |
+
}
|
| 138 |
+
}
|
| 139 |
+
// no starting pos
|
| 140 |
+
return NOT_FOUND;
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
//! whether a word has been translated at a particular position
|
| 144 |
+
bool GetValue(size_t pos) const {
|
| 145 |
+
return bool(m_bitmap[pos]);
|
| 146 |
+
}
|
| 147 |
+
//! set value at a particular position
|
| 148 |
+
void SetValue( size_t pos, bool value ) {
|
| 149 |
+
bool origValue = m_bitmap[pos];
|
| 150 |
+
if (origValue == value) {
|
| 151 |
+
// do nothing
|
| 152 |
+
} else {
|
| 153 |
+
m_bitmap[pos] = value;
|
| 154 |
+
UpdateFirstGap(pos, pos, value);
|
| 155 |
+
if (value) {
|
| 156 |
+
++m_numWordsCovered;
|
| 157 |
+
} else {
|
| 158 |
+
--m_numWordsCovered;
|
| 159 |
+
}
|
| 160 |
+
}
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
//! whether every word has been translated
|
| 164 |
+
bool IsComplete() const {
|
| 165 |
+
return GetSize() == GetNumWordsCovered();
|
| 166 |
+
}
|
| 167 |
+
//! whether the wordrange overlaps with any translated word in this bitmap
|
| 168 |
+
bool Overlap(const Range &compare) const {
|
| 169 |
+
for (size_t pos = compare.GetStartPos() ; pos <= compare.GetEndPos() ; pos++) {
|
| 170 |
+
if (m_bitmap[pos])
|
| 171 |
+
return true;
|
| 172 |
+
}
|
| 173 |
+
return false;
|
| 174 |
+
}
|
| 175 |
+
//! number of elements
|
| 176 |
+
size_t GetSize() const {
|
| 177 |
+
return m_bitmap.size();
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
inline size_t GetEdgeToTheLeftOf(size_t l) const {
|
| 181 |
+
if (l == 0) return l;
|
| 182 |
+
while (l && !m_bitmap[l-1]) {
|
| 183 |
+
--l;
|
| 184 |
+
}
|
| 185 |
+
return l;
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
inline size_t GetEdgeToTheRightOf(size_t r) const {
|
| 189 |
+
if (r+1 == m_bitmap.size()) return r;
|
| 190 |
+
return (
|
| 191 |
+
std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
|
| 192 |
+
m_bitmap.begin()
|
| 193 |
+
) - 1;
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
//! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
|
| 198 |
+
WordsBitmapID GetID() const {
|
| 199 |
+
assert(m_bitmap.size() < (1<<16));
|
| 200 |
+
|
| 201 |
+
size_t start = GetFirstGapPos();
|
| 202 |
+
if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
|
| 203 |
+
|
| 204 |
+
size_t end = GetLastPos();
|
| 205 |
+
if (end == NOT_FOUND) end = 0; // nothing translated yet
|
| 206 |
+
|
| 207 |
+
assert(end < start || end-start <= 16);
|
| 208 |
+
WordsBitmapID id = 0;
|
| 209 |
+
for(size_t pos = end; pos > start; pos--) {
|
| 210 |
+
id = id*2 + (int) GetValue(pos);
|
| 211 |
+
}
|
| 212 |
+
return id + (1<<16) * start;
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
//! converts bitmap into an integer ID, with an additional span covered
|
| 216 |
+
WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
|
| 217 |
+
assert(m_bitmap.size() < (1<<16));
|
| 218 |
+
|
| 219 |
+
size_t start = GetFirstGapPos();
|
| 220 |
+
if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
|
| 221 |
+
|
| 222 |
+
size_t end = GetLastPos();
|
| 223 |
+
if (end == NOT_FOUND) end = 0; // nothing translated yet
|
| 224 |
+
|
| 225 |
+
if (start == startPos) start = endPos+1;
|
| 226 |
+
if (end < endPos) end = endPos;
|
| 227 |
+
|
| 228 |
+
assert(end < start || end-start <= 16);
|
| 229 |
+
WordsBitmapID id = 0;
|
| 230 |
+
for(size_t pos = end; pos > start; pos--) {
|
| 231 |
+
id = id*2;
|
| 232 |
+
if (GetValue(pos) || (startPos<=pos && pos<=endPos))
|
| 233 |
+
id++;
|
| 234 |
+
}
|
| 235 |
+
return id + (1<<16) * start;
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
// for unordered_set in stack
|
| 239 |
+
size_t hash() const;
|
| 240 |
+
bool operator==(const Bitmap& other) const;
|
| 241 |
+
bool operator!=(const Bitmap& other) const {
|
| 242 |
+
return !(*this == other);
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
TO_STRING();
|
| 246 |
+
};
|
| 247 |
+
|
| 248 |
+
}
|
| 249 |
+
#endif
|
mosesdecoder/moses/BitmapContainer.cpp
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <limits>
|
| 24 |
+
#include <utility>
|
| 25 |
+
|
| 26 |
+
#include "BitmapContainer.h"
|
| 27 |
+
#include "HypothesisStackCubePruning.h"
|
| 28 |
+
#include "moses/FF/DistortionScoreProducer.h"
|
| 29 |
+
#include "TranslationOptionList.h"
|
| 30 |
+
#include "Manager.h"
|
| 31 |
+
|
| 32 |
+
namespace Moses
|
| 33 |
+
{
|
| 34 |
+
|
| 35 |
+
class HypothesisScoreOrdererNoDistortion
|
| 36 |
+
{
|
| 37 |
+
public:
|
| 38 |
+
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
|
| 39 |
+
const float scoreA = hypoA->GetScore();
|
| 40 |
+
const float scoreB = hypoB->GetScore();
|
| 41 |
+
|
| 42 |
+
if (scoreA > scoreB) {
|
| 43 |
+
return true;
|
| 44 |
+
} else if (scoreA < scoreB) {
|
| 45 |
+
return false;
|
| 46 |
+
} else {
|
| 47 |
+
return hypoA < hypoB;
|
| 48 |
+
}
|
| 49 |
+
}
|
| 50 |
+
};
|
| 51 |
+
|
| 52 |
+
class HypothesisScoreOrdererWithDistortion
|
| 53 |
+
{
|
| 54 |
+
private:
|
| 55 |
+
bool m_deterministic;
|
| 56 |
+
|
| 57 |
+
public:
|
| 58 |
+
HypothesisScoreOrdererWithDistortion(const Range* transOptRange,
|
| 59 |
+
const bool deterministic = false)
|
| 60 |
+
: m_deterministic(deterministic)
|
| 61 |
+
, m_transOptRange(transOptRange) {
|
| 62 |
+
m_totalWeightDistortion = 0;
|
| 63 |
+
const StaticData &staticData = StaticData::Instance();
|
| 64 |
+
|
| 65 |
+
const std::vector<const DistortionScoreProducer*> &ffs = DistortionScoreProducer::GetDistortionFeatureFunctions();
|
| 66 |
+
std::vector<const DistortionScoreProducer*>::const_iterator iter;
|
| 67 |
+
for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
|
| 68 |
+
const DistortionScoreProducer *ff = *iter;
|
| 69 |
+
|
| 70 |
+
float weight =staticData.GetAllWeights().GetScoreForProducer(ff);
|
| 71 |
+
m_totalWeightDistortion += weight;
|
| 72 |
+
}
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
const Range* m_transOptRange;
|
| 76 |
+
float m_totalWeightDistortion;
|
| 77 |
+
|
| 78 |
+
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
|
| 79 |
+
UTIL_THROW_IF2(m_transOptRange == NULL, "Words range not set");
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
const float distortionScoreA = DistortionScoreProducer::CalculateDistortionScore(
|
| 83 |
+
*hypoA,
|
| 84 |
+
hypoA->GetCurrSourceWordsRange(),
|
| 85 |
+
*m_transOptRange,
|
| 86 |
+
hypoA->GetWordsBitmap().GetFirstGapPos()
|
| 87 |
+
);
|
| 88 |
+
const float distortionScoreB = DistortionScoreProducer::CalculateDistortionScore(
|
| 89 |
+
*hypoB,
|
| 90 |
+
hypoB->GetCurrSourceWordsRange(),
|
| 91 |
+
*m_transOptRange,
|
| 92 |
+
hypoB->GetWordsBitmap().GetFirstGapPos()
|
| 93 |
+
);
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
const float scoreA = hypoA->GetScore() + distortionScoreA * m_totalWeightDistortion;
|
| 97 |
+
const float scoreB = hypoB->GetScore() + distortionScoreB * m_totalWeightDistortion;
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
if (scoreA > scoreB) {
|
| 101 |
+
return true;
|
| 102 |
+
} else if (scoreA < scoreB) {
|
| 103 |
+
return false;
|
| 104 |
+
} else {
|
| 105 |
+
if (m_deterministic) {
|
| 106 |
+
// Equal scores: break ties by comparing target phrases
|
| 107 |
+
return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
|
| 108 |
+
}
|
| 109 |
+
// Fallback: non-deterministic sort
|
| 110 |
+
return hypoA < hypoB;
|
| 111 |
+
}
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
};
|
| 115 |
+
|
| 116 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 117 |
+
// BackwardsEdge Code
|
| 118 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 119 |
+
|
| 120 |
+
// Sets up one edge of the cube: the hypotheses of prevBitmapContainer on one
// axis and `translations` on the other.
//
// With a distortion limit of -1 every hypothesis of the previous container
// is taken over as is. Otherwise only hypotheses within the distortion limit
// of the translation options' source range are kept, the first two entries
// of both axes are checked for descending future score (throws via
// UTIL_THROW_IF2 otherwise), and the kept hypotheses are sorted with
// HypothesisScoreOrdererWithDistortion.
BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
                             , BitmapContainer &parent
                             , const TranslationOptionList &translations
                             , const SquareMatrix &estimatedScores,
                             const InputType& itype,
                             const bool deterministic)
  : m_initialized(false)
  , m_prevBitmapContainer(prevBitmapContainer)
  , m_parent(parent)
  , m_translations(translations)
  , m_estimatedScores(estimatedScores)
  , m_deterministic(deterministic)
  , m_seenPosition()
{

  // If either dimension is empty, we haven't got anything to do.
  if(m_prevBitmapContainer.GetHypotheses().size() == 0 || m_translations.size() == 0) {
    VERBOSE(3, "Empty cube on BackwardsEdge" << std::endl);
    return;
  }

  // Fetch the things we need for distortion cost computation.
  // int maxDistortion = StaticData::Instance().GetMaxDistortion();
  int maxDistortion = itype.options()->reordering.max_distortion;

  // -1: no filtering; note that this path also skips the checks and the
  // sort below.
  if (maxDistortion == -1) {
    for (HypothesisSet::const_iterator iter = m_prevBitmapContainer.GetHypotheses().begin(); iter != m_prevBitmapContainer.GetHypotheses().end(); ++iter) {
      m_hypotheses.push_back(*iter);
    }
    return;
  }

  // The range of the first option is used for the whole list.
  const Range &transOptRange = translations.Get(0)->GetSourceWordsRange();

  HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
  for (HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
       iterHypo != iterEnd; ++iterHypo) {
    const Hypothesis &hypo = **iterHypo;
    // Special case: If this is the first hypothesis used to seed the search,
    // it doesn't have a valid range, and we create the hypothesis, if the
    // initial position is not further into the sentence than the distortion limit.
    if (hypo.GetWordsBitmap().GetNumWordsCovered() == 0) {
      if ((int)transOptRange.GetStartPos() <= maxDistortion)
        m_hypotheses.push_back(&hypo);
    } else {
      int distortionDistance = itype.ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
                               , transOptRange);

      if (distortionDistance <= maxDistortion)
        m_hypotheses.push_back(&hypo);
    }
  }

  if (m_translations.size() > 1) {
    UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
                   "Non-monotonic future score: "
                   << m_translations.Get(0)->GetFutureScore() << " vs. "
                   << m_translations.Get(1)->GetFutureScore());
  }

  // Checked before sorting, i.e. against the order of the previous container.
  if (m_hypotheses.size() > 1) {
    UTIL_THROW_IF2(m_hypotheses[0]->GetFutureScore() < m_hypotheses[1]->GetFutureScore(),
                   "Non-monotonic total score: "
                   << m_hypotheses[0]->GetFutureScore() << " vs. "
                   << m_hypotheses[1]->GetFutureScore());
  }

  HypothesisScoreOrdererWithDistortion orderer (&transOptRange, m_deterministic);
  std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);

  // std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
}
|
| 195 |
+
|
| 196 |
+
// Drops the edge's bookkeeping. Only the containers are emptied; the
// hypotheses that m_hypotheses points to are not deleted here.
BackwardsEdge::~BackwardsEdge()
{
  m_hypotheses.clear();
  m_seenPosition.clear();
}
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
void
|
| 204 |
+
BackwardsEdge::Initialize()
|
| 205 |
+
{
|
| 206 |
+
if(m_hypotheses.size() == 0 || m_translations.size() == 0) {
|
| 207 |
+
m_initialized = true;
|
| 208 |
+
return;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
const Bitmap &bm = m_hypotheses[0]->GetWordsBitmap();
|
| 212 |
+
const Range &newRange = m_translations.Get(0)->GetSourceWordsRange();
|
| 213 |
+
m_estimatedScore = m_estimatedScores.CalcEstimatedScore(bm, newRange.GetStartPos(), newRange.GetEndPos());
|
| 214 |
+
|
| 215 |
+
Hypothesis *expanded = CreateHypothesis(*m_hypotheses[0], *m_translations.Get(0));
|
| 216 |
+
m_parent.Enqueue(0, 0, expanded, this);
|
| 217 |
+
SetSeenPosition(0, 0);
|
| 218 |
+
m_initialized = true;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
// Expands `hypothesis` with `transOpt` into a new heap-allocated Hypothesis
// that covers the parent's bitmap, and scores it with the edge's stored
// estimate. The returned pointer is handed to the caller.
Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt)
{
  // Construction is timed only at verbosity level 2.
  IFVERBOSE(2) {
    hypothesis.GetManager().GetSentenceStats().StartTimeBuildHyp();
  }
  Hypothesis *expanded = new Hypothesis(hypothesis,
                                        transOpt,
                                        m_parent.GetWordsBitmap(),
                                        hypothesis.GetManager().GetNextHypoId());
  IFVERBOSE(2) {
    hypothesis.GetManager().GetSentenceStats().StopTimeBuildHyp();
  }

  expanded->EvaluateWhenApplied(m_estimatedScore);
  return expanded;
}
|
| 236 |
+
|
| 237 |
+
bool
|
| 238 |
+
BackwardsEdge::SeenPosition(const size_t x, const size_t y)
|
| 239 |
+
{
|
| 240 |
+
boost::unordered_set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
|
| 241 |
+
return (iter != m_seenPosition.end());
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
void
|
| 245 |
+
BackwardsEdge::SetSeenPosition(const size_t x, const size_t y)
|
| 246 |
+
{
|
| 247 |
+
UTIL_THROW_IF2(x >= (1<<17), "Error");
|
| 248 |
+
UTIL_THROW_IF2(y >= (1<<17), "Error");
|
| 249 |
+
|
| 250 |
+
m_seenPosition.insert((x<<16) + y);
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
bool
|
| 255 |
+
BackwardsEdge::GetInitialized()
|
| 256 |
+
{
|
| 257 |
+
return m_initialized;
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
// The predecessor container this edge takes its hypotheses from.
const BitmapContainer &BackwardsEdge::GetBitmapContainer() const
{
  return m_prevBitmapContainer;
}
|
| 265 |
+
|
| 266 |
+
void
|
| 267 |
+
BackwardsEdge::PushSuccessors(const size_t x, const size_t y)
|
| 268 |
+
{
|
| 269 |
+
Hypothesis *newHypo;
|
| 270 |
+
|
| 271 |
+
if(y + 1 < m_translations.size() && !SeenPosition(x, y + 1)) {
|
| 272 |
+
SetSeenPosition(x, y + 1);
|
| 273 |
+
newHypo = CreateHypothesis(*m_hypotheses[x], *m_translations.Get(y + 1));
|
| 274 |
+
if(newHypo != NULL) {
|
| 275 |
+
m_parent.Enqueue(x, y + 1, newHypo, (BackwardsEdge*)this);
|
| 276 |
+
}
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
if(x + 1 < m_hypotheses.size() && !SeenPosition(x + 1, y)) {
|
| 280 |
+
SetSeenPosition(x + 1, y);
|
| 281 |
+
newHypo = CreateHypothesis(*m_hypotheses[x + 1], *m_translations.Get(y));
|
| 282 |
+
if(newHypo != NULL) {
|
| 283 |
+
m_parent.Enqueue(x + 1, y, newHypo, (BackwardsEdge*)this);
|
| 284 |
+
}
|
| 285 |
+
}
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 290 |
+
// BitmapContainer Code
|
| 291 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 292 |
+
|
| 293 |
+
/**
 * Create an empty container for one coverage bitmap.
 *
 * \param bitmap        coverage bitmap shared by every hypothesis stored here
 * \param stack         stack that receives hypotheses in ProcessBestHypothesis()
 * \param deterministic enables target-phrase tie-breaking in queue and sort order
 *
 * The hypothesis list, edge set and queue are default-constructed empty, so
 * they need no explicit assignment in the body.
 */
BitmapContainer::BitmapContainer(const Bitmap &bitmap
                                 , HypothesisStackCubePruning &stack
                                 , bool deterministic)
  : m_bitmap(bitmap)
  , m_stack(stack)
  , m_numStackInsertions(0)
  , m_deterministic(deterministic)
{
}
|
| 305 |
+
|
| 306 |
+
// Destroys whatever is still queued, then all edges attached to this container.
BitmapContainer::~BitmapContainer()
{
  // Each remaining queue item is removed from the queue *before* it is
  // deleted, so the queue never holds a pointer to a destroyed item while it
  // reorders itself. The item's hypothesis is deleted along with it.
  while (!m_queue.empty()) {
    HypothesisQueueItem *leftover = m_queue.top();
    m_queue.pop();

    delete leftover->GetHypothesis();
    delete leftover;
  }

  // Delete all edges.
  RemoveAllInColl(m_edges);

  m_edges.clear();
  m_hypotheses.clear();
}
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
void
|
| 327 |
+
BitmapContainer::Enqueue(int hypothesis_pos
|
| 328 |
+
, int translation_pos
|
| 329 |
+
, Hypothesis *hypothesis
|
| 330 |
+
, BackwardsEdge *edge)
|
| 331 |
+
{
|
| 332 |
+
// Only supply target phrase if running deterministic search mode
|
| 333 |
+
const TargetPhrase *target_phrase = m_deterministic ? &(hypothesis->GetCurrTargetPhrase()) : NULL;
|
| 334 |
+
HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos
|
| 335 |
+
, translation_pos
|
| 336 |
+
, hypothesis
|
| 337 |
+
, edge
|
| 338 |
+
, target_phrase);
|
| 339 |
+
IFVERBOSE(2) {
|
| 340 |
+
item->GetHypothesis()->GetManager().GetSentenceStats().StartTimeManageCubes();
|
| 341 |
+
}
|
| 342 |
+
m_queue.push(item);
|
| 343 |
+
IFVERBOSE(2) {
|
| 344 |
+
item->GetHypothesis()->GetManager().GetSentenceStats().StopTimeManageCubes();
|
| 345 |
+
}
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
// Returns the best queue item, or NULL if the queue is empty. With
// keepValue == true the item is only peeked at and stays in the queue.
HypothesisQueueItem *BitmapContainer::Dequeue(bool keepValue)
{
  if (m_queue.empty()) {
    return NULL;
  }

  HypothesisQueueItem *best = m_queue.top();
  if (!keepValue) {
    m_queue.pop();
  }
  return best;
}
|
| 363 |
+
|
| 364 |
+
// Peeks at the best queue item without removing it.
// Returns NULL when the queue is empty (the same convention as Dequeue()),
// because calling top() on an empty priority_queue is undefined behaviour.
HypothesisQueueItem *BitmapContainer::Top() const
{
  if (m_queue.empty()) {
    return NULL;
  }
  return m_queue.top();
}
|
| 369 |
+
|
| 370 |
+
size_t
|
| 371 |
+
BitmapContainer::Size()
|
| 372 |
+
{
|
| 373 |
+
return m_queue.size();
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
bool
|
| 377 |
+
BitmapContainer::Empty() const
|
| 378 |
+
{
|
| 379 |
+
return m_queue.empty();
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
// Read-only access to the hypotheses stored in this container.
const HypothesisSet &BitmapContainer::GetHypotheses() const
{
  return m_hypotheses;
}
|
| 387 |
+
|
| 388 |
+
size_t
|
| 389 |
+
BitmapContainer::GetHypothesesSize() const
|
| 390 |
+
{
|
| 391 |
+
return m_hypotheses.size();
|
| 392 |
+
}
|
| 393 |
+
|
| 394 |
+
// Read-only access to the edges attached to this container.
const BackwardsEdgeSet &BitmapContainer::GetBackwardsEdges()
{
  return m_edges;
}
|
| 399 |
+
|
| 400 |
+
void
|
| 401 |
+
BitmapContainer::AddHypothesis(Hypothesis *hypothesis)
|
| 402 |
+
{
|
| 403 |
+
bool itemExists = false;
|
| 404 |
+
HypothesisSet::const_iterator iter = m_hypotheses.begin();
|
| 405 |
+
HypothesisSet::const_iterator iterEnd = m_hypotheses.end();
|
| 406 |
+
|
| 407 |
+
// cfedermann: do we actually need this check?
|
| 408 |
+
while (iter != iterEnd) {
|
| 409 |
+
if (*iter == hypothesis) {
|
| 410 |
+
itemExists = true;
|
| 411 |
+
break;
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
++iter;
|
| 415 |
+
}
|
| 416 |
+
UTIL_THROW_IF2(itemExists, "Duplicate hypotheses");
|
| 417 |
+
m_hypotheses.push_back(hypothesis);
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
void
|
| 421 |
+
BitmapContainer::AddBackwardsEdge(BackwardsEdge *edge)
|
| 422 |
+
{
|
| 423 |
+
m_edges.insert(edge);
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
void
|
| 427 |
+
BitmapContainer::InitializeEdges()
|
| 428 |
+
{
|
| 429 |
+
BackwardsEdgeSet::iterator iter = m_edges.begin();
|
| 430 |
+
BackwardsEdgeSet::iterator iterEnd = m_edges.end();
|
| 431 |
+
|
| 432 |
+
while (iter != iterEnd) {
|
| 433 |
+
BackwardsEdge *edge = *iter;
|
| 434 |
+
edge->Initialize();
|
| 435 |
+
|
| 436 |
+
++iter;
|
| 437 |
+
}
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
void
|
| 441 |
+
BitmapContainer::EnsureMinStackHyps(const size_t minNumHyps)
|
| 442 |
+
{
|
| 443 |
+
while ((!Empty()) && m_numStackInsertions < minNumHyps) {
|
| 444 |
+
ProcessBestHypothesis();
|
| 445 |
+
}
|
| 446 |
+
}
|
| 447 |
+
|
| 448 |
+
void
|
| 449 |
+
BitmapContainer::ProcessBestHypothesis()
|
| 450 |
+
{
|
| 451 |
+
if (m_queue.empty()) {
|
| 452 |
+
return;
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
// Get the currently best hypothesis from the queue.
|
| 456 |
+
HypothesisQueueItem *item = Dequeue();
|
| 457 |
+
|
| 458 |
+
// If the priority queue is exhausted, we are done and should have exited
|
| 459 |
+
UTIL_THROW_IF2(item == NULL, "Null object");
|
| 460 |
+
|
| 461 |
+
// check we are pulling things off of priority queue in right order
|
| 462 |
+
if (!Empty()) {
|
| 463 |
+
HypothesisQueueItem *check = Dequeue(true);
|
| 464 |
+
UTIL_THROW_IF2(item->GetHypothesis()->GetFutureScore() < check->GetHypothesis()->GetFutureScore(),
|
| 465 |
+
"Non-monotonic total score: "
|
| 466 |
+
<< item->GetHypothesis()->GetFutureScore() << " vs. "
|
| 467 |
+
<< check->GetHypothesis()->GetFutureScore());
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
// Logging for the criminally insane
|
| 471 |
+
IFVERBOSE(3) {
|
| 472 |
+
item->GetHypothesis()->PrintHypothesis();
|
| 473 |
+
}
|
| 474 |
+
|
| 475 |
+
// Add best hypothesis to hypothesis stack.
|
| 476 |
+
const bool newstackentry = m_stack.AddPrune(item->GetHypothesis());
|
| 477 |
+
if (newstackentry)
|
| 478 |
+
m_numStackInsertions++;
|
| 479 |
+
|
| 480 |
+
IFVERBOSE(3) {
|
| 481 |
+
TRACE_ERR("new stack entry flag is " << newstackentry << std::endl);
|
| 482 |
+
}
|
| 483 |
+
|
| 484 |
+
// Create new hypotheses for the two successors of the hypothesis just added.
|
| 485 |
+
item->GetBackwardsEdge()->PushSuccessors(item->GetHypothesisPos(), item->GetTranslationPos());
|
| 486 |
+
|
| 487 |
+
// We are done with the queue item, we delete it.
|
| 488 |
+
delete item;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
void
|
| 492 |
+
BitmapContainer::SortHypotheses()
|
| 493 |
+
{
|
| 494 |
+
std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer(m_deterministic));
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
}
|
| 498 |
+
|
mosesdecoder/moses/BitmapContainer.h
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_BitmapContainer_h
|
| 23 |
+
#define moses_BitmapContainer_h
|
| 24 |
+
|
| 25 |
+
#include <queue>
|
| 26 |
+
#include <set>
|
| 27 |
+
#include <vector>
|
| 28 |
+
|
| 29 |
+
#include "Hypothesis.h"
|
| 30 |
+
#include "HypothesisStackCubePruning.h"
|
| 31 |
+
#include "SquareMatrix.h"
|
| 32 |
+
#include "TranslationOption.h"
|
| 33 |
+
#include "TypeDef.h"
|
| 34 |
+
#include "Bitmap.h"
|
| 35 |
+
|
| 36 |
+
#include <boost/unordered_set.hpp>
|
| 37 |
+
|
| 38 |
+
namespace Moses
|
| 39 |
+
{
|
| 40 |
+
|
| 41 |
+
class BitmapContainer;
|
| 42 |
+
class BackwardsEdge;
|
| 43 |
+
class Hypothesis;
|
| 44 |
+
class HypothesisStackCubePruning;
|
| 45 |
+
class HypothesisQueueItem;
|
| 46 |
+
class QueueItemOrderer;
|
| 47 |
+
class TranslationOptionList;
|
| 48 |
+
|
| 49 |
+
typedef std::vector< Hypothesis* > HypothesisSet;
|
| 50 |
+
typedef std::set< BackwardsEdge* > BackwardsEdgeSet;
|
| 51 |
+
typedef std::priority_queue< HypothesisQueueItem*, std::vector< HypothesisQueueItem* >, QueueItemOrderer> HypothesisQueue;
|
| 52 |
+
|
| 53 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 54 |
+
// Hypothesis Priority Queue Code
|
| 55 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 56 |
+
|
| 57 |
+
//! 1 item in the priority queue for stack decoding (phrase-based)
|
| 58 |
+
class HypothesisQueueItem
|
| 59 |
+
{
|
| 60 |
+
private:
|
| 61 |
+
size_t m_hypothesis_pos, m_translation_pos;
|
| 62 |
+
Hypothesis *m_hypothesis;
|
| 63 |
+
BackwardsEdge *m_edge;
|
| 64 |
+
boost::shared_ptr<TargetPhrase> m_target_phrase;
|
| 65 |
+
|
| 66 |
+
HypothesisQueueItem();
|
| 67 |
+
|
| 68 |
+
public:
|
| 69 |
+
HypothesisQueueItem(const size_t hypothesis_pos
|
| 70 |
+
, const size_t translation_pos
|
| 71 |
+
, Hypothesis *hypothesis
|
| 72 |
+
, BackwardsEdge *edge
|
| 73 |
+
, const TargetPhrase *target_phrase = NULL)
|
| 74 |
+
: m_hypothesis_pos(hypothesis_pos)
|
| 75 |
+
, m_translation_pos(translation_pos)
|
| 76 |
+
, m_hypothesis(hypothesis)
|
| 77 |
+
, m_edge(edge) {
|
| 78 |
+
if (target_phrase != NULL) {
|
| 79 |
+
m_target_phrase.reset(new TargetPhrase(*target_phrase));
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
~HypothesisQueueItem() {
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
int GetHypothesisPos() {
|
| 87 |
+
return m_hypothesis_pos;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
int GetTranslationPos() {
|
| 91 |
+
return m_translation_pos;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
Hypothesis *GetHypothesis() {
|
| 95 |
+
return m_hypothesis;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
BackwardsEdge *GetBackwardsEdge() {
|
| 99 |
+
return m_edge;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
boost::shared_ptr<TargetPhrase> GetTargetPhrase() {
|
| 103 |
+
return m_target_phrase;
|
| 104 |
+
}
|
| 105 |
+
};
|
| 106 |
+
|
| 107 |
+
//! Allows comparison of two HypothesisQueueItem objects by the corresponding scores.
|
| 108 |
+
class QueueItemOrderer
|
| 109 |
+
{
|
| 110 |
+
public:
|
| 111 |
+
bool operator()(HypothesisQueueItem* itemA, HypothesisQueueItem* itemB) const {
|
| 112 |
+
float scoreA = itemA->GetHypothesis()->GetFutureScore();
|
| 113 |
+
float scoreB = itemB->GetHypothesis()->GetFutureScore();
|
| 114 |
+
|
| 115 |
+
if (scoreA < scoreB) {
|
| 116 |
+
return true;
|
| 117 |
+
} else if (scoreA > scoreB) {
|
| 118 |
+
return false;
|
| 119 |
+
} else {
|
| 120 |
+
// Equal scores: break ties by comparing target phrases (if they exist)
|
| 121 |
+
// *Important*: these are pointers to copies of the target phrases from the
|
| 122 |
+
// hypotheses. This class is used to keep priority queues ordered in the
|
| 123 |
+
// background, so comparisons made as those data structures are cleaned up
|
| 124 |
+
// may occur *after* the target phrases in hypotheses have been cleaned up,
|
| 125 |
+
// leading to segfaults if relying on hypotheses to provide target phrases.
|
| 126 |
+
boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase();
|
| 127 |
+
boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase();
|
| 128 |
+
if (!phrA || !phrB) {
|
| 129 |
+
// Fallback: scoreA < scoreB == false, non-deterministic sort
|
| 130 |
+
return false;
|
| 131 |
+
}
|
| 132 |
+
return (phrA->Compare(*phrB) > 0);
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
+
};
|
| 136 |
+
|
| 137 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 138 |
+
// Hypothesis Orderer Code
|
| 139 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 140 |
+
// Allows to compare two Hypothesis objects by the corresponding scores.
|
| 141 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 142 |
+
|
| 143 |
+
class HypothesisScoreOrderer
|
| 144 |
+
{
|
| 145 |
+
private:
|
| 146 |
+
bool m_deterministic;
|
| 147 |
+
|
| 148 |
+
public:
|
| 149 |
+
HypothesisScoreOrderer(const bool deterministic = false)
|
| 150 |
+
: m_deterministic(deterministic) {}
|
| 151 |
+
|
| 152 |
+
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
|
| 153 |
+
|
| 154 |
+
float scoreA = hypoA->GetFutureScore();
|
| 155 |
+
float scoreB = hypoB->GetFutureScore();
|
| 156 |
+
|
| 157 |
+
if (scoreA > scoreB) {
|
| 158 |
+
return true;
|
| 159 |
+
} else if (scoreA < scoreB) {
|
| 160 |
+
return false;
|
| 161 |
+
} else {
|
| 162 |
+
if (m_deterministic) {
|
| 163 |
+
// Equal scores: break ties by comparing target phrases
|
| 164 |
+
return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
|
| 165 |
+
}
|
| 166 |
+
// Fallback: scoreA > scoreB == false, non-deterministic sort
|
| 167 |
+
return false;
|
| 168 |
+
}
|
| 169 |
+
}
|
| 170 |
+
};
|
| 171 |
+
|
| 172 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 173 |
+
// Backwards Edge Code
|
| 174 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 175 |
+
// Encodes an edge pointing to a BitmapContainer.
|
| 176 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 177 |
+
|
| 178 |
+
class BackwardsEdge
|
| 179 |
+
{
|
| 180 |
+
private:
|
| 181 |
+
friend class BitmapContainer;
|
| 182 |
+
bool m_initialized;
|
| 183 |
+
|
| 184 |
+
const BitmapContainer &m_prevBitmapContainer;
|
| 185 |
+
BitmapContainer &m_parent;
|
| 186 |
+
const TranslationOptionList &m_translations;
|
| 187 |
+
const SquareMatrix &m_estimatedScores;
|
| 188 |
+
float m_estimatedScore;
|
| 189 |
+
|
| 190 |
+
bool m_deterministic;
|
| 191 |
+
|
| 192 |
+
std::vector< const Hypothesis* > m_hypotheses;
|
| 193 |
+
boost::unordered_set< int > m_seenPosition;
|
| 194 |
+
|
| 195 |
+
// We don't want to instantiate "empty" objects.
|
| 196 |
+
BackwardsEdge();
|
| 197 |
+
|
| 198 |
+
Hypothesis *CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt);
|
| 199 |
+
bool SeenPosition(const size_t x, const size_t y);
|
| 200 |
+
void SetSeenPosition(const size_t x, const size_t y);
|
| 201 |
+
|
| 202 |
+
protected:
|
| 203 |
+
void Initialize();
|
| 204 |
+
|
| 205 |
+
public:
|
| 206 |
+
BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
| 207 |
+
, BitmapContainer &parent
|
| 208 |
+
, const TranslationOptionList &translations
|
| 209 |
+
, const SquareMatrix &estimatedScores
|
| 210 |
+
, const InputType& source
|
| 211 |
+
, const bool deterministic = false);
|
| 212 |
+
~BackwardsEdge();
|
| 213 |
+
|
| 214 |
+
bool GetInitialized();
|
| 215 |
+
const BitmapContainer &GetBitmapContainer() const;
|
| 216 |
+
int GetDistortionPenalty();
|
| 217 |
+
void PushSuccessors(const size_t x, const size_t y);
|
| 218 |
+
};
|
| 219 |
+
|
| 220 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 221 |
+
// Bitmap Container Code
|
| 222 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 223 |
+
// A BitmapContainer encodes an ordered set of hypotheses and a set of edges
|
| 224 |
+
// pointing to the "generating" BitmapContainers. It also stores a priority
|
| 225 |
+
// queue that contains expanded hypotheses from the connected edges.
|
| 226 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 227 |
+
|
| 228 |
+
class BitmapContainer
|
| 229 |
+
{
|
| 230 |
+
private:
|
| 231 |
+
const Bitmap &m_bitmap;
|
| 232 |
+
HypothesisStackCubePruning &m_stack;
|
| 233 |
+
HypothesisSet m_hypotheses;
|
| 234 |
+
BackwardsEdgeSet m_edges;
|
| 235 |
+
HypothesisQueue m_queue;
|
| 236 |
+
size_t m_numStackInsertions;
|
| 237 |
+
bool m_deterministic;
|
| 238 |
+
|
| 239 |
+
// We always require a corresponding bitmap to be supplied.
|
| 240 |
+
BitmapContainer();
|
| 241 |
+
BitmapContainer(const BitmapContainer &);
|
| 242 |
+
public:
|
| 243 |
+
BitmapContainer(const Bitmap &bitmap
|
| 244 |
+
, HypothesisStackCubePruning &stack
|
| 245 |
+
, bool deterministic = false);
|
| 246 |
+
|
| 247 |
+
// The destructor will also delete all the edges that are
|
| 248 |
+
// connected to this BitmapContainer.
|
| 249 |
+
~BitmapContainer();
|
| 250 |
+
|
| 251 |
+
void Enqueue(int hypothesis_pos, int translation_pos, Hypothesis *hypothesis, BackwardsEdge *edge);
|
| 252 |
+
HypothesisQueueItem *Dequeue(bool keepValue=false);
|
| 253 |
+
HypothesisQueueItem *Top() const;
|
| 254 |
+
size_t Size();
|
| 255 |
+
bool Empty() const;
|
| 256 |
+
|
| 257 |
+
const Bitmap &GetWordsBitmap() const {
|
| 258 |
+
return m_bitmap;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
const HypothesisSet &GetHypotheses() const;
|
| 262 |
+
size_t GetHypothesesSize() const;
|
| 263 |
+
const BackwardsEdgeSet &GetBackwardsEdges();
|
| 264 |
+
|
| 265 |
+
void InitializeEdges();
|
| 266 |
+
void ProcessBestHypothesis();
|
| 267 |
+
void EnsureMinStackHyps(const size_t minNumHyps);
|
| 268 |
+
void AddHypothesis(Hypothesis *hypothesis);
|
| 269 |
+
void AddBackwardsEdge(BackwardsEdge *edge);
|
| 270 |
+
void SortHypotheses();
|
| 271 |
+
};
|
| 272 |
+
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
#endif
|
mosesdecoder/moses/Bitmaps.cpp
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <boost/foreach.hpp>
|
| 2 |
+
#include "Bitmaps.h"
|
| 3 |
+
#include "Util.h"
|
| 4 |
+
|
| 5 |
+
using namespace std;
|
| 6 |
+
|
| 7 |
+
namespace Moses
|
| 8 |
+
{
|
| 9 |
+
// Creates the initial bitmap for an input of `inputSize` words and registers
// it in the collection with an empty table of successors.
Bitmaps::Bitmaps(size_t inputSize, const std::vector<bool> &initSourceCompleted)
  : m_initBitmap(new Bitmap(inputSize, initSourceCompleted))
{
  m_coll.insert(Coll::value_type(m_initBitmap, NextBitmaps()));
}
|
| 14 |
+
|
| 15 |
+
// Deletes every bitmap registered in the collection; they are its keys.
Bitmaps::~Bitmaps()
{
  for (Coll::const_iterator it = m_coll.begin(); it != m_coll.end(); ++it) {
    delete it->first;
  }
}
|
| 22 |
+
|
| 23 |
+
// Returns the registered bitmap equal to `bm` extended by `range`, creating
// and registering it first if no equal bitmap is in the collection yet.
const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range)
{
  // The collection is keyed by pointer, so a candidate has to be built to
  // look for an equal bitmap.
  Bitmap *candidate = new Bitmap(bm, range);

  Coll::const_iterator known = m_coll.find(candidate);
  if (known != m_coll.end()) {
    // An equal bitmap is already registered: discard the candidate.
    delete candidate;
    return *known->first;
  }

  m_coll[candidate] = NextBitmaps();
  return *candidate;
}
|
| 36 |
+
|
| 37 |
+
// Returns the bitmap reached from `bm` by additionally covering `range`.
// The (bm, range) -> bitmap link is cached on first use. `bm` must already be
// registered in the collection (checked with assert only).
const Bitmap &Bitmaps::GetBitmap(const Bitmap &bm, const Range &range)
{
  Coll::iterator iter = m_coll.find(&bm);
  assert(iter != m_coll.end());

  NextBitmaps &successors = iter->second;
  NextBitmaps::const_iterator cached = successors.find(range);
  if (cached != successors.end()) {
    // link exist
    return *cached->second;
  }

  // not seen the link yet.
  const Bitmap *created = &GetNextBitmap(bm, range);
  successors[range] = created;
  return *created;
}
|
| 56 |
+
|
| 57 |
+
}
|
| 58 |
+
|
mosesdecoder/moses/Bitmaps.h
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <boost/unordered_set.hpp>
|
| 4 |
+
#include <boost/unordered_map.hpp>
|
| 5 |
+
#include <set>
|
| 6 |
+
#include "Bitmap.h"
|
| 7 |
+
#include "Util.h"
|
| 8 |
+
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
|
| 12 |
+
class Bitmaps
|
| 13 |
+
{
|
| 14 |
+
typedef boost::unordered_map<Range, const Bitmap*> NextBitmaps;
|
| 15 |
+
typedef boost::unordered_map<const Bitmap*, NextBitmaps, UnorderedComparer<Bitmap>, UnorderedComparer<Bitmap> > Coll;
|
| 16 |
+
//typedef std::set<const Bitmap*, OrderedComparer<Bitmap> > Coll;
|
| 17 |
+
Coll m_coll;
|
| 18 |
+
const Bitmap *m_initBitmap;
|
| 19 |
+
|
| 20 |
+
const Bitmap &GetNextBitmap(const Bitmap &bm, const Range &range);
|
| 21 |
+
public:
|
| 22 |
+
Bitmaps(size_t inputSize, const std::vector<bool> &initSourceCompleted);
|
| 23 |
+
virtual ~Bitmaps();
|
| 24 |
+
|
| 25 |
+
const Bitmap &GetInitialBitmap() const {
|
| 26 |
+
return *m_initBitmap;
|
| 27 |
+
}
|
| 28 |
+
const Bitmap &GetBitmap(const Bitmap &bm, const Range &range);
|
| 29 |
+
|
| 30 |
+
};
|
| 31 |
+
|
| 32 |
+
}
|
mosesdecoder/moses/CMakeLists.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CMake description for the sources in the moses/ directory.
project(moses)

# Source lists, one per sub-directory.
file(GLOB source_moses *.cpp)
file(GLOB source_moses_ff FF/*.cpp)
file(GLOB source_moses_ff_lexicalReordering FF/LexicalReordering/*.cpp)
file(GLOB source_moses_ff_osm FF/OSM-Feature/*.cpp)
file(GLOB source_moses_lm LM/*.cpp)
file(GLOB source_moses_tm TranslationModel/*.cpp)
file(GLOB source_moses_tm_compactPT TranslationModel/CompactPT/*.cpp)
file(GLOB source_moses_tm_cky TranslationModel/CKYPlusParser/*.cpp)


# NOTE(review): none of the source_moses* lists above is used below, and
# biconcor_source is not set anywhere in this file -- confirm whether this
# target was meant to build the moses library from the globbed sources.
add_library(biconcor ${biconcor_source})
|
mosesdecoder/moses/ChartCell.cpp
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include "ChartCell.h"
|
| 24 |
+
#include "ChartCellCollection.h"
|
| 25 |
+
#include "HypergraphOutput.h"
|
| 26 |
+
#include "RuleCubeQueue.h"
|
| 27 |
+
#include "RuleCube.h"
|
| 28 |
+
#include "Range.h"
|
| 29 |
+
#include "Util.h"
|
| 30 |
+
#include "ChartTranslationOptions.h"
|
| 31 |
+
#include "ChartTranslationOptionList.h"
|
| 32 |
+
#include "ChartManager.h"
|
| 33 |
+
#include "util/exception.hh"
|
| 34 |
+
|
| 35 |
+
using namespace std;
|
| 36 |
+
|
| 37 |
+
namespace Moses
|
| 38 |
+
{
|
| 39 |
+
|
| 40 |
+
/** Base-cell constructor.
 * Builds the coverage range from the two positions and then constructs the
 * target label set from that member (not from a temporary range).
 * \param startPos start position of the span
 * \param endPos end position of the span
 */
ChartCellBase::ChartCellBase(size_t startPos, size_t endPos)
  : m_coverage(startPos, endPos)
  , m_targetLabelSet(m_coverage)
{
}
|
| 43 |
+
|
| 44 |
+
//! Destructor: no explicit work is done here.
ChartCellBase::~ChartCellBase()
{
}
|
| 45 |
+
|
| 46 |
+
/** Constructor
|
| 47 |
+
* \param startPos endPos range of this cell
|
| 48 |
+
* \param manager pointer back to the manager
|
| 49 |
+
*/
|
| 50 |
+
ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
|
| 51 |
+
ChartCellBase(startPos, endPos), m_manager(manager)
|
| 52 |
+
{
|
| 53 |
+
m_nBestIsEnabled = manager.options()->nbest.enabled;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
//! Destructor: no explicit work is done here.
ChartCell::~ChartCell()
{
}
|
| 57 |
+
|
| 58 |
+
/** Add the given hypothesis to the cell.
|
| 59 |
+
* Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
|
| 60 |
+
* This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
|
| 61 |
+
* \param hypo Hypothesis to be added
|
| 62 |
+
*/
|
| 63 |
+
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
|
| 64 |
+
{
|
| 65 |
+
const Word &targetLHS = hypo->GetTargetLHS();
|
| 66 |
+
MapType::iterator m = m_hypoColl.find(targetLHS);
|
| 67 |
+
if (m == m_hypoColl.end()) {
|
| 68 |
+
std::pair<Word, ChartHypothesisCollection>
|
| 69 |
+
e(targetLHS, ChartHypothesisCollection(*m_manager.options()));
|
| 70 |
+
m = m_hypoColl.insert(e).first;
|
| 71 |
+
}
|
| 72 |
+
return m->second.AddHypothesis(hypo, m_manager);
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
/** Prune each collection in this cell to a particular size */
|
| 76 |
+
void ChartCell::PruneToSize()
|
| 77 |
+
{
|
| 78 |
+
MapType::iterator iter;
|
| 79 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 80 |
+
ChartHypothesisCollection &coll = iter->second;
|
| 81 |
+
coll.PruneToSize(m_manager);
|
| 82 |
+
}
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
/** Decoding at span level: fill chart cell with hypotheses
|
| 86 |
+
* (implementation of cube pruning)
|
| 87 |
+
* \param transOptList list of applicable rules to create hypotheses for the cell
|
| 88 |
+
* \param allChartCells entire chart - needed to look up underlying hypotheses
|
| 89 |
+
*/
|
| 90 |
+
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
|
| 91 |
+
, const ChartCellCollection &allChartCells)
|
| 92 |
+
{
|
| 93 |
+
// priority queue for applicable rules with selected hypotheses
|
| 94 |
+
RuleCubeQueue queue(m_manager);
|
| 95 |
+
|
| 96 |
+
// add all trans opt into queue. using only 1st child node.
|
| 97 |
+
for (size_t i = 0; i < transOptList.GetSize(); ++i) {
|
| 98 |
+
const ChartTranslationOptions &transOpt = transOptList.Get(i);
|
| 99 |
+
RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
|
| 100 |
+
queue.Add(ruleCube);
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
// pluck things out of queue and add to hypo collection
|
| 104 |
+
const size_t popLimit = m_manager.options()->cube.pop_limit;
|
| 105 |
+
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
|
| 106 |
+
ChartHypothesis *hypo = queue.Pop();
|
| 107 |
+
AddHypothesis(hypo);
|
| 108 |
+
}
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
//! call SortHypotheses() in each hypo collection in this cell
|
| 112 |
+
void ChartCell::SortHypotheses()
|
| 113 |
+
{
|
| 114 |
+
UTIL_THROW_IF2(!m_targetLabelSet.Empty(), "Already sorted");
|
| 115 |
+
|
| 116 |
+
MapType::iterator iter;
|
| 117 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 118 |
+
ChartHypothesisCollection &coll = iter->second;
|
| 119 |
+
|
| 120 |
+
if (coll.GetSize()) {
|
| 121 |
+
coll.SortHypotheses();
|
| 122 |
+
m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
|
| 123 |
+
}
|
| 124 |
+
}
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
/** Return the highest scoring hypothesis out of all the hypo collection in this cell */
|
| 128 |
+
const ChartHypothesis *ChartCell::GetBestHypothesis() const
|
| 129 |
+
{
|
| 130 |
+
const ChartHypothesis *ret = NULL;
|
| 131 |
+
float bestScore = -std::numeric_limits<float>::infinity();
|
| 132 |
+
|
| 133 |
+
MapType::const_iterator iter;
|
| 134 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 135 |
+
const HypoList &sortedList = iter->second.GetSortedHypotheses();
|
| 136 |
+
if (sortedList.size() > 0) {
|
| 137 |
+
const ChartHypothesis *hypo = sortedList[0];
|
| 138 |
+
if (hypo->GetFutureScore() > bestScore) {
|
| 139 |
+
bestScore = hypo->GetFutureScore();
|
| 140 |
+
ret = hypo;
|
| 141 |
+
}
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
return ret;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
//! call CleanupArcList() in each hypo collection in this cell
|
| 149 |
+
void ChartCell::CleanupArcList()
|
| 150 |
+
{
|
| 151 |
+
// only necessary if n-best calculations are enabled
|
| 152 |
+
if (!m_nBestIsEnabled) return;
|
| 153 |
+
|
| 154 |
+
MapType::iterator iter;
|
| 155 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 156 |
+
ChartHypothesisCollection &coll = iter->second;
|
| 157 |
+
coll.CleanupArcList();
|
| 158 |
+
}
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
//! debug info - size of each hypo collection in this cell
|
| 162 |
+
void ChartCell::OutputSizes(std::ostream &out) const
|
| 163 |
+
{
|
| 164 |
+
MapType::const_iterator iter;
|
| 165 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 166 |
+
const Word &targetLHS = iter->first;
|
| 167 |
+
const ChartHypothesisCollection &coll = iter->second;
|
| 168 |
+
|
| 169 |
+
out << targetLHS << "=" << coll.GetSize() << " ";
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
//! debug info - total number of hypos in all hypo collection in this cell
|
| 174 |
+
size_t ChartCell::GetSize() const
|
| 175 |
+
{
|
| 176 |
+
size_t ret = 0;
|
| 177 |
+
MapType::const_iterator iter;
|
| 178 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 179 |
+
const ChartHypothesisCollection &coll = iter->second;
|
| 180 |
+
|
| 181 |
+
ret += coll.GetSize();
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
return ret;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
/** Collect the sorted hypothesis lists of all collections into one new list.
 * The per-label lists are appended one after another in map iteration order;
 * no re-sorting across labels is done here.
 * \return a list allocated with new. This function keeps no pointer to it,
 *         so releasing it is left to the caller.
 */
const HypoList *ChartCell::GetAllSortedHypotheses() const
{
  HypoList *combined = new HypoList();

  for (MapType::const_iterator it = m_hypoColl.begin(); it != m_hypoColl.end(); ++it) {
    const HypoList &sorted = it->second.GetSortedHypotheses();
    std::copy(sorted.begin(), sorted.end(), std::inserter(*combined, combined->end()));
  }

  return combined;
}
|
| 199 |
+
|
| 200 |
+
//! call WriteSearchGraph() for each hypo collection
|
| 201 |
+
void ChartCell::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
|
| 202 |
+
{
|
| 203 |
+
MapType::const_iterator iterOutside;
|
| 204 |
+
for (iterOutside = m_hypoColl.begin(); iterOutside != m_hypoColl.end(); ++iterOutside) {
|
| 205 |
+
const ChartHypothesisCollection &coll = iterOutside->second;
|
| 206 |
+
coll.WriteSearchGraph(writer, reachable);
|
| 207 |
+
}
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
std::ostream& operator<<(std::ostream &out, const ChartCell &cell)
|
| 211 |
+
{
|
| 212 |
+
ChartCell::MapType::const_iterator iterOutside;
|
| 213 |
+
for (iterOutside = cell.m_hypoColl.begin(); iterOutside != cell.m_hypoColl.end(); ++iterOutside) {
|
| 214 |
+
const Word &targetLHS = iterOutside->first;
|
| 215 |
+
cerr << targetLHS << ":" << endl;
|
| 216 |
+
|
| 217 |
+
const ChartHypothesisCollection &coll = iterOutside->second;
|
| 218 |
+
cerr << coll;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
/*
|
| 222 |
+
ChartCell::HCType::const_iterator iter;
|
| 223 |
+
for (iter = cell.m_hypos.begin(); iter != cell.m_hypos.end(); ++iter)
|
| 224 |
+
{
|
| 225 |
+
const ChartHypothesis &hypo = **iter;
|
| 226 |
+
out << hypo << endl;
|
| 227 |
+
}
|
| 228 |
+
*/
|
| 229 |
+
|
| 230 |
+
return out;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
} // namespace
|
mosesdecoder/moses/ChartCell.h
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#pragma once
|
| 23 |
+
|
| 24 |
+
#include <iostream>
|
| 25 |
+
#include <queue>
|
| 26 |
+
#include <map>
|
| 27 |
+
#include <vector>
|
| 28 |
+
#include "Word.h"
|
| 29 |
+
#include "Range.h"
|
| 30 |
+
#include "NonTerminal.h"
|
| 31 |
+
#include "ChartHypothesis.h"
|
| 32 |
+
#include "ChartHypothesisCollection.h"
|
| 33 |
+
#include "RuleCube.h"
|
| 34 |
+
#include "ChartCellLabelSet.h"
|
| 35 |
+
|
| 36 |
+
#include <boost/scoped_ptr.hpp>
|
| 37 |
+
#include <boost/functional/hash.hpp>
|
| 38 |
+
#include <boost/unordered_map.hpp>
|
| 39 |
+
#include <boost/version.hpp>
|
| 40 |
+
|
| 41 |
+
namespace Moses
|
| 42 |
+
{
|
| 43 |
+
class ChartSearchGraphWriter;
|
| 44 |
+
class ChartTranslationOptionList;
|
| 45 |
+
class ChartCellCollection;
|
| 46 |
+
class ChartManager;
|
| 47 |
+
|
| 48 |
+
class ChartCellBase
|
| 49 |
+
{
|
| 50 |
+
public:
|
| 51 |
+
ChartCellBase(size_t startPos, size_t endPos);
|
| 52 |
+
|
| 53 |
+
virtual ~ChartCellBase();
|
| 54 |
+
|
| 55 |
+
const ChartCellLabelSet &GetTargetLabelSet() const {
|
| 56 |
+
return m_targetLabelSet;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
ChartCellLabelSet &MutableTargetLabelSet() {
|
| 60 |
+
return m_targetLabelSet;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
const Range &GetCoverage() const {
|
| 64 |
+
return m_coverage;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
protected:
|
| 68 |
+
const Range m_coverage;
|
| 69 |
+
ChartCellLabelSet m_targetLabelSet;
|
| 70 |
+
};
|
| 71 |
+
|
| 72 |
+
/** 1 cell in chart decoder.
|
| 73 |
+
* Doesn't directly hold hypotheses. Each cell contain a map of ChartHypothesisCollection that have different constituent labels
|
| 74 |
+
*/
|
| 75 |
+
class ChartCell : public ChartCellBase
|
| 76 |
+
{
|
| 77 |
+
friend std::ostream& operator<<(std::ostream&, const ChartCell&);
|
| 78 |
+
public:
|
| 79 |
+
#if defined(BOOST_VERSION) && (BOOST_VERSION >= 104200)
|
| 80 |
+
typedef boost::unordered_map<Word,
|
| 81 |
+
ChartHypothesisCollection,
|
| 82 |
+
NonTerminalHasher,
|
| 83 |
+
NonTerminalEqualityPred
|
| 84 |
+
> MapType;
|
| 85 |
+
#else
|
| 86 |
+
typedef std::map<Word, ChartHypothesisCollection> MapType;
|
| 87 |
+
#endif
|
| 88 |
+
|
| 89 |
+
protected:
|
| 90 |
+
MapType m_hypoColl;
|
| 91 |
+
|
| 92 |
+
bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
|
| 93 |
+
ChartManager &m_manager;
|
| 94 |
+
|
| 95 |
+
public:
|
| 96 |
+
ChartCell(size_t startPos, size_t endPos, ChartManager &manager);
|
| 97 |
+
~ChartCell();
|
| 98 |
+
|
| 99 |
+
void Decode(const ChartTranslationOptionList &transOptList
|
| 100 |
+
,const ChartCellCollection &allChartCells);
|
| 101 |
+
|
| 102 |
+
//! Get all hypotheses in the cell that have the specified constituent label
|
| 103 |
+
const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {
|
| 104 |
+
MapType::const_iterator p = m_hypoColl.find(constituentLabel);
|
| 105 |
+
return (p == m_hypoColl.end()) ? NULL : &(p->second.GetSortedHypotheses());
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
//! for n-best list
|
| 109 |
+
const HypoList *GetAllSortedHypotheses() const;
|
| 110 |
+
|
| 111 |
+
bool AddHypothesis(ChartHypothesis *hypo);
|
| 112 |
+
|
| 113 |
+
void SortHypotheses();
|
| 114 |
+
void PruneToSize();
|
| 115 |
+
|
| 116 |
+
const ChartHypothesis *GetBestHypothesis() const;
|
| 117 |
+
|
| 118 |
+
void CleanupArcList();
|
| 119 |
+
|
| 120 |
+
void OutputSizes(std::ostream &out) const;
|
| 121 |
+
size_t GetSize() const;
|
| 122 |
+
|
| 123 |
+
void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
|
| 124 |
+
|
| 125 |
+
};
|
| 126 |
+
|
| 127 |
+
}
|
| 128 |
+
|
mosesdecoder/moses/ChartCellCollection.cpp
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "ChartCellCollection.h"
|
| 23 |
+
#include "InputType.h"
|
| 24 |
+
#include "Range.h"
|
| 25 |
+
#include "ChartManager.h"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
|
| 30 |
+
/** Destructor.
 * The source-word labels are dropped before the cells are deleted; the cells
 * are stored as raw pointers and are released row by row.
 */
ChartCellCollectionBase::~ChartCellCollectionBase()
{
  m_source.clear();

  typedef std::vector<std::vector<ChartCellBase*> > CellRows;
  CellRows::iterator row = m_cells.begin();
  while (row != m_cells.end()) {
    RemoveAllInColl(*row);
    ++row;
  }
}
|
| 36 |
+
|
| 37 |
+
class CubeCellFactory
|
| 38 |
+
{
|
| 39 |
+
public:
|
| 40 |
+
explicit CubeCellFactory(ChartManager &manager) : m_manager(manager) {}
|
| 41 |
+
|
| 42 |
+
ChartCell *operator()(size_t start, size_t end) const {
|
| 43 |
+
return new ChartCell(start, end, m_manager);
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
private:
|
| 47 |
+
ChartManager &m_manager;
|
| 48 |
+
};
|
| 49 |
+
|
| 50 |
+
/** Costructor
|
| 51 |
+
\param input the input sentence
|
| 52 |
+
\param manager reference back to the manager
|
| 53 |
+
*/
|
| 54 |
+
ChartCellCollection::ChartCellCollection(const InputType &input, ChartManager &manager)
|
| 55 |
+
:ChartCellCollectionBase(input, CubeCellFactory(manager), manager.GetParser()) {}
|
| 56 |
+
|
| 57 |
+
} // namespace
|
| 58 |
+
|
mosesdecoder/moses/ChartCellCollection.h
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
#pragma once
|
| 22 |
+
|
| 23 |
+
#include <boost/ptr_container/ptr_vector.hpp>
|
| 24 |
+
#include "InputType.h"
|
| 25 |
+
#include "ChartCell.h"
|
| 26 |
+
#include "Range.h"
|
| 27 |
+
#include "InputPath.h"
|
| 28 |
+
|
| 29 |
+
namespace Moses
|
| 30 |
+
{
|
| 31 |
+
class InputType;
|
| 32 |
+
class ChartManager;
|
| 33 |
+
class ChartParser;
|
| 34 |
+
|
| 35 |
+
class ChartCellCollectionBase
|
| 36 |
+
{
|
| 37 |
+
public:
|
| 38 |
+
template <class Factory> ChartCellCollectionBase(const InputType &input,
|
| 39 |
+
const Factory &factory,
|
| 40 |
+
const ChartParser &parser)
|
| 41 |
+
:m_cells(input.GetSize()) {
|
| 42 |
+
|
| 43 |
+
size_t size = input.GetSize();
|
| 44 |
+
for (size_t startPos = 0; startPos < size; ++startPos) {
|
| 45 |
+
std::vector<ChartCellBase*> &inner = m_cells[startPos];
|
| 46 |
+
inner.reserve(size - startPos);
|
| 47 |
+
for (size_t endPos = startPos; endPos < size; ++endPos) {
|
| 48 |
+
inner.push_back(factory(startPos, endPos));
|
| 49 |
+
}
|
| 50 |
+
/* Hack: ChartCellLabel shouldn't need to know its span, but the parser
|
| 51 |
+
* gets it from there :-(. The span is actually stored as a reference,
|
| 52 |
+
* which needs to point somewhere, so I have it refer to the ChartCell.
|
| 53 |
+
*/
|
| 54 |
+
const Range &range = inner[0]->GetCoverage();
|
| 55 |
+
|
| 56 |
+
m_source.push_back(new ChartCellLabel(range, input.GetWord(startPos)));
|
| 57 |
+
}
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
virtual ~ChartCellCollectionBase();
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
const ChartCellBase &GetBase(const Range &coverage) const {
|
| 64 |
+
return *m_cells[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()];
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
ChartCellBase &MutableBase(const Range &coverage) {
|
| 68 |
+
return *m_cells[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()];
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
const ChartCellLabel &GetSourceWordLabel(size_t at) const {
|
| 73 |
+
return m_source[at];
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
private:
|
| 77 |
+
std::vector<std::vector<ChartCellBase*> > m_cells;
|
| 78 |
+
|
| 79 |
+
boost::ptr_vector<ChartCellLabel> m_source;
|
| 80 |
+
|
| 81 |
+
};
|
| 82 |
+
|
| 83 |
+
/** Hold all the chart cells for 1 input sentence. A variable of this type is held by the ChartManager
|
| 84 |
+
*/
|
| 85 |
+
class ChartCellCollection : public ChartCellCollectionBase
|
| 86 |
+
{
|
| 87 |
+
public:
|
| 88 |
+
ChartCellCollection(const InputType &input, ChartManager &manager);
|
| 89 |
+
|
| 90 |
+
//! get a chart cell for a particular range
|
| 91 |
+
ChartCell &Get(const Range &coverage) {
|
| 92 |
+
return static_cast<ChartCell&>(MutableBase(coverage));
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
//! get a chart cell for a particular range
|
| 96 |
+
const ChartCell &Get(const Range &coverage) const {
|
| 97 |
+
return static_cast<const ChartCell&>(GetBase(coverage));
|
| 98 |
+
}
|
| 99 |
+
};
|
| 100 |
+
|
| 101 |
+
}
|
| 102 |
+
|
mosesdecoder/moses/ChartCellLabel.h
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "HypoList.h"
|
| 23 |
+
#include "Word.h"
|
| 24 |
+
#include "Range.h"
|
| 25 |
+
#include "ChartParserCallback.h"
|
| 26 |
+
|
| 27 |
+
namespace search
|
| 28 |
+
{
|
| 29 |
+
class Vertex;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
namespace Moses
|
| 33 |
+
{
|
| 34 |
+
|
| 35 |
+
class Word;
|
| 36 |
+
|
| 37 |
+
/** Contains a range, word (non-terms?) and a vector of hypotheses.
|
| 38 |
+
* @todo This is probably incompatible with lattice decoding when the word that spans
|
| 39 |
+
* a position (or positions) can vary.
|
| 40 |
+
* @todo is this to hold sorted hypotheses that are in the queue for creating the next hypos?
|
| 41 |
+
*/
|
| 42 |
+
class ChartCellLabel
|
| 43 |
+
{
|
| 44 |
+
public:
|
| 45 |
+
union Stack {
|
| 46 |
+
const HypoList *cube; // cube pruning
|
| 47 |
+
search::Vertex *incr; // incremental search after filling.
|
| 48 |
+
void *incr_generator; // incremental search during filling.
|
| 49 |
+
};
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
ChartCellLabel(const Range &coverage, const Word &label,
|
| 53 |
+
Stack stack=Stack())
|
| 54 |
+
: m_coverage(coverage)
|
| 55 |
+
, m_label(label)
|
| 56 |
+
, m_stack(stack)
|
| 57 |
+
, m_bestScore(0) {
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
const Range &GetCoverage() const {
|
| 61 |
+
return m_coverage;
|
| 62 |
+
}
|
| 63 |
+
const Word &GetLabel() const {
|
| 64 |
+
return m_label;
|
| 65 |
+
}
|
| 66 |
+
Stack GetStack() const {
|
| 67 |
+
return m_stack;
|
| 68 |
+
}
|
| 69 |
+
Stack &MutableStack() {
|
| 70 |
+
return m_stack;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
//caching of best score on stack
|
| 74 |
+
float GetBestScore(const ChartParserCallback *outColl) const {
|
| 75 |
+
if (m_bestScore == 0) {
|
| 76 |
+
m_bestScore = outColl->GetBestScore(this);
|
| 77 |
+
}
|
| 78 |
+
return m_bestScore;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
private:
|
| 82 |
+
const Range &m_coverage;
|
| 83 |
+
const Word &m_label;
|
| 84 |
+
//const InputPath &m_inputPath;
|
| 85 |
+
Stack m_stack;
|
| 86 |
+
mutable float m_bestScore;
|
| 87 |
+
};
|
| 88 |
+
|
| 89 |
+
}
|
mosesdecoder/moses/ChartCellLabelSet.h
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "ChartCellLabel.h"
|
| 23 |
+
#include "NonTerminal.h"
|
| 24 |
+
#include "moses/FactorCollection.h"
|
| 25 |
+
|
| 26 |
+
#include <boost/functional/hash.hpp>
|
| 27 |
+
#include <boost/unordered_map.hpp>
|
| 28 |
+
#include <boost/version.hpp>
|
| 29 |
+
|
| 30 |
+
namespace Moses
|
| 31 |
+
{
|
| 32 |
+
|
| 33 |
+
class ChartHypothesisCollection;
|
| 34 |
+
|
| 35 |
+
/** @todo I have no idea what's in here
|
| 36 |
+
*/
|
| 37 |
+
class ChartCellLabelSet
|
| 38 |
+
{
|
| 39 |
+
private:
|
| 40 |
+
|
| 41 |
+
typedef std::vector<ChartCellLabel*> MapType;
|
| 42 |
+
|
| 43 |
+
public:
|
| 44 |
+
typedef MapType::const_iterator const_iterator;
|
| 45 |
+
typedef MapType::iterator iterator;
|
| 46 |
+
|
| 47 |
+
ChartCellLabelSet(const Range &coverage)
|
| 48 |
+
: m_coverage(coverage)
|
| 49 |
+
, m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
|
| 50 |
+
, m_size(0) { }
|
| 51 |
+
|
| 52 |
+
~ChartCellLabelSet() {
|
| 53 |
+
RemoveAllInColl(m_map);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
// TODO: skip empty elements when iterating, or deprecate this
|
| 57 |
+
const_iterator begin() const {
|
| 58 |
+
return m_map.begin();
|
| 59 |
+
}
|
| 60 |
+
const_iterator end() const {
|
| 61 |
+
return m_map.end();
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
iterator mutable_begin() {
|
| 65 |
+
return m_map.begin();
|
| 66 |
+
}
|
| 67 |
+
iterator mutable_end() {
|
| 68 |
+
return m_map.end();
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
void AddWord(const Word &w) {
|
| 72 |
+
size_t idx = w[0]->GetId();
|
| 73 |
+
if (! ChartCellExists(idx)) {
|
| 74 |
+
m_size++;
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
m_map[idx] = new ChartCellLabel(m_coverage, w);
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
// Stack is a HypoList or whatever the search algorithm uses.
|
| 82 |
+
void AddConstituent(const Word &w, const HypoList *stack) {
|
| 83 |
+
size_t idx = w[0]->GetId();
|
| 84 |
+
if (ChartCellExists(idx)) {
|
| 85 |
+
ChartCellLabel::Stack & s = m_map[idx]->MutableStack();
|
| 86 |
+
s.cube = stack;
|
| 87 |
+
} else {
|
| 88 |
+
ChartCellLabel::Stack s;
|
| 89 |
+
s.cube = stack;
|
| 90 |
+
m_size++;
|
| 91 |
+
m_map[idx] = new ChartCellLabel(m_coverage, w, s);
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
// grow vector if necessary
|
| 96 |
+
bool ChartCellExists(size_t idx) {
|
| 97 |
+
try {
|
| 98 |
+
if (m_map.at(idx) != NULL) {
|
| 99 |
+
return true;
|
| 100 |
+
}
|
| 101 |
+
} catch (const std::out_of_range& oor) {
|
| 102 |
+
m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL);
|
| 103 |
+
}
|
| 104 |
+
return false;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
bool Empty() const {
|
| 108 |
+
return m_size == 0;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
size_t GetSize() const {
|
| 112 |
+
return m_size;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
const ChartCellLabel *Find(const Word &w) const {
|
| 116 |
+
size_t idx = w[0]->GetId();
|
| 117 |
+
try {
|
| 118 |
+
return m_map.at(idx);
|
| 119 |
+
} catch (const std::out_of_range& oor) {
|
| 120 |
+
return NULL;
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
const ChartCellLabel *Find(size_t idx) const {
|
| 125 |
+
try {
|
| 126 |
+
return m_map.at(idx);
|
| 127 |
+
} catch (const std::out_of_range& oor) {
|
| 128 |
+
return NULL;
|
| 129 |
+
}
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
ChartCellLabel::Stack &FindOrInsert(const Word &w) {
|
| 133 |
+
size_t idx = w[0]->GetId();
|
| 134 |
+
if (! ChartCellExists(idx)) {
|
| 135 |
+
m_size++;
|
| 136 |
+
m_map[idx] = new ChartCellLabel(m_coverage, w);
|
| 137 |
+
}
|
| 138 |
+
return m_map[idx]->MutableStack();
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
private:
|
| 142 |
+
const Range &m_coverage;
|
| 143 |
+
MapType m_map;
|
| 144 |
+
size_t m_size;
|
| 145 |
+
};
|
| 146 |
+
|
| 147 |
+
}
|
mosesdecoder/moses/ChartHypothesis.cpp
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// vim:tabstop=2
|
| 2 |
+
/***********************************************************************
|
| 3 |
+
Moses - factored phrase-based language decoder
|
| 4 |
+
Copyright (C) 2010 Hieu Hoang
|
| 5 |
+
|
| 6 |
+
This library is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU Lesser General Public
|
| 8 |
+
License as published by the Free Software Foundation; either
|
| 9 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This library is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 14 |
+
Lesser General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU Lesser General Public
|
| 17 |
+
License along with this library; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 19 |
+
***********************************************************************/
|
| 20 |
+
|
| 21 |
+
#include <algorithm>
|
| 22 |
+
#include <vector>
|
| 23 |
+
#include "ChartHypothesis.h"
|
| 24 |
+
#include "RuleCubeItem.h"
|
| 25 |
+
#include "ChartCell.h"
|
| 26 |
+
#include "ChartManager.h"
|
| 27 |
+
#include "TargetPhrase.h"
|
| 28 |
+
#include "Phrase.h"
|
| 29 |
+
#include "StaticData.h"
|
| 30 |
+
#include "ChartTranslationOptions.h"
|
| 31 |
+
#include "moses/FF/FFState.h"
|
| 32 |
+
#include "moses/FF/StatefulFeatureFunction.h"
|
| 33 |
+
#include "moses/FF/StatelessFeatureFunction.h"
|
| 34 |
+
|
| 35 |
+
using namespace std;
|
| 36 |
+
|
| 37 |
+
namespace Moses
|
| 38 |
+
{
|
| 39 |
+
|
| 40 |
+
/** Create a hypothesis from a rule
|
| 41 |
+
* \param transOpt wrapper around the rule
|
| 42 |
+
* \param item rule cube item; supplies the translation option and the child hypotheses
|
| 43 |
+
* \param manager reference back to manager
|
| 44 |
+
*/
|
| 45 |
+
ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
                                 const RuleCubeItem &item,
                                 ChartManager &manager)
  :m_transOpt(item.GetTranslationDimension().GetTranslationOption())
  ,m_currSourceWordsRange(transOpt.GetSourceWordsRange())
  ,m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
  ,m_totalScore(0.0f) // defined value until EvaluateWhenApplied() computes the real score
  ,m_arcList(NULL)
  ,m_winningHypo(NULL)
  ,m_manager(manager)
  ,m_id(manager.GetNextHypoId())
{
  // One state slot per stateful feature function was allocated above; the
  // slots start out NULL and are filled in by EvaluateWhenApplied().

  // underlying hypotheses for sub-spans, in the order the item lists them
  const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions();
  m_prevHypos.reserve(childEntries.size());
  for (size_t i = 0; i < childEntries.size(); ++i) {
    m_prevHypos.push_back(childEntries[i].GetHypothesis());
  }
}
|
| 64 |
+
|
| 65 |
+
// Intended to be used by ChartKBestExtractor only. This creates a mock
|
| 66 |
+
// ChartHypothesis for use by the extractor's top-level target vertex.
|
| 67 |
+
ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
                                 const ChartKBestExtractor & /*unused*/)
  :m_currSourceWordsRange(pred.m_currSourceWordsRange)
  ,m_totalScore(pred.m_totalScore)
  ,m_arcList(NULL)
  ,m_winningHypo(NULL)
  ,m_prevHypos(1, &pred) // single predecessor: the existing top-level hypothesis
  ,m_manager(pred.m_manager)
  ,m_id(pred.m_manager.GetNextHypoId())
{
  // Range and total score are copied from pred. The translation option is
  // left unset and no feature-function state slots are allocated.
}
|
| 79 |
+
|
| 80 |
+
ChartHypothesis::~ChartHypothesis()
{
  // release the feature function states held by this hypothesis
  std::vector<const FFState*>::iterator stateIt;
  for (stateIt = m_ffStates.begin(); stateIt != m_ffStates.end(); ++stateIt) {
    delete *stateIt;
  }

  if (m_arcList == NULL) {
    return;
  }

  // hypotheses that were recombined away are not in the chart; the arc list
  // is what deletes them
  for (size_t i = 0; i < m_arcList->size(); ++i) {
    delete (*m_arcList)[i];
  }
  m_arcList->clear();

  delete m_arcList;
}
|
| 99 |
+
|
| 100 |
+
/** Create full output phrase that is contained in the hypothesis (and its children)
|
| 101 |
+
* \param outPhrase full output phrase as return argument
|
| 102 |
+
*/
|
| 103 |
+
void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
{
  // Walks the target side of the rule left to right: non-terminals are
  // expanded by recursing into the matching child hypothesis, terminals are
  // appended directly. With a placeholder factor configured, a terminal that
  // is aligned to exactly one source word takes that word's placeholder
  // factor as its factor 0, if the source word has one.
  const FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor;
  const TargetPhrase &rule = GetCurrTargetPhrase();

  for (size_t pos = 0; pos < rule.GetSize(); ++pos) {
    const Word &word = rule.GetWord(pos);

    if (word.IsNonTerminal()) {
      // non-term. fill out with prev hypo
      const size_t nonTermInd = rule.GetAlignNonTerm().GetNonTermIndexMap()[pos];
      m_prevHypos[nonTermInd]->GetOutputPhrase(outPhrase);
      continue;
    }

    outPhrase.AddWord(word);

    if (placeholderFactor == NOT_FOUND) {
      continue;
    }

    std::set<size_t> sourcePosSet = rule.GetAlignTerm().GetAlignmentsForTarget(pos);
    if (sourcePosSet.size() != 1) {
      continue;
    }

    const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
    UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
                   "No source rule");

    const size_t sourcePos = *sourcePosSet.begin();
    const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
    UTIL_THROW_IF2(sourceWord == NULL,
                   "No source word");

    const Factor *factor = sourceWord->GetFactor(placeholderFactor);
    if (factor) {
      outPhrase.Back()[0] = factor;
    }
  }
}
|
| 138 |
+
|
| 139 |
+
/** Return full output phrase */
|
| 140 |
+
Phrase ChartHypothesis::GetOutputPhrase() const
{
  // Build into a local phrase constructed with ARRAY_SIZE_INCR, then return
  // it by value.
  Phrase result(ARRAY_SIZE_INCR);
  this->GetOutputPhrase(result);
  return result;
}
|
| 146 |
+
|
| 147 |
+
/** TODO: this method isn't used anywhere. Remove? */
|
| 148 |
+
void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const
|
| 149 |
+
{
|
| 150 |
+
const TargetPhrase &tp = GetCurrTargetPhrase();
|
| 151 |
+
|
| 152 |
+
size_t targetSize = tp.GetSize();
|
| 153 |
+
for (size_t i = 0; i < targetSize; ++i) {
|
| 154 |
+
size_t pos;
|
| 155 |
+
if (leftRightMost == 1) {
|
| 156 |
+
pos = i;
|
| 157 |
+
} else if (leftRightMost == 2) {
|
| 158 |
+
pos = targetSize - i - 1;
|
| 159 |
+
} else {
|
| 160 |
+
abort();
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
const Word &word = tp.GetWord(pos);
|
| 164 |
+
|
| 165 |
+
if (word.IsNonTerminal()) {
|
| 166 |
+
// non-term. fill out with prev hypo
|
| 167 |
+
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
|
| 168 |
+
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
|
| 169 |
+
prevHypo->GetOutputPhrase(outPhrase);
|
| 170 |
+
} else {
|
| 171 |
+
outPhrase.AddWord(word);
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
if (outPhrase.GetSize() >= numWords) {
|
| 175 |
+
return;
|
| 176 |
+
}
|
| 177 |
+
}
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
/** calculate total score */
|
| 181 |
+
void ChartHypothesis::EvaluateWhenApplied()
|
| 182 |
+
{
|
| 183 |
+
const StaticData &staticData = StaticData::Instance();
|
| 184 |
+
|
| 185 |
+
// compute values of stateless feature functions that were not
|
| 186 |
+
// cached in the translation option-- there is no principled distinction
|
| 187 |
+
const std::vector<const StatelessFeatureFunction*>& sfs =
|
| 188 |
+
StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
| 189 |
+
for (unsigned i = 0; i < sfs.size(); ++i) {
|
| 190 |
+
if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
|
| 191 |
+
sfs[i]->EvaluateWhenApplied(*this,&m_currScoreBreakdown);
|
| 192 |
+
}
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
const std::vector<const StatefulFeatureFunction*>& ffs =
|
| 196 |
+
StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 197 |
+
for (unsigned i = 0; i < ffs.size(); ++i) {
|
| 198 |
+
if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
|
| 199 |
+
m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_currScoreBreakdown);
|
| 200 |
+
}
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
// total score from current translation rule
|
| 204 |
+
m_totalScore = GetTranslationOption().GetScores().GetWeightedScore();
|
| 205 |
+
m_totalScore += m_currScoreBreakdown.GetWeightedScore();
|
| 206 |
+
|
| 207 |
+
// total scores from prev hypos
|
| 208 |
+
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
|
| 209 |
+
const ChartHypothesis &prevHypo = **iter;
|
| 210 |
+
m_totalScore += prevHypo.GetFutureScore();
|
| 211 |
+
}
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
/** Record loserHypo as an arc of this hypothesis.
 * Any arcs loserHypo has collected are taken over first, so this hypothesis
 * ends up holding loserHypo's arcs followed by loserHypo itself.
 * loserHypo's own arc list pointer is reset to NULL, so its destructor will
 * not delete the transferred arcs a second time.
 * \param loserHypo hypothesis to append to the arc list; must not be NULL
 */
void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
{
  ChartArcList *loserArcs = loserHypo->m_arcList;

  if (!m_arcList) {
    if (loserArcs) {
      // we don't have an arcList, but loser does: take ownership, we'll delete
      m_arcList = loserArcs;
      loserHypo->m_arcList = NULL; // prevent a double deletion
    } else {
      m_arcList = new ChartArcList();
    }
  } else if (loserArcs) {
    // both have an arc list: append the loser's arcs, then free its list.
    // vector::insert copes with an empty source list and needs no <cstring>.
    m_arcList->insert(m_arcList->end(), loserArcs->begin(), loserArcs->end());
    delete loserArcs;
    loserHypo->m_arcList = NULL;
  }
  // else: loserHypo has no arcs of its own, nothing to merge

  m_arcList->push_back(loserHypo);
}
|
| 240 |
+
|
| 241 |
+
// sorting helper
|
| 242 |
+
struct CompareChartHypothesisTotalScore {
|
| 243 |
+
bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const {
|
| 244 |
+
return hypo1->GetFutureScore() > hypo2->GetFutureScore();
|
| 245 |
+
}
|
| 246 |
+
};
|
| 247 |
+
|
| 248 |
+
void ChartHypothesis::CleanupArcList()
|
| 249 |
+
{
|
| 250 |
+
// point this hypo's main hypo to itself
|
| 251 |
+
m_winningHypo = this;
|
| 252 |
+
|
| 253 |
+
if (!m_arcList) return;
|
| 254 |
+
|
| 255 |
+
/* keep only number of arcs we need to create all n-best paths.
|
| 256 |
+
* However, may not be enough if only unique candidates are needed,
|
| 257 |
+
* so we'll keep all of arc list if nedd distinct n-best list
|
| 258 |
+
*/
|
| 259 |
+
AllOptions const& opts = *StaticData::Instance().options();
|
| 260 |
+
size_t nBestSize = opts.nbest.nbest_size;
|
| 261 |
+
bool distinctNBest = (opts.nbest.only_distinct
|
| 262 |
+
|| opts.mbr.enabled
|
| 263 |
+
|| opts.output.NeedSearchGraph()
|
| 264 |
+
|| !opts.output.SearchGraphHG.empty());
|
| 265 |
+
|
| 266 |
+
if (!distinctNBest && m_arcList->size() > nBestSize) {
|
| 267 |
+
// prune arc list only if there too many arcs
|
| 268 |
+
NTH_ELEMENT4(m_arcList->begin()
|
| 269 |
+
, m_arcList->begin() + nBestSize - 1
|
| 270 |
+
, m_arcList->end()
|
| 271 |
+
, CompareChartHypothesisTotalScore());
|
| 272 |
+
|
| 273 |
+
// delete bad ones
|
| 274 |
+
ChartArcList::iterator iter;
|
| 275 |
+
for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
|
| 276 |
+
ChartHypothesis *arc = *iter;
|
| 277 |
+
delete arc;
|
| 278 |
+
}
|
| 279 |
+
m_arcList->erase(m_arcList->begin() + nBestSize
|
| 280 |
+
, m_arcList->end());
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
// set all arc's main hypo variable to this hypo
|
| 284 |
+
ChartArcList::iterator iter = m_arcList->begin();
|
| 285 |
+
for (; iter != m_arcList->end() ; ++iter) {
|
| 286 |
+
ChartHypothesis *arc = *iter;
|
| 287 |
+
arc->SetWinningHypo(this);
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
//cerr << m_arcList->size() << " ";
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
void ChartHypothesis::SetWinningHypo(const ChartHypothesis *hypo)
|
| 294 |
+
{
|
| 295 |
+
m_winningHypo = hypo;
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
size_t ChartHypothesis::hash() const
|
| 299 |
+
{
|
| 300 |
+
size_t seed = 0;
|
| 301 |
+
|
| 302 |
+
// states
|
| 303 |
+
for (size_t i = 0; i < m_ffStates.size(); ++i) {
|
| 304 |
+
const FFState *state = m_ffStates[i];
|
| 305 |
+
size_t hash = state->hash();
|
| 306 |
+
boost::hash_combine(seed, hash);
|
| 307 |
+
}
|
| 308 |
+
return seed;
|
| 309 |
+
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
bool ChartHypothesis::operator==(const ChartHypothesis& other) const
|
| 313 |
+
{
|
| 314 |
+
// states
|
| 315 |
+
for (size_t i = 0; i < m_ffStates.size(); ++i) {
|
| 316 |
+
const FFState &thisState = *m_ffStates[i];
|
| 317 |
+
const FFState &otherState = *other.m_ffStates[i];
|
| 318 |
+
if (thisState != otherState) {
|
| 319 |
+
return false;
|
| 320 |
+
}
|
| 321 |
+
}
|
| 322 |
+
return true;
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
TO_STRING_BODY(ChartHypothesis)
|
| 326 |
+
|
| 327 |
+
// friend
|
| 328 |
+
std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
|
| 329 |
+
{
|
| 330 |
+
|
| 331 |
+
out << hypo.GetId();
|
| 332 |
+
|
| 333 |
+
// recombination
|
| 334 |
+
if (hypo.GetWinningHypothesis() != NULL &&
|
| 335 |
+
hypo.GetWinningHypothesis() != &hypo) {
|
| 336 |
+
out << "->" << hypo.GetWinningHypothesis()->GetId();
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
if (hypo.GetManager().options()->output.include_lhs_in_search_graph) {
|
| 340 |
+
out << " " << hypo.GetTargetLHS() << "=>";
|
| 341 |
+
}
|
| 342 |
+
out << " " << hypo.GetCurrTargetPhrase()
|
| 343 |
+
//<< " " << outPhrase
|
| 344 |
+
<< " " << hypo.GetCurrSourceRange();
|
| 345 |
+
|
| 346 |
+
HypoList::const_iterator iter;
|
| 347 |
+
for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter) {
|
| 348 |
+
const ChartHypothesis &prevHypo = **iter;
|
| 349 |
+
out << " " << prevHypo.GetId();
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
out << " [total=" << hypo.GetFutureScore() << "]";
|
| 353 |
+
out << " " << hypo.GetScoreBreakdown();
|
| 354 |
+
|
| 355 |
+
//out << endl;
|
| 356 |
+
|
| 357 |
+
return out;
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
}
|
mosesdecoder/moses/ChartHypothesis.h
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// vim:tabstop=2
|
| 2 |
+
/***********************************************************************
|
| 3 |
+
Moses - factored phrase-based language decoder
|
| 4 |
+
Copyright (C) 2010 Hieu Hoang
|
| 5 |
+
|
| 6 |
+
This library is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU Lesser General Public
|
| 8 |
+
License as published by the Free Software Foundation; either
|
| 9 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This library is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 14 |
+
Lesser General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU Lesser General Public
|
| 17 |
+
License along with this library; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 19 |
+
***********************************************************************/
|
| 20 |
+
|
| 21 |
+
#pragma once
|
| 22 |
+
|
| 23 |
+
#include <vector>
|
| 24 |
+
#include <boost/scoped_ptr.hpp>
|
| 25 |
+
#include "Util.h"
|
| 26 |
+
#include "Range.h"
|
| 27 |
+
#include "ScoreComponentCollection.h"
|
| 28 |
+
#include "Phrase.h"
|
| 29 |
+
#include "ChartTranslationOptions.h"
|
| 30 |
+
#include "ObjectPool.h"
|
| 31 |
+
|
| 32 |
+
namespace Moses
|
| 33 |
+
{
|
| 34 |
+
|
| 35 |
+
class ChartKBestExtractor;
|
| 36 |
+
class ChartHypothesis;
|
| 37 |
+
class ChartManager;
|
| 38 |
+
class RuleCubeItem;
|
| 39 |
+
class FFState;
|
| 40 |
+
|
| 41 |
+
typedef std::vector<ChartHypothesis*> ChartArcList;
|
| 42 |
+
|
| 43 |
+
/** a hypothesis in the hierarchical/syntax decoder.
|
| 44 |
+
* Contain a pointer to the current target phrase, a vector of previous hypos, and some scores
|
| 45 |
+
*/
|
| 46 |
+
class ChartHypothesis
|
| 47 |
+
{
|
| 48 |
+
friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
|
| 49 |
+
// friend class ChartKBestExtractor;
|
| 50 |
+
|
| 51 |
+
protected:
|
| 52 |
+
|
| 53 |
+
boost::shared_ptr<ChartTranslationOption> m_transOpt;
|
| 54 |
+
|
| 55 |
+
Range m_currSourceWordsRange;
|
| 56 |
+
std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
|
| 57 |
+
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
|
| 58 |
+
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
|
| 59 |
+
mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
|
| 60 |
+
ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
|
| 61 |
+
,m_lmNGram
|
| 62 |
+
,m_lmPrefix;
|
| 63 |
+
float m_totalScore;
|
| 64 |
+
|
| 65 |
+
ChartArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
|
| 66 |
+
const ChartHypothesis *m_winningHypo;
|
| 67 |
+
|
| 68 |
+
std::vector<const ChartHypothesis*> m_prevHypos; // always sorted by source position?
|
| 69 |
+
|
| 70 |
+
ChartManager& m_manager;
|
| 71 |
+
|
| 72 |
+
unsigned m_id; /* pkoehn wants to log the order in which hypotheses were generated */
|
| 73 |
+
|
| 74 |
+
//! not implemented
|
| 75 |
+
ChartHypothesis();
|
| 76 |
+
|
| 77 |
+
//! not implemented
|
| 78 |
+
ChartHypothesis(const ChartHypothesis ©);
|
| 79 |
+
|
| 80 |
+
public:
|
| 81 |
+
ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
|
| 82 |
+
ChartManager &manager);
|
| 83 |
+
|
| 84 |
+
//! only used by ChartKBestExtractor
|
| 85 |
+
ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
|
| 86 |
+
|
| 87 |
+
~ChartHypothesis();
|
| 88 |
+
|
| 89 |
+
unsigned GetId() const {
|
| 90 |
+
return m_id;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
const ChartTranslationOption &GetTranslationOption() const {
|
| 94 |
+
return *m_transOpt;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
//! Get the rule that created this hypothesis
|
| 98 |
+
const TargetPhrase &GetCurrTargetPhrase() const {
|
| 99 |
+
return m_transOpt->GetPhrase();
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
//! the source range that this hypothesis spans
|
| 103 |
+
const Range &GetCurrSourceRange() const {
|
| 104 |
+
return m_currSourceWordsRange;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
//! the arc list when creating n-best lists
|
| 108 |
+
inline const ChartArcList* GetArcList() const {
|
| 109 |
+
return m_arcList;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
//! the feature function states for a particular feature \param featureID
|
| 113 |
+
inline const FFState* GetFFState( size_t featureID ) const {
|
| 114 |
+
return m_ffStates[ featureID ];
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
//! reference back to the manager
|
| 118 |
+
inline const ChartManager& GetManager() const {
|
| 119 |
+
return m_manager;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
void GetOutputPhrase(Phrase &outPhrase) const;
|
| 123 |
+
Phrase GetOutputPhrase() const;
|
| 124 |
+
|
| 125 |
+
// get leftmost/rightmost words only
|
| 126 |
+
// leftRightMost: 1=left, 2=right
|
| 127 |
+
void GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const;
|
| 128 |
+
|
| 129 |
+
void EvaluateWhenApplied();
|
| 130 |
+
|
| 131 |
+
void AddArc(ChartHypothesis *loserHypo);
|
| 132 |
+
void CleanupArcList();
|
| 133 |
+
void SetWinningHypo(const ChartHypothesis *hypo);
|
| 134 |
+
|
| 135 |
+
//! get the unweighted score for each feature function
|
| 136 |
+
const ScoreComponentCollection &GetScoreBreakdown() const {
|
| 137 |
+
// Note: never call this method before m_currScoreBreakdown is fully computed
|
| 138 |
+
if (!m_scoreBreakdown.get()) {
|
| 139 |
+
m_scoreBreakdown.reset(new ScoreComponentCollection());
|
| 140 |
+
// score breakdown from current translation rule
|
| 141 |
+
if (m_transOpt) {
|
| 142 |
+
m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
|
| 143 |
+
}
|
| 144 |
+
m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
|
| 145 |
+
// score breakdowns from prev hypos
|
| 146 |
+
for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
|
| 147 |
+
const ChartHypothesis &prevHypo = **iter;
|
| 148 |
+
m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
|
| 149 |
+
}
|
| 150 |
+
}
|
| 151 |
+
return *(m_scoreBreakdown.get());
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
//! get the unweighted score delta for each feature function
|
| 155 |
+
const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
|
| 156 |
+
// Note: never call this method before m_currScoreBreakdown is fully computed
|
| 157 |
+
if (!m_deltaScoreBreakdown.get()) {
|
| 158 |
+
m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
|
| 159 |
+
// score breakdown from current translation rule
|
| 160 |
+
if (m_transOpt) {
|
| 161 |
+
m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
|
| 162 |
+
}
|
| 163 |
+
m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
|
| 164 |
+
// delta: score breakdowns from prev hypos _not_ added
|
| 165 |
+
}
|
| 166 |
+
return *(m_deltaScoreBreakdown.get());
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
//! Get the weighted total score
|
| 170 |
+
float GetFutureScore() const {
|
| 171 |
+
// scores from current translation rule. eg. translation models & word penalty
|
| 172 |
+
return m_totalScore;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
//! vector of previous hypotheses this hypo is built on
|
| 176 |
+
const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
|
| 177 |
+
return m_prevHypos;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
//! get a particular previous hypos
|
| 181 |
+
const ChartHypothesis* GetPrevHypo(size_t pos) const {
|
| 182 |
+
return m_prevHypos[pos];
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
//! get the constituency label that covers this hypo
|
| 186 |
+
const Word &GetTargetLHS() const {
|
| 187 |
+
return GetCurrTargetPhrase().GetTargetLHS();
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
//! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo is this hypo is in the arc list
|
| 191 |
+
const ChartHypothesis* GetWinningHypothesis() const {
|
| 192 |
+
return m_winningHypo;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
// for unordered_set in stack
|
| 196 |
+
size_t hash() const;
|
| 197 |
+
bool operator==(const ChartHypothesis& other) const;
|
| 198 |
+
|
| 199 |
+
TO_STRING();
|
| 200 |
+
|
| 201 |
+
}; // class ChartHypothesis
|
| 202 |
+
|
| 203 |
+
}
|
| 204 |
+
|
mosesdecoder/moses/ChartHypothesisCollection.cpp
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include "StaticData.h"
|
| 24 |
+
#include "ChartHypothesisCollection.h"
|
| 25 |
+
#include "ChartHypothesis.h"
|
| 26 |
+
#include "ChartManager.h"
|
| 27 |
+
#include "HypergraphOutput.h"
|
| 28 |
+
#include "util/exception.hh"
|
| 29 |
+
#include "parameters/AllOptions.h"
|
| 30 |
+
|
| 31 |
+
using namespace std;
|
| 32 |
+
using namespace Moses;
|
| 33 |
+
|
| 34 |
+
namespace Moses
|
| 35 |
+
{
|
| 36 |
+
|
| 37 |
+
ChartHypothesisCollection::ChartHypothesisCollection(AllOptions const& opts)
{
  // No hypothesis has been seen yet, so the best score starts at -infinity.
  m_bestScore = -std::numeric_limits<float>::infinity();

  // Settings are read from the options object passed in, not from StaticData.
  m_nBestIsEnabled = opts.nbest.enabled;
  m_maxHypoStackSize = opts.search.stack_size;
  m_beamWidth = opts.search.beam_width;
}
|
| 46 |
+
|
| 47 |
+
ChartHypothesisCollection::~ChartHypothesisCollection()
{
  // delete every hypothesis still stored in the collection
  HCType::iterator cursor = m_hypos.begin();
  while (cursor != m_hypos.end()) {
    delete *cursor;
    ++cursor;
  }
}
|
| 56 |
+
|
| 57 |
+
/** public function to add hypothesis to this collection.
|
| 58 |
+
* Returns false if equiv hypo exists in collection, otherwise returns true.
|
| 59 |
+
* Takes care of update arc list for n-best list creation.
|
| 60 |
+
* Takes ownership of hypo: it may be deleted here (e.g. when it is discarded), so the caller must not delete it once this function has been called.
|
| 61 |
+
* \param hypo hypothesis to add
|
| 62 |
+
* \param manager pointer back to manager
|
| 63 |
+
*/
|
| 64 |
+
bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManager &manager)
|
| 65 |
+
{
|
| 66 |
+
if (hypo->GetFutureScore() == - std::numeric_limits<float>::infinity()) {
|
| 67 |
+
manager.GetSentenceStats().AddDiscarded();
|
| 68 |
+
VERBOSE(3,"discarded, -inf score" << std::endl);
|
| 69 |
+
delete hypo;
|
| 70 |
+
return false;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
if (hypo->GetFutureScore() < m_bestScore + m_beamWidth) {
|
| 74 |
+
// really bad score. don't bother adding hypo into collection
|
| 75 |
+
manager.GetSentenceStats().AddDiscarded();
|
| 76 |
+
VERBOSE(3,"discarded, too bad for stack" << std::endl);
|
| 77 |
+
delete hypo;
|
| 78 |
+
return false;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
// over threshold, try to add to collection
|
| 82 |
+
std::pair<HCType::iterator, bool> addRet = Add(hypo, manager);
|
| 83 |
+
|
| 84 |
+
// does it have the same state as an existing hypothesis?
|
| 85 |
+
if (addRet.second) {
|
| 86 |
+
// nothing found. add to collection
|
| 87 |
+
return true;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
// equiv hypo exists, recombine with other hypo
|
| 91 |
+
HCType::iterator &iterExisting = addRet.first;
|
| 92 |
+
ChartHypothesis *hypoExisting = *iterExisting;
|
| 93 |
+
UTIL_THROW_IF2(iterExisting == m_hypos.end(),
|
| 94 |
+
"Adding a hypothesis should have returned a valid iterator");
|
| 95 |
+
|
| 96 |
+
//StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
|
| 97 |
+
|
| 98 |
+
// found existing hypo with same target ending.
|
| 99 |
+
// keep the best 1
|
| 100 |
+
if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
|
| 101 |
+
// incoming hypo is better than the one we have
|
| 102 |
+
VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
|
| 103 |
+
if (m_nBestIsEnabled) {
|
| 104 |
+
hypo->AddArc(hypoExisting);
|
| 105 |
+
Detach(iterExisting);
|
| 106 |
+
} else {
|
| 107 |
+
Remove(iterExisting);
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
bool added = Add(hypo, manager).second;
|
| 111 |
+
if (!added) {
|
| 112 |
+
iterExisting = m_hypos.find(hypo);
|
| 113 |
+
UTIL_THROW2("Offending hypo = " << **iterExisting);
|
| 114 |
+
}
|
| 115 |
+
return false;
|
| 116 |
+
} else {
|
| 117 |
+
// already storing the best hypo. discard current hypo
|
| 118 |
+
VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
|
| 119 |
+
if (m_nBestIsEnabled) {
|
| 120 |
+
hypoExisting->AddArc(hypo);
|
| 121 |
+
} else {
|
| 122 |
+
delete hypo;
|
| 123 |
+
}
|
| 124 |
+
return false;
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
/** add hypothesis to stack. Prune if necessary.
|
| 129 |
+
* Returns false if equiv hypo exists in collection, otherwise returns true, and the iterator that points to the place where the hypo was added
|
| 130 |
+
* \param hypo hypothesis to add
|
| 131 |
+
* \param manager pointer back to manager
|
| 132 |
+
*/
|
| 133 |
+
pair<ChartHypothesisCollection::HCType::iterator, bool> ChartHypothesisCollection::Add(ChartHypothesis *hypo, ChartManager &manager)
|
| 134 |
+
{
|
| 135 |
+
std::pair<HCType::iterator, bool> ret = m_hypos.insert(hypo);
|
| 136 |
+
if (ret.second) {
|
| 137 |
+
// equiv hypo doesn't exists
|
| 138 |
+
VERBOSE(3,"added hyp to stack");
|
| 139 |
+
|
| 140 |
+
// Update best score, if this hypothesis is new best
|
| 141 |
+
if (hypo->GetFutureScore() > m_bestScore) {
|
| 142 |
+
VERBOSE(3,", best on stack");
|
| 143 |
+
m_bestScore = hypo->GetFutureScore();
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
// Prune only if stack is twice as big as needed (lazy pruning)
|
| 147 |
+
VERBOSE(3,", now size " << m_hypos.size());
|
| 148 |
+
if (m_hypos.size() > 2*m_maxHypoStackSize-1) {
|
| 149 |
+
PruneToSize(manager);
|
| 150 |
+
} else {
|
| 151 |
+
VERBOSE(3,std::endl);
|
| 152 |
+
}
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
return ret;
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
/** Remove hypothesis pointed to by iterator but DOES NOT delete the object.
|
| 159 |
+
* \param iter iterator to delete
|
| 160 |
+
*/
|
| 161 |
+
void ChartHypothesisCollection::Detach(const HCType::iterator &iter)
|
| 162 |
+
{
|
| 163 |
+
m_hypos.erase(iter);
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
/** destroy iterator AND hypothesis pointed to by iterator. If in an object pool, takes care of that too
|
| 167 |
+
*/
|
| 168 |
+
void ChartHypothesisCollection::Remove(const HCType::iterator &iter)
|
| 169 |
+
{
|
| 170 |
+
ChartHypothesis *h = *iter;
|
| 171 |
+
Detach(iter);
|
| 172 |
+
delete h;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
/** prune number of hypo to a particular number of hypos, specified by m_maxHypoStackSize, according to score
|
| 176 |
+
* Don't prune of hypos have identical scores on the boundary, so occasionally number of hypo can remain above m_maxHypoStackSize.
|
| 177 |
+
* \param manager reference back to manager. Used for collecting stats
|
| 178 |
+
*/
|
| 179 |
+
void ChartHypothesisCollection::PruneToSize(ChartManager &manager)
|
| 180 |
+
{
|
| 181 |
+
if (m_maxHypoStackSize == 0) return; // no limit
|
| 182 |
+
|
| 183 |
+
if (GetSize() > m_maxHypoStackSize) { // ok, if not over the limit
|
| 184 |
+
priority_queue<float> bestScores;
|
| 185 |
+
|
| 186 |
+
// push all scores to a heap
|
| 187 |
+
// (but never push scores below m_bestScore+m_beamWidth)
|
| 188 |
+
HCType::iterator iter = m_hypos.begin();
|
| 189 |
+
float score = 0;
|
| 190 |
+
while (iter != m_hypos.end()) {
|
| 191 |
+
ChartHypothesis *hypo = *iter;
|
| 192 |
+
score = hypo->GetFutureScore();
|
| 193 |
+
if (score > m_bestScore+m_beamWidth) {
|
| 194 |
+
bestScores.push(score);
|
| 195 |
+
}
|
| 196 |
+
++iter;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
// pop the top newSize scores (and ignore them, these are the scores of hyps that will remain)
|
| 200 |
+
// ensure to never pop beyond heap size
|
| 201 |
+
size_t minNewSizeHeapSize = m_maxHypoStackSize > bestScores.size() ? bestScores.size() : m_maxHypoStackSize;
|
| 202 |
+
for (size_t i = 1 ; i < minNewSizeHeapSize ; i++)
|
| 203 |
+
bestScores.pop();
|
| 204 |
+
|
| 205 |
+
// and remember the threshold
|
| 206 |
+
float scoreThreshold = bestScores.top();
|
| 207 |
+
|
| 208 |
+
// delete all hypos under score threshold
|
| 209 |
+
iter = m_hypos.begin();
|
| 210 |
+
while (iter != m_hypos.end()) {
|
| 211 |
+
ChartHypothesis *hypo = *iter;
|
| 212 |
+
float score = hypo->GetFutureScore();
|
| 213 |
+
if (score < scoreThreshold) {
|
| 214 |
+
HCType::iterator iterRemove = iter++;
|
| 215 |
+
Remove(iterRemove);
|
| 216 |
+
manager.GetSentenceStats().AddPruning();
|
| 217 |
+
} else {
|
| 218 |
+
++iter;
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
VERBOSE(3,", pruned to size " << m_hypos.size() << endl);
|
| 222 |
+
|
| 223 |
+
IFVERBOSE(3) {
|
| 224 |
+
TRACE_ERR("stack now contains: ");
|
| 225 |
+
for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++) {
|
| 226 |
+
ChartHypothesis *hypo = *iter;
|
| 227 |
+
TRACE_ERR( hypo->GetId() << " (" << hypo->GetFutureScore() << ") ");
|
| 228 |
+
}
|
| 229 |
+
TRACE_ERR( endl);
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
// desperation pruning
|
| 233 |
+
if (m_hypos.size() > m_maxHypoStackSize * 2) {
|
| 234 |
+
std::vector<ChartHypothesis*> hyposOrdered;
|
| 235 |
+
|
| 236 |
+
// sort hypos
|
| 237 |
+
std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(hyposOrdered, hyposOrdered.end()));
|
| 238 |
+
std::sort(hyposOrdered.begin(), hyposOrdered.end(), ChartHypothesisScoreOrderer());
|
| 239 |
+
|
| 240 |
+
//keep only |size|. delete the rest
|
| 241 |
+
std::vector<ChartHypothesis*>::iterator iter;
|
| 242 |
+
for (iter = hyposOrdered.begin() + (m_maxHypoStackSize * 2); iter != hyposOrdered.end(); ++iter) {
|
| 243 |
+
ChartHypothesis *hypo = *iter;
|
| 244 |
+
HCType::iterator iterFindHypo = m_hypos.find(hypo);
|
| 245 |
+
UTIL_THROW_IF2(iterFindHypo == m_hypos.end(),
|
| 246 |
+
"Adding a hypothesis should have returned a valid iterator");
|
| 247 |
+
|
| 248 |
+
Remove(iterFindHypo);
|
| 249 |
+
}
|
| 250 |
+
}
|
| 251 |
+
}
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
//! sort hypothses by descending score. Put these hypos into a vector m_hyposOrdered to be returned by function GetSortedHypotheses()
|
| 255 |
+
void ChartHypothesisCollection::SortHypotheses()
|
| 256 |
+
{
|
| 257 |
+
UTIL_THROW_IF2(!m_hyposOrdered.empty(), "Hypotheses already sorted");
|
| 258 |
+
if (!m_hypos.empty()) {
|
| 259 |
+
// done everything for this cell.
|
| 260 |
+
// sort
|
| 261 |
+
// put into vec
|
| 262 |
+
m_hyposOrdered.reserve(m_hypos.size());
|
| 263 |
+
std::copy(m_hypos.begin(), m_hypos.end(), back_inserter(m_hyposOrdered));
|
| 264 |
+
std::sort(m_hyposOrdered.begin(), m_hyposOrdered.end(), ChartHypothesisScoreOrderer());
|
| 265 |
+
}
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
//! Call CleanupArcList() for each main hypo in collection
|
| 269 |
+
void ChartHypothesisCollection::CleanupArcList()
|
| 270 |
+
{
|
| 271 |
+
HCType::iterator iter;
|
| 272 |
+
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
|
| 273 |
+
ChartHypothesis *mainHypo = *iter;
|
| 274 |
+
mainHypo->CleanupArcList();
|
| 275 |
+
}
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
/** Return all hypos, and all hypos in the arclist, in order to create the output searchgraph, ie. the hypergraph. The output is the debug hypo information.
|
| 279 |
+
* @todo this is a useful function. Make sure it outputs everything required, especially scores.
|
| 280 |
+
* \param translationId unique, contiguous id for the input sentence
|
| 281 |
+
* \param outputSearchGraphStream stream to output the info to
|
| 282 |
+
* \param reachable @todo don't know
|
| 283 |
+
*/
|
| 284 |
+
void ChartHypothesisCollection::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
|
| 285 |
+
{
|
| 286 |
+
writer.WriteHypos(*this,reachable);
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
std::ostream& operator<<(std::ostream &out, const ChartHypothesisCollection &coll)
|
| 290 |
+
{
|
| 291 |
+
HypoList::const_iterator iterInside;
|
| 292 |
+
for (iterInside = coll.m_hyposOrdered.begin(); iterInside != coll.m_hyposOrdered.end(); ++iterInside) {
|
| 293 |
+
const ChartHypothesis &hypo = **iterInside;
|
| 294 |
+
out << hypo << endl;
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
return out;
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
} // namespace
|
mosesdecoder/moses/ChartHypothesisCollection.h
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
#pragma once
|
| 22 |
+
|
| 23 |
+
#include <set>
|
| 24 |
+
#include "ChartHypothesis.h"
|
| 25 |
+
#include "RuleCube.h"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
|
| 31 |
+
class ChartSearchGraphWriter;
|
| 32 |
+
struct AllOptions;
|
| 33 |
+
|
| 34 |
+
//! functor to compare (chart) hypotheses by (descending) score
|
| 35 |
+
class ChartHypothesisScoreOrderer
|
| 36 |
+
{
|
| 37 |
+
public:
|
| 38 |
+
bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const {
|
| 39 |
+
return hypoA->GetFutureScore() > hypoB->GetFutureScore();
|
| 40 |
+
}
|
| 41 |
+
};
|
| 42 |
+
|
| 43 |
+
/** Contains a set of unique hypos that have the same HS non-term.
|
| 44 |
+
* ie. 1 of these for each target LHS in each cell
|
| 45 |
+
*/
|
| 46 |
+
class ChartHypothesisCollection
|
| 47 |
+
{
|
| 48 |
+
friend std::ostream& operator<<(std::ostream&, const ChartHypothesisCollection&);
|
| 49 |
+
|
| 50 |
+
protected:
|
| 51 |
+
//typedef std::set<ChartHypothesis*, ChartHypothesisRecombinationOrderer> HCType;
|
| 52 |
+
typedef boost::unordered_set< ChartHypothesis*, UnorderedComparer<ChartHypothesis>, UnorderedComparer<ChartHypothesis> > HCType;
|
| 53 |
+
HCType m_hypos;
|
| 54 |
+
HypoList m_hyposOrdered;
|
| 55 |
+
|
| 56 |
+
float m_bestScore; /**< score of the best hypothesis in collection */
|
| 57 |
+
float m_beamWidth; /**< minimum score due to threashold pruning */
|
| 58 |
+
size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
|
| 59 |
+
bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
|
| 60 |
+
|
| 61 |
+
std::pair<HCType::iterator, bool> Add(ChartHypothesis *hypo, ChartManager &manager);
|
| 62 |
+
|
| 63 |
+
public:
|
| 64 |
+
typedef HCType::iterator iterator;
|
| 65 |
+
typedef HCType::const_iterator const_iterator;
|
| 66 |
+
//! iterators
|
| 67 |
+
const_iterator begin() const {
|
| 68 |
+
return m_hypos.begin();
|
| 69 |
+
}
|
| 70 |
+
const_iterator end() const {
|
| 71 |
+
return m_hypos.end();
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
ChartHypothesisCollection(AllOptions const& opts);
|
| 75 |
+
~ChartHypothesisCollection();
|
| 76 |
+
bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);
|
| 77 |
+
|
| 78 |
+
void Detach(const HCType::iterator &iter);
|
| 79 |
+
void Remove(const HCType::iterator &iter);
|
| 80 |
+
|
| 81 |
+
void PruneToSize(ChartManager &manager);
|
| 82 |
+
|
| 83 |
+
size_t GetSize() const {
|
| 84 |
+
return m_hypos.size();
|
| 85 |
+
}
|
| 86 |
+
size_t GetHypo() const {
|
| 87 |
+
return m_hypos.size();
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
void SortHypotheses();
|
| 91 |
+
void CleanupArcList();
|
| 92 |
+
|
| 93 |
+
//! return vector of hypothesis that has been sorted by score
|
| 94 |
+
const HypoList &GetSortedHypotheses() const {
|
| 95 |
+
return m_hyposOrdered;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
//! return the best total score of all hypos in this collection
|
| 99 |
+
float GetBestScore() const {
|
| 100 |
+
return m_bestScore;
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
|
| 104 |
+
|
| 105 |
+
};
|
| 106 |
+
|
| 107 |
+
} // namespace
|
| 108 |
+
|
mosesdecoder/moses/ChartKBestExtractor.cpp
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2014 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include "ChartKBestExtractor.h"
|
| 21 |
+
|
| 22 |
+
#include "ChartHypothesis.h"
|
| 23 |
+
#include "ScoreComponentCollection.h"
|
| 24 |
+
#include "StaticData.h"
|
| 25 |
+
|
| 26 |
+
#include <boost/scoped_ptr.hpp>
|
| 27 |
+
|
| 28 |
+
#include <vector>
|
| 29 |
+
|
| 30 |
+
using namespace std;
|
| 31 |
+
|
| 32 |
+
namespace Moses
|
| 33 |
+
{
|
| 34 |
+
|
| 35 |
+
// Extract the k-best list from the search graph.
|
| 36 |
+
void ChartKBestExtractor::Extract(
|
| 37 |
+
const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
|
| 38 |
+
KBestVec &kBestList)
|
| 39 |
+
{
|
| 40 |
+
kBestList.clear();
|
| 41 |
+
if (topLevelHypos.empty()) {
|
| 42 |
+
return;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
// Create a new ChartHypothesis object, supremeHypo, that has the best
|
| 46 |
+
// top-level hypothesis as its predecessor and has the same score.
|
| 47 |
+
std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
|
| 48 |
+
const ChartHypothesis &bestTopLevelHypo = **p;
|
| 49 |
+
boost::scoped_ptr<ChartHypothesis> supremeHypo(
|
| 50 |
+
new ChartHypothesis(bestTopLevelHypo, *this));
|
| 51 |
+
|
| 52 |
+
// Do the same for each alternative top-level hypothesis, but add the new
|
| 53 |
+
// ChartHypothesis objects as arcs from supremeHypo, as if they had been
|
| 54 |
+
// recombined.
|
| 55 |
+
for (++p; p != topLevelHypos.end(); ++p) {
|
| 56 |
+
// Check that the first item in topLevelHypos really was the best.
|
| 57 |
+
UTIL_THROW_IF2((*p)->GetFutureScore() > bestTopLevelHypo.GetFutureScore(),
|
| 58 |
+
"top-level hypotheses are not correctly sorted");
|
| 59 |
+
// Note: there's no need for a smart pointer here: supremeHypo will take
|
| 60 |
+
// ownership of altHypo.
|
| 61 |
+
ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
|
| 62 |
+
supremeHypo->AddArc(altHypo);
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
// Create the target vertex then lazily fill its k-best list.
|
| 66 |
+
boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
|
| 67 |
+
LazyKthBest(*targetVertex, k, k);
|
| 68 |
+
|
| 69 |
+
// Copy the k-best list from the target vertex, but drop the top edge from
|
| 70 |
+
// each derivation.
|
| 71 |
+
kBestList.reserve(targetVertex->kBestList.size());
|
| 72 |
+
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
|
| 73 |
+
q = targetVertex->kBestList.begin();
|
| 74 |
+
q != targetVertex->kBestList.end(); ++q) {
|
| 75 |
+
const boost::shared_ptr<Derivation> d(*q);
|
| 76 |
+
assert(d);
|
| 77 |
+
assert(d->subderivations.size() == 1);
|
| 78 |
+
kBestList.push_back(d->subderivations[0]);
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
// Generate the target-side yield of the derivation d.
|
| 83 |
+
Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
|
| 84 |
+
{
|
| 85 |
+
FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor;
|
| 86 |
+
|
| 87 |
+
Phrase ret(ARRAY_SIZE_INCR);
|
| 88 |
+
|
| 89 |
+
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
| 90 |
+
const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
|
| 91 |
+
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
| 92 |
+
phrase.GetAlignNonTerm().GetNonTermIndexMap();
|
| 93 |
+
for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
| 94 |
+
const Word &word = phrase.GetWord(pos);
|
| 95 |
+
if (word.IsNonTerminal()) {
|
| 96 |
+
std::size_t nonTermInd = nonTermIndexMap[pos];
|
| 97 |
+
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
| 98 |
+
Phrase subPhrase = GetOutputPhrase(subderivation);
|
| 99 |
+
ret.Append(subPhrase);
|
| 100 |
+
} else {
|
| 101 |
+
ret.AddWord(word);
|
| 102 |
+
if (placeholderFactor == NOT_FOUND) {
|
| 103 |
+
continue;
|
| 104 |
+
}
|
| 105 |
+
std::set<std::size_t> sourcePosSet =
|
| 106 |
+
phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
|
| 107 |
+
if (sourcePosSet.size() == 1) {
|
| 108 |
+
const std::vector<const Word*> *ruleSourceFromInputPath =
|
| 109 |
+
hypo.GetTranslationOption().GetSourceRuleFromInputPath();
|
| 110 |
+
UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
|
| 111 |
+
"Source Words in of the rules hasn't been filled out");
|
| 112 |
+
std::size_t sourcePos = *sourcePosSet.begin();
|
| 113 |
+
const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
|
| 114 |
+
UTIL_THROW_IF2(sourceWord == NULL,
|
| 115 |
+
"Null source word at position " << sourcePos);
|
| 116 |
+
const Factor *factor = sourceWord->GetFactor(placeholderFactor);
|
| 117 |
+
if (factor) {
|
| 118 |
+
ret.Back()[0] = factor;
|
| 119 |
+
}
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
return ret;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
// Generate the score breakdown of the derivation d.
|
| 128 |
+
boost::shared_ptr<ScoreComponentCollection>
|
| 129 |
+
ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
|
| 130 |
+
{
|
| 131 |
+
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
| 132 |
+
boost::shared_ptr<ScoreComponentCollection> scoreBreakdown(new ScoreComponentCollection());
|
| 133 |
+
scoreBreakdown->PlusEquals(hypo.GetDeltaScoreBreakdown());
|
| 134 |
+
const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
|
| 135 |
+
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
|
| 136 |
+
phrase.GetAlignNonTerm().GetNonTermIndexMap();
|
| 137 |
+
for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
| 138 |
+
const Word &word = phrase.GetWord(pos);
|
| 139 |
+
if (word.IsNonTerminal()) {
|
| 140 |
+
std::size_t nonTermInd = nonTermIndexMap[pos];
|
| 141 |
+
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
| 142 |
+
scoreBreakdown->PlusEquals(*GetOutputScoreBreakdown(subderivation));
|
| 143 |
+
}
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
return scoreBreakdown;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
// Generate the target tree of the derivation d.
|
| 150 |
+
TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
|
| 151 |
+
{
|
| 152 |
+
const ChartHypothesis &hypo = d.edge.head->hypothesis;
|
| 153 |
+
const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
|
| 154 |
+
if (const PhraseProperty *property = phrase.GetProperty("Tree")) {
|
| 155 |
+
const std::string *tree = property->GetValueString();
|
| 156 |
+
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
|
| 157 |
+
|
| 158 |
+
//get subtrees (in target order)
|
| 159 |
+
std::vector<TreePointer> previous_trees;
|
| 160 |
+
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
|
| 161 |
+
const Word &word = phrase.GetWord(pos);
|
| 162 |
+
if (word.IsNonTerminal()) {
|
| 163 |
+
size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
|
| 164 |
+
const Derivation &subderivation = *d.subderivations[nonTermInd];
|
| 165 |
+
const TreePointer prev_tree = GetOutputTree(subderivation);
|
| 166 |
+
previous_trees.push_back(prev_tree);
|
| 167 |
+
}
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
mytree->Combine(previous_trees);
|
| 171 |
+
mytree->Unbinarize();
|
| 172 |
+
return mytree;
|
| 173 |
+
} else {
|
| 174 |
+
UTIL_THROW2("Error: k-best tree output active, but no internal tree structure found");
|
| 175 |
+
}
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
// Create an unweighted hyperarc corresponding to the given ChartHypothesis.
|
| 179 |
+
ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
|
| 180 |
+
const ChartHypothesis &h)
|
| 181 |
+
{
|
| 182 |
+
UnweightedHyperarc edge;
|
| 183 |
+
edge.head = FindOrCreateVertex(h);
|
| 184 |
+
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
|
| 185 |
+
edge.tail.resize(prevHypos.size());
|
| 186 |
+
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
|
| 187 |
+
const ChartHypothesis *prevHypo = prevHypos[i];
|
| 188 |
+
edge.tail[i] = FindOrCreateVertex(*prevHypo);
|
| 189 |
+
}
|
| 190 |
+
return edge;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
// Look for the vertex corresponding to a given ChartHypothesis, creating
|
| 194 |
+
// a new one if necessary.
|
| 195 |
+
boost::shared_ptr<ChartKBestExtractor::Vertex>
|
| 196 |
+
ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
|
| 197 |
+
{
|
| 198 |
+
VertexMap::value_type element(&h, boost::shared_ptr<Vertex>());
|
| 199 |
+
std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element);
|
| 200 |
+
boost::shared_ptr<Vertex> &sp = p.first->second;
|
| 201 |
+
if (!p.second) {
|
| 202 |
+
return sp; // Vertex was already in m_vertexMap.
|
| 203 |
+
}
|
| 204 |
+
sp.reset(new Vertex(h));
|
| 205 |
+
// Create the 1-best derivation and add it to the vertex's kBestList.
|
| 206 |
+
UnweightedHyperarc bestEdge;
|
| 207 |
+
bestEdge.head = sp;
|
| 208 |
+
const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
|
| 209 |
+
bestEdge.tail.resize(prevHypos.size());
|
| 210 |
+
for (std::size_t i = 0; i < prevHypos.size(); ++i) {
|
| 211 |
+
const ChartHypothesis *prevHypo = prevHypos[i];
|
| 212 |
+
bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
|
| 213 |
+
}
|
| 214 |
+
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
|
| 215 |
+
#ifndef NDEBUG
|
| 216 |
+
std::pair<DerivationSet::iterator, bool> q =
|
| 217 |
+
#endif
|
| 218 |
+
m_derivations.insert(bestDerivation);
|
| 219 |
+
assert(q.second);
|
| 220 |
+
sp->kBestList.push_back(bestDerivation);
|
| 221 |
+
return sp;
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
// Create the 1-best derivation for each edge in BS(v) (except the best one)
|
| 225 |
+
// and add it to v's candidate queue.
|
| 226 |
+
void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
|
| 227 |
+
{
|
| 228 |
+
// Create derivations for all of v's incoming edges except the best. This
|
| 229 |
+
// means everything in v.hypothesis.GetArcList() and not the edge defined
|
| 230 |
+
// by v.hypothesis itself. The 1-best derivation for that edge will already
|
| 231 |
+
// have been created.
|
| 232 |
+
const ChartArcList *arcList = v.hypothesis.GetArcList();
|
| 233 |
+
if (arcList) {
|
| 234 |
+
for (std::size_t i = 0; i < arcList->size(); ++i) {
|
| 235 |
+
const ChartHypothesis &recombinedHypo = *(*arcList)[i];
|
| 236 |
+
boost::shared_ptr<Vertex> w = FindOrCreateVertex(recombinedHypo);
|
| 237 |
+
assert(w->kBestList.size() == 1);
|
| 238 |
+
v.candidates.push(w->kBestList[0]);
|
| 239 |
+
}
|
| 240 |
+
}
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
// Lazily fill v's k-best list.
|
| 244 |
+
void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
|
| 245 |
+
std::size_t globalK)
|
| 246 |
+
{
|
| 247 |
+
// If this is the first visit to vertex v then initialize the priority queue.
|
| 248 |
+
if (v.visited == false) {
|
| 249 |
+
// The 1-best derivation should already be in v's k-best list.
|
| 250 |
+
assert(v.kBestList.size() == 1);
|
| 251 |
+
// Initialize v's priority queue.
|
| 252 |
+
GetCandidates(v, globalK);
|
| 253 |
+
v.visited = true;
|
| 254 |
+
}
|
| 255 |
+
// Add derivations to the k-best list until it contains k or there are none
|
| 256 |
+
// left to add.
|
| 257 |
+
while (v.kBestList.size() < k) {
|
| 258 |
+
assert(!v.kBestList.empty());
|
| 259 |
+
// Update the priority queue by adding the successors of the last
|
| 260 |
+
// derivation (unless they've been seen before).
|
| 261 |
+
boost::shared_ptr<Derivation> d(v.kBestList.back());
|
| 262 |
+
LazyNext(v, *d, globalK);
|
| 263 |
+
// Check if there are any derivations left in the queue.
|
| 264 |
+
if (v.candidates.empty()) {
|
| 265 |
+
break;
|
| 266 |
+
}
|
| 267 |
+
// Get the next best derivation and delete it from the queue.
|
| 268 |
+
boost::weak_ptr<Derivation> next = v.candidates.top();
|
| 269 |
+
v.candidates.pop();
|
| 270 |
+
// Add it to the k-best list.
|
| 271 |
+
v.kBestList.push_back(next);
|
| 272 |
+
}
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
// Create the neighbours of Derivation d and add them to v's candidate queue.
|
| 276 |
+
void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
|
| 277 |
+
std::size_t globalK)
|
| 278 |
+
{
|
| 279 |
+
for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
|
| 280 |
+
Vertex &pred = *d.edge.tail[i];
|
| 281 |
+
// Ensure that pred's k-best list contains enough derivations.
|
| 282 |
+
std::size_t k = d.backPointers[i] + 2;
|
| 283 |
+
LazyKthBest(pred, k, globalK);
|
| 284 |
+
if (pred.kBestList.size() < k) {
|
| 285 |
+
// pred's derivations have been exhausted.
|
| 286 |
+
continue;
|
| 287 |
+
}
|
| 288 |
+
// Create the neighbour.
|
| 289 |
+
boost::shared_ptr<Derivation> next(new Derivation(d, i));
|
| 290 |
+
// Check if it has been created before.
|
| 291 |
+
std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
|
| 292 |
+
if (p.second) {
|
| 293 |
+
v.candidates.push(next); // Haven't previously seen it.
|
| 294 |
+
}
|
| 295 |
+
}
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
// Construct the 1-best Derivation that ends at edge e.
|
| 299 |
+
ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
|
| 300 |
+
{
|
| 301 |
+
edge = e;
|
| 302 |
+
std::size_t arity = edge.tail.size();
|
| 303 |
+
backPointers.resize(arity, 0);
|
| 304 |
+
subderivations.reserve(arity);
|
| 305 |
+
for (std::size_t i = 0; i < arity; ++i) {
|
| 306 |
+
const Vertex &pred = *edge.tail[i];
|
| 307 |
+
assert(pred.kBestList.size() >= 1);
|
| 308 |
+
boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
|
| 309 |
+
subderivations.push_back(sub);
|
| 310 |
+
}
|
| 311 |
+
score = edge.head->hypothesis.GetFutureScore();
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
// Construct a Derivation that neighbours an existing Derivation.
|
| 315 |
+
ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
|
| 316 |
+
{
|
| 317 |
+
edge.head = d.edge.head;
|
| 318 |
+
edge.tail = d.edge.tail;
|
| 319 |
+
backPointers = d.backPointers;
|
| 320 |
+
subderivations = d.subderivations;
|
| 321 |
+
std::size_t j = ++backPointers[i];
|
| 322 |
+
score = d.score;
|
| 323 |
+
// Deduct the score of the old subderivation.
|
| 324 |
+
score -= subderivations[i]->score;
|
| 325 |
+
// Update the subderivation pointer.
|
| 326 |
+
boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
|
| 327 |
+
subderivations[i] = newSub;
|
| 328 |
+
// Add the score of the new subderivation.
|
| 329 |
+
score += subderivations[i]->score;
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartKBestExtractor.h
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2014 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include <cassert>
|
| 23 |
+
#include "ChartHypothesis.h"
|
| 24 |
+
#include "ScoreComponentCollection.h"
|
| 25 |
+
#include "FF/InternalTree.h"
|
| 26 |
+
|
| 27 |
+
#include <boost/unordered_set.hpp>
|
| 28 |
+
#include <boost/weak_ptr.hpp>
|
| 29 |
+
#include <boost/shared_ptr.hpp>
|
| 30 |
+
|
| 31 |
+
#include <queue>
|
| 32 |
+
#include <vector>
|
| 33 |
+
|
| 34 |
+
namespace Moses
|
| 35 |
+
{
|
| 36 |
+
|
| 37 |
+
// k-best list extractor that implements algorithm 3 from this paper:
|
| 38 |
+
//
|
| 39 |
+
// Liang Huang and David Chiang
|
| 40 |
+
// "Better k-best parsing"
|
| 41 |
+
// In Proceedings of IWPT 2005
|
| 42 |
+
//
|
| 43 |
+
class ChartKBestExtractor
|
| 44 |
+
{
|
| 45 |
+
public:
|
| 46 |
+
struct Vertex;
|
| 47 |
+
|
| 48 |
+
struct UnweightedHyperarc {
|
| 49 |
+
boost::shared_ptr<Vertex> head;
|
| 50 |
+
std::vector<boost::shared_ptr<Vertex> > tail;
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
struct Derivation {
|
| 54 |
+
Derivation(const UnweightedHyperarc &);
|
| 55 |
+
Derivation(const Derivation &, std::size_t);
|
| 56 |
+
|
| 57 |
+
UnweightedHyperarc edge;
|
| 58 |
+
std::vector<std::size_t> backPointers;
|
| 59 |
+
std::vector<boost::shared_ptr<Derivation> > subderivations;
|
| 60 |
+
float score;
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
struct DerivationOrderer {
|
| 64 |
+
bool operator()(const boost::weak_ptr<Derivation> &d1,
|
| 65 |
+
const boost::weak_ptr<Derivation> &d2) const {
|
| 66 |
+
boost::shared_ptr<Derivation> s1(d1);
|
| 67 |
+
boost::shared_ptr<Derivation> s2(d2);
|
| 68 |
+
return s1->score < s2->score;
|
| 69 |
+
}
|
| 70 |
+
};
|
| 71 |
+
|
| 72 |
+
struct Vertex {
|
| 73 |
+
typedef std::priority_queue<boost::weak_ptr<Derivation>,
|
| 74 |
+
std::vector<boost::weak_ptr<Derivation> >,
|
| 75 |
+
DerivationOrderer> DerivationQueue;
|
| 76 |
+
|
| 77 |
+
Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
|
| 78 |
+
|
| 79 |
+
const ChartHypothesis &hypothesis;
|
| 80 |
+
std::vector<boost::weak_ptr<Derivation> > kBestList;
|
| 81 |
+
DerivationQueue candidates;
|
| 82 |
+
bool visited;
|
| 83 |
+
};
|
| 84 |
+
|
| 85 |
+
typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
|
| 86 |
+
|
| 87 |
+
// Extract the k-best list from the search hypergraph given the full, sorted
|
| 88 |
+
// list of top-level vertices.
|
| 89 |
+
void Extract(const std::vector<const ChartHypothesis*> &topHypos,
|
| 90 |
+
std::size_t k, KBestVec &);
|
| 91 |
+
|
| 92 |
+
static Phrase GetOutputPhrase(const Derivation &);
|
| 93 |
+
static boost::shared_ptr<ScoreComponentCollection> GetOutputScoreBreakdown(const Derivation &);
|
| 94 |
+
static TreePointer GetOutputTree(const Derivation &);
|
| 95 |
+
|
| 96 |
+
private:
|
| 97 |
+
typedef boost::unordered_map<const ChartHypothesis *,
|
| 98 |
+
boost::shared_ptr<Vertex> > VertexMap;
|
| 99 |
+
|
| 100 |
+
struct DerivationHasher {
|
| 101 |
+
std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
|
| 102 |
+
std::size_t seed = 0;
|
| 103 |
+
boost::hash_combine(seed, d->edge.head);
|
| 104 |
+
boost::hash_combine(seed, d->edge.tail);
|
| 105 |
+
boost::hash_combine(seed, d->backPointers);
|
| 106 |
+
return seed;
|
| 107 |
+
}
|
| 108 |
+
};
|
| 109 |
+
|
| 110 |
+
struct DerivationEqualityPred {
|
| 111 |
+
bool operator()(const boost::shared_ptr<Derivation> &d1,
|
| 112 |
+
const boost::shared_ptr<Derivation> &d2) const {
|
| 113 |
+
return d1->edge.head == d2->edge.head &&
|
| 114 |
+
d1->edge.tail == d2->edge.tail &&
|
| 115 |
+
d1->backPointers == d2->backPointers;
|
| 116 |
+
}
|
| 117 |
+
};
|
| 118 |
+
|
| 119 |
+
typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
|
| 120 |
+
DerivationEqualityPred> DerivationSet;
|
| 121 |
+
|
| 122 |
+
UnweightedHyperarc CreateEdge(const ChartHypothesis &);
|
| 123 |
+
boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
|
| 124 |
+
void GetCandidates(Vertex &, std::size_t);
|
| 125 |
+
void LazyKthBest(Vertex &, std::size_t, std::size_t);
|
| 126 |
+
void LazyNext(Vertex &, const Derivation &, std::size_t);
|
| 127 |
+
|
| 128 |
+
VertexMap m_vertexMap;
|
| 129 |
+
DerivationSet m_derivations;
|
| 130 |
+
};
|
| 131 |
+
|
| 132 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartManager.cpp
ADDED
|
@@ -0,0 +1,867 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <cstdio>
|
| 23 |
+
#include "ChartManager.h"
|
| 24 |
+
#include "ChartCell.h"
|
| 25 |
+
#include "ChartHypothesis.h"
|
| 26 |
+
#include "ChartKBestExtractor.h"
|
| 27 |
+
#include "ChartTranslationOptions.h"
|
| 28 |
+
#include "HypergraphOutput.h"
|
| 29 |
+
#include "StaticData.h"
|
| 30 |
+
#include "DecodeStep.h"
|
| 31 |
+
#include "TreeInput.h"
|
| 32 |
+
#include "moses/FF/StatefulFeatureFunction.h"
|
| 33 |
+
#include "moses/FF/WordPenaltyProducer.h"
|
| 34 |
+
#include "moses/OutputCollector.h"
|
| 35 |
+
#include "moses/ChartKBestExtractor.h"
|
| 36 |
+
#include "moses/HypergraphOutput.h"
|
| 37 |
+
#include "moses/TranslationTask.h"
|
| 38 |
+
|
| 39 |
+
using namespace std;
|
| 40 |
+
|
| 41 |
+
namespace Moses
|
| 42 |
+
{
|
| 43 |
+
|
| 44 |
+
/* constructor. Initialize everything prior to decoding a particular sentence.
|
| 45 |
+
* \param source the sentence to be decoded
|
| 46 |
+
* \param system which particular set of models to use.
|
| 47 |
+
*/
|
| 48 |
+
ChartManager::ChartManager(ttasksptr const& ttask)
|
| 49 |
+
: BaseManager(ttask)
|
| 50 |
+
, m_hypoStackColl(m_source, *this)
|
| 51 |
+
, m_start(clock())
|
| 52 |
+
, m_hypothesisId(0)
|
| 53 |
+
, m_parser(ttask, m_hypoStackColl)
|
| 54 |
+
, m_translationOptionList(ttask->options()->syntax.rule_limit, m_source)
|
| 55 |
+
{ }
|
| 56 |
+
|
| 57 |
+
/** Destructor. Logs (at verbosity 1) the processor time, in seconds, that has
 * elapsed since the manager was constructed.
 */
ChartManager::~ChartManager()
{
  const float seconds =
    static_cast<float>(clock() - m_start) / static_cast<float>(CLOCKS_PER_SEC);
  VERBOSE(1, "Translation took " << seconds << " seconds" << endl);
}
|
| 65 |
+
|
| 66 |
+
//! decode the sentence. This contains the main laps. Basically, the CKY++ algorithm
|
| 67 |
+
void ChartManager::Decode()
|
| 68 |
+
{
|
| 69 |
+
|
| 70 |
+
VERBOSE(1,"Translating: " << m_source << endl);
|
| 71 |
+
|
| 72 |
+
ResetSentenceStats(m_source);
|
| 73 |
+
|
| 74 |
+
VERBOSE(2,"Decoding: " << endl);
|
| 75 |
+
//ChartHypothesis::ResetHypoCount();
|
| 76 |
+
|
| 77 |
+
AddXmlChartOptions();
|
| 78 |
+
|
| 79 |
+
// MAIN LOOP
|
| 80 |
+
size_t size = m_source.GetSize();
|
| 81 |
+
for (int startPos = size-1; startPos >= 0; --startPos) {
|
| 82 |
+
for (size_t width = 1; width <= size-startPos; ++width) {
|
| 83 |
+
size_t endPos = startPos + width - 1;
|
| 84 |
+
Range range(startPos, endPos);
|
| 85 |
+
|
| 86 |
+
// create trans opt
|
| 87 |
+
m_translationOptionList.Clear();
|
| 88 |
+
m_parser.Create(range, m_translationOptionList);
|
| 89 |
+
m_translationOptionList.ApplyThreshold(options()->search.trans_opt_threshold);
|
| 90 |
+
|
| 91 |
+
const InputPath &inputPath = m_parser.GetInputPath(range);
|
| 92 |
+
m_translationOptionList.EvaluateWithSourceContext(m_source, inputPath);
|
| 93 |
+
|
| 94 |
+
// decode
|
| 95 |
+
ChartCell &cell = m_hypoStackColl.Get(range);
|
| 96 |
+
cell.Decode(m_translationOptionList, m_hypoStackColl);
|
| 97 |
+
|
| 98 |
+
m_translationOptionList.Clear();
|
| 99 |
+
cell.PruneToSize();
|
| 100 |
+
cell.CleanupArcList();
|
| 101 |
+
cell.SortHypotheses();
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
IFVERBOSE(1) {
|
| 106 |
+
|
| 107 |
+
for (size_t startPos = 0; startPos < size; ++startPos) {
|
| 108 |
+
cerr.width(3);
|
| 109 |
+
cerr << startPos << " ";
|
| 110 |
+
}
|
| 111 |
+
cerr << endl;
|
| 112 |
+
for (size_t width = 1; width <= size; width++) {
|
| 113 |
+
for( size_t space = 0; space < width-1; space++ ) {
|
| 114 |
+
cerr << " ";
|
| 115 |
+
}
|
| 116 |
+
for (size_t startPos = 0; startPos <= size-width; ++startPos) {
|
| 117 |
+
Range range(startPos, startPos+width-1);
|
| 118 |
+
cerr.width(3);
|
| 119 |
+
cerr << m_hypoStackColl.Get(range).GetSize() << " ";
|
| 120 |
+
}
|
| 121 |
+
cerr << endl;
|
| 122 |
+
}
|
| 123 |
+
}
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
/** add specific translation options and hypotheses according to the XML override translation scheme.
|
| 127 |
+
* Doesn't seem to do anything about walls and zones.
|
| 128 |
+
* @todo check walls & zones. Check that the implementation doesn't leak, xml options sometimes does if you're not careful
|
| 129 |
+
*/
|
| 130 |
+
void ChartManager::AddXmlChartOptions()
|
| 131 |
+
{
|
| 132 |
+
const std::vector <ChartTranslationOptions*> xmlChartOptionsList
|
| 133 |
+
= m_source.GetXmlChartTranslationOptions();
|
| 134 |
+
IFVERBOSE(2) {
|
| 135 |
+
cerr << "AddXmlChartOptions " << xmlChartOptionsList.size() << endl;
|
| 136 |
+
}
|
| 137 |
+
if (xmlChartOptionsList.size() == 0) return;
|
| 138 |
+
|
| 139 |
+
typedef std::vector<ChartTranslationOptions*>::const_iterator citer;
|
| 140 |
+
for(citer i = xmlChartOptionsList.begin(); i != xmlChartOptionsList.end(); ++i) {
|
| 141 |
+
ChartTranslationOptions* opt = *i;
|
| 142 |
+
|
| 143 |
+
const Range &range = opt->GetSourceWordsRange();
|
| 144 |
+
|
| 145 |
+
RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
|
| 146 |
+
ChartHypothesis* hypo = new ChartHypothesis(*opt, *item, *this);
|
| 147 |
+
hypo->EvaluateWhenApplied();
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
ChartCell &cell = m_hypoStackColl.Get(range);
|
| 151 |
+
cell.AddHypothesis(hypo);
|
| 152 |
+
}
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
//! get best complete translation from the top chart cell.
|
| 156 |
+
const ChartHypothesis *ChartManager::GetBestHypothesis() const
|
| 157 |
+
{
|
| 158 |
+
size_t size = m_source.GetSize();
|
| 159 |
+
|
| 160 |
+
if (size == 0) // empty source
|
| 161 |
+
return NULL;
|
| 162 |
+
else {
|
| 163 |
+
Range range(0, size-1);
|
| 164 |
+
const ChartCell &lastCell = m_hypoStackColl.Get(range);
|
| 165 |
+
return lastCell.GetBestHypothesis();
|
| 166 |
+
}
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
/** Calculate the n-best paths through the output hypergraph.
|
| 170 |
+
* Return the list of paths with the variable ret
|
| 171 |
+
* \param n how may paths to return
|
| 172 |
+
* \param ret return argument
|
| 173 |
+
* \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
|
| 174 |
+
*/
|
| 175 |
+
void ChartManager::CalcNBest(
|
| 176 |
+
std::size_t n,
|
| 177 |
+
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
|
| 178 |
+
bool onlyDistinct) const
|
| 179 |
+
{
|
| 180 |
+
nBestList.clear();
|
| 181 |
+
if (n == 0 || m_source.GetSize() == 0) {
|
| 182 |
+
return;
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
// Get the list of top-level hypotheses, sorted by score.
|
| 186 |
+
Range range(0, m_source.GetSize()-1);
|
| 187 |
+
const ChartCell &lastCell = m_hypoStackColl.Get(range);
|
| 188 |
+
boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
|
| 189 |
+
lastCell.GetAllSortedHypotheses());
|
| 190 |
+
if (!topLevelHypos) {
|
| 191 |
+
return;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
ChartKBestExtractor extractor;
|
| 195 |
+
|
| 196 |
+
if (!onlyDistinct) {
|
| 197 |
+
// Return the n-best list as is, including duplicate translations.
|
| 198 |
+
extractor.Extract(*topLevelHypos, n, nBestList);
|
| 199 |
+
return;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
// Determine how many derivations to extract. If the n-best list is
|
| 203 |
+
// restricted to distinct translations then this limit should be bigger
|
| 204 |
+
// than n. The n-best factor determines how much bigger the limit should be,
|
| 205 |
+
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
|
| 206 |
+
// too many translations are identical.
|
| 207 |
+
const std::size_t nBestFactor = options()->nbest.factor;
|
| 208 |
+
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
|
| 209 |
+
|
| 210 |
+
// Extract the derivations.
|
| 211 |
+
ChartKBestExtractor::KBestVec bigList;
|
| 212 |
+
bigList.reserve(numDerivations);
|
| 213 |
+
extractor.Extract(*topLevelHypos, numDerivations, bigList);
|
| 214 |
+
|
| 215 |
+
// Copy derivations into nBestList, skipping ones with repeated translations.
|
| 216 |
+
std::set<Phrase> distinct;
|
| 217 |
+
for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
|
| 218 |
+
nBestList.size() < n && p != bigList.end(); ++p) {
|
| 219 |
+
boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
|
| 220 |
+
Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
|
| 221 |
+
if (distinct.insert(translation).second) {
|
| 222 |
+
nBestList.push_back(derivation);
|
| 223 |
+
}
|
| 224 |
+
}
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
void ChartManager::WriteSearchGraph(const ChartSearchGraphWriter& writer) const
|
| 228 |
+
{
|
| 229 |
+
|
| 230 |
+
size_t size = m_source.GetSize();
|
| 231 |
+
|
| 232 |
+
// which hypotheses are reachable?
|
| 233 |
+
std::map<unsigned,bool> reachable;
|
| 234 |
+
Range fullRange(0, size-1);
|
| 235 |
+
const ChartCell &lastCell = m_hypoStackColl.Get(fullRange);
|
| 236 |
+
const ChartHypothesis *hypo = lastCell.GetBestHypothesis();
|
| 237 |
+
|
| 238 |
+
if (hypo == NULL) {
|
| 239 |
+
// no hypothesis
|
| 240 |
+
return;
|
| 241 |
+
}
|
| 242 |
+
size_t winners = 0;
|
| 243 |
+
size_t losers = 0;
|
| 244 |
+
|
| 245 |
+
FindReachableHypotheses( hypo, reachable, &winners, &losers);
|
| 246 |
+
writer.WriteHeader(winners, losers);
|
| 247 |
+
|
| 248 |
+
for (size_t width = 1; width <= size; ++width) {
|
| 249 |
+
for (size_t startPos = 0; startPos <= size-width; ++startPos) {
|
| 250 |
+
size_t endPos = startPos + width - 1;
|
| 251 |
+
Range range(startPos, endPos);
|
| 252 |
+
TRACE_ERR(" " << range << "=");
|
| 253 |
+
|
| 254 |
+
const ChartCell &cell = m_hypoStackColl.Get(range);
|
| 255 |
+
cell.WriteSearchGraph(writer, reachable);
|
| 256 |
+
}
|
| 257 |
+
}
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
void ChartManager::FindReachableHypotheses(
|
| 261 |
+
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable, size_t* winners, size_t* losers) const
|
| 262 |
+
{
|
| 263 |
+
// do not recurse, if already visited
|
| 264 |
+
if (reachable.find(hypo->GetId()) != reachable.end()) {
|
| 265 |
+
return;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
// recurse
|
| 269 |
+
reachable[ hypo->GetId() ] = true;
|
| 270 |
+
if (hypo->GetWinningHypothesis() == hypo) {
|
| 271 |
+
(*winners)++;
|
| 272 |
+
} else {
|
| 273 |
+
(*losers)++;
|
| 274 |
+
}
|
| 275 |
+
const std::vector<const ChartHypothesis*> &previous = hypo->GetPrevHypos();
|
| 276 |
+
for(std::vector<const ChartHypothesis*>::const_iterator i = previous.begin(); i != previous.end(); ++i) {
|
| 277 |
+
FindReachableHypotheses( *i, reachable, winners, losers );
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
// also loop over recombined hypotheses (arcs)
|
| 281 |
+
const ChartArcList *arcList = hypo->GetArcList();
|
| 282 |
+
if (arcList) {
|
| 283 |
+
ChartArcList::const_iterator iterArc;
|
| 284 |
+
for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
|
| 285 |
+
const ChartHypothesis &arc = **iterArc;
|
| 286 |
+
FindReachableHypotheses( &arc, reachable, winners, losers );
|
| 287 |
+
}
|
| 288 |
+
}
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
void
|
| 292 |
+
ChartManager::
|
| 293 |
+
OutputSearchGraphAsHypergraph(std::ostream& out) const
|
| 294 |
+
{
|
| 295 |
+
ChartSearchGraphWriterHypergraph writer(options(), &out);
|
| 296 |
+
WriteSearchGraph(writer);
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
|
| 300 |
+
{
|
| 301 |
+
ChartSearchGraphWriterMoses writer(options(), &outputSearchGraphStream,
|
| 302 |
+
m_source.GetTranslationId());
|
| 303 |
+
WriteSearchGraph(writer);
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
/** Output the best hypothesis, if there is one.
 * Does nothing when collector is NULL or when there is no best hypothesis.
 * \param collector where the output goes
 */
void ChartManager::OutputBest(OutputCollector *collector) const
{
  // Look the best hypothesis up once.  The previous code declared a second
  // 'bestHypo' inside the if, which shadowed this one and repeated the call.
  const ChartHypothesis *bestHypo = GetBestHypothesis();
  if (collector && bestHypo) {
    const size_t translationId = m_source.GetTranslationId();
    OutputBestHypo(collector, bestHypo, translationId);
  }
}
|
| 315 |
+
|
| 316 |
+
/** Compute the n-best list and write it out.
 * Does nothing when the configured n-best size is 0.
 * \param collector where the n-best list goes; handed on to OutputNBestList
 */
void ChartManager::OutputNBest(OutputCollector *collector) const
{
  const size_t nBestSize = options()->nbest.nbest_size;
  if (nBestSize == 0) {
    return;
  }

  const size_t translationId = m_source.GetTranslationId();

  VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
          << options()->nbest.output_file_path << endl);

  std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > derivations;
  CalcNBest(nBestSize, derivations, options()->nbest.only_distinct);
  OutputNBestList(collector, derivations, translationId);

  IFVERBOSE(2) {
    PrintUserTime("N-Best Hypotheses Generation Time:");
  }
}
|
| 333 |
+
|
| 334 |
+
/** Format an n-best list and hand it to the collector as one string.
 * Each derivation produces one line:
 *   id ||| surface ||| feature scores ||| total score [||| alignment] [||| tree]
 * where the alignment and tree fields depend on the n-best options.
 * \param collector receives the formatted list; must not be NULL
 * \param nBestList the derivations to print, in output order
 * \param translationId id printed at the start of every line and passed to
 *        the collector
 * Throws (via UTIL_THROW_IF2) if an output phrase has fewer than 2 words.
 */
void ChartManager::OutputNBestList(OutputCollector *collector,
                                   const ChartKBestExtractor::KBestVec &nBestList,
                                   long translationId) const
{
  // Check the precondition before the first dereference; it used to be
  // asserted only after collector had already been used.
  assert(collector);

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  // The options do not change while the list is written: read them once.
  NBestOptions const& nbo = options()->nbest;
  const bool includeWordAlignment = nbo.include_alignment_info;
  const bool printNBestTrees = nbo.print_trees;
  const bool withLabels = nbo.include_feature_labels;

  for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const ChartKBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase);
    out << " ||| ";
    boost::shared_ptr<ScoreComponentCollection> scoreBreakdown =
      ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
    scoreBreakdown->OutputAllFeatureScores(out, withLabels);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (printNBestTrees) {
      TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    // Same output as std::endl: flushing a string stream has no effect.
    out << '\n';
  }

  collector->Write(translationId, out.str());
}
|
| 395 |
+
|
| 396 |
+
/** Number of source-side positions of the rule applied by hypo.
 * Starts from the number of source words the hypothesis covers and, for each
 * previous (child) hypothesis, replaces the words that child covers by a
 * single position.
 * \param hypo the hypothesis to measure
 */
size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
{
  typedef std::vector<const ChartHypothesis*> HypoVec;

  size_t positions = hypo->GetCurrSourceRange().GetNumWordsCovered();

  const HypoVec &children = hypo->GetPrevHypos();
  for (HypoVec::const_iterator child = children.begin();
       child != children.end(); ++child) {
    // A child covering w words occupies one position: drop the other w-1.
    positions -= ((*child)->GetCurrSourceRange().GetNumWordsCovered() - 1);
  }

  return positions;
}
|
| 406 |
+
|
| 407 |
+
/** Collect the word alignment points of a derivation, recursively.
 * The terminal alignments of the derivation's own target phrase are added to
 * retAlign after being mapped through source/target offset tables; the
 * subderivations standing in for non-terminals are processed recursively.
 * \param retAlign output argument that receives the alignment points
 * \param derivation the derivation to process
 * \param startTarget target-side position at which this derivation's output
 *        starts
 * \return the number of target words produced by this derivation: one per
 *         terminal plus the sizes returned for its subderivations
 * Throws (via UTIL_THROW_IF2) on inconsistent non-terminal alignment data or
 * when an alignment point is produced twice.
 */
size_t ChartManager::OutputAlignmentNBest(
  Alignments &retAlign,
  const Moses::ChartKBestExtractor::Derivation &derivation,
  size_t startTarget) const
{
  const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
  const TargetPhrase &tp = hypo.GetCurrTargetPhrase();

  size_t totalTargetSize = 0;
  const size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
  const size_t thisSourceSize = CalcSourceSize(&hypo);

  // position of each terminal word in translation rule, irrespective of alignment
  // if non-term, number is undefined
  vector<size_t> sourceOffsets(thisSourceSize, 0);
  vector<size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm = tp.GetAlignNonTerm();
  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      const size_t sourceInd = targetPos2SourceInd[targetPos];
      const size_t sourcePos = sourceInd2pos[sourceInd];

      const Moses::ChartKBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // calc source size
      const size_t sourceSize =
        subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      const size_t currStartTarget = startTarget + totalTargetSize;
      const size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                                currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      // a terminal contributes exactly one target word
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = tp.GetAlignTerm();

  // add to output arg, offsetting by source & target
  for (AlignmentInfo::const_iterator iter = aiTerm.begin();
       iter != aiTerm.end(); ++iter) {
    const std::pair<size_t,size_t> &align = *iter;
    const size_t absSource = sourceOffsets[align.first];
    const size_t absTarget = targetOffsets[align.second];

    pair<size_t, size_t> alignPoint(absSource, absTarget);
    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    // every alignment point must be new
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
|
| 485 |
+
|
| 486 |
+
/**
 * Write the word alignment of the best hypothesis to \p collector as one
 * line of "source-target " pairs, keyed by the translation id of the input.
 * If there is no best hypothesis only the line terminator is written.
 * Does nothing when \p collector is NULL.
 */
void ChartManager::OutputAlignment(OutputCollector *collector) const
{
  // Nothing to do without a sink for the output.
  if (!collector) {
    return;
  }

  std::ostringstream line;

  const ChartHypothesis *best = GetBestHypothesis();
  if (best != NULL) {
    // Gather every alignment point of the derivation, starting at target
    // position 0.
    Alignments points;
    OutputAlignment(points, best, 0);

    for (Alignments::const_iterator it = points.begin(); it != points.end(); ++it) {
      line << it->first << "-" << it->second << " ";
    }
  }
  line << std::endl;

  collector->Write(m_source.GetTranslationId(), line.str());
}
|
| 511 |
+
|
| 512 |
+
/**
 * Recursively collect the terminal alignment points of \p hypo and of all
 * hypotheses below it.
 *
 * \param retAlign    output set; every (source, target) pair found is inserted
 * \param hypo        hypothesis to process; dereferenced without a NULL check,
 *                    so it must not be NULL
 * \param startTarget target-side offset passed to ShiftOffsets for this
 *                    hypothesis, and the base for its children's offsets
 * \return number of target words produced by \p hypo including its children
 *
 * Throws (via UTIL_THROW_IF2) when the rule's alignment information is
 * inconsistent with the rule or with the previous hypotheses, or when the
 * same alignment point would be inserted twice.
 */
size_t ChartManager::OutputAlignment(Alignments &retAlign,
                                     const Moses::ChartHypothesis *hypo,
                                     size_t startTarget) const
{
  size_t totalTargetSize = 0;
  const size_t startSource = hypo->GetCurrSourceRange().GetStartPos();

  const TargetPhrase &tp = hypo->GetCurrTargetPhrase();

  const size_t thisSourceSize = CalcSourceSize(hypo);

  // position of each terminal word in translation rule, irrespective of alignment
  // if non-term, number is undefined
  std::vector<size_t> sourceOffsets(thisSourceSize, 0);
  std::vector<size_t> targetOffsets(tp.GetSize(), 0);

  const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();

  const AlignmentInfo &aiNonTerm = tp.GetAlignNonTerm();
  const std::vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != prevHypos.size(),
                 "Number of aligned source non-terminals differs from number of previous hypotheses");

  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (!tp.GetWord(targetPos).IsNonTerminal()) {
      // A terminal contributes exactly one target word.
      ++totalTargetSize;
      continue;
    }

    UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(),
                   "Target non-terminal position is outside the non-terminal index map");
    const size_t sourceInd = targetPos2SourceInd[targetPos];

    // The index is used for both sourceInd2pos and prevHypos, which were
    // checked above to have the same size.
    UTIL_THROW_IF2(sourceInd >= sourceInd2pos.size(),
                   "Source non-terminal index is out of range");
    const size_t sourcePos = sourceInd2pos[sourceInd];

    UTIL_THROW_IF2(sourcePos >= sourceOffsets.size(),
                   "Source non-terminal position is outside the source side of the rule");

    const ChartHypothesis *prevHypo = prevHypos[sourceInd];

    // calc source size
    sourceOffsets[sourcePos] = prevHypo->GetCurrSourceRange().GetNumWordsCovered();

    // calc target size.
    // Recursively look thru child hypos
    const size_t currStartTarget = startTarget + totalTargetSize;
    const size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
    targetOffsets[targetPos] = targetSize;

    totalTargetSize += targetSize;
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = tp.GetAlignTerm();

  // add to output arg, offsetting by source & target
  for (AlignmentInfo::const_iterator iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const size_t relSource = iter->first;
    const size_t relTarget = iter->second;

    UTIL_THROW_IF2(relSource >= sourceOffsets.size(),
                   "Terminal alignment refers to a source position outside the rule");
    UTIL_THROW_IF2(relTarget >= targetOffsets.size(),
                   "Terminal alignment refers to a target position outside the rule");

    const std::pair<size_t, size_t> alignPoint(sourceOffsets[relSource],
        targetOffsets[relTarget]);
    const std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    UTIL_THROW_IF2(!ret.second, "Duplicate alignment point");
  }

  return totalTargetSize;
}
|
| 587 |
+
|
| 588 |
+
/**
 * Public entry point: write the detailed translation report for the best
 * hypothesis of the current input. Does nothing when \p collector is NULL.
 */
void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  const Sentence &sentence = static_cast<const Sentence&>(m_source);
  OutputDetailedTranslationReport(collector,
                                  GetBestHypothesis(),
                                  sentence,
                                  m_source.GetTranslationId());
}
|
| 597 |
+
|
| 598 |
+
/**
 * Write the translation options used by \p hypo (and, recursively, by the
 * hypotheses below it) to \p collector under \p translationId.
 *
 * When options()->output.detailed_all_transrep_filepath is non-empty, an
 * n-best list is computed as well and handed to
 * OutputDetailedAllTranslationReport together with the input sentence.
 *
 * Returns immediately when \p hypo is NULL. \p collector is dereferenced
 * without a check.
 */
void ChartManager::OutputDetailedTranslationReport(
  OutputCollector *collector,
  const ChartHypothesis *hypo,
  const Sentence &sentence,
  long translationId) const
{
  if (!hypo) {
    return;
  }

  std::ostringstream report;
  ApplicationContext applicationContext;

  OutputTranslationOptions(report, applicationContext, hypo, sentence, translationId);
  collector->Write(translationId, report.str());

  //DIMw
  if (options()->output.detailed_all_transrep_filepath.size()) {
    // This branch reads the sentence from m_source rather than from the
    // `sentence` parameter.
    const Sentence &inputSentence = static_cast<const Sentence &>(m_source);
    const size_t nBestSize = options()->nbest.nbest_size;
    std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
    CalcNBest(nBestSize, nBestList, options()->nbest.only_distinct);
    OutputDetailedAllTranslationReport(collector, nBestList, inputSentence, translationId);
  }
}
|
| 623 |
+
|
| 624 |
+
void ChartManager::OutputTranslationOptions(std::ostream &out,
|
| 625 |
+
ApplicationContext &applicationContext,
|
| 626 |
+
const ChartHypothesis *hypo,
|
| 627 |
+
const Sentence &sentence,
|
| 628 |
+
long translationId) const
|
| 629 |
+
{
|
| 630 |
+
if (hypo != NULL) {
|
| 631 |
+
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
|
| 632 |
+
out << std::endl;
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
// recursive
|
| 636 |
+
const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
| 637 |
+
std::vector<const ChartHypothesis*>::const_iterator iter;
|
| 638 |
+
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
| 639 |
+
const ChartHypothesis *prevHypo = *iter;
|
| 640 |
+
OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
| 641 |
+
}
|
| 642 |
+
}
|
| 643 |
+
|
| 644 |
+
void ChartManager::OutputTranslationOption(std::ostream &out,
|
| 645 |
+
ApplicationContext &applicationContext,
|
| 646 |
+
const ChartHypothesis *hypo,
|
| 647 |
+
const Sentence &sentence,
|
| 648 |
+
long translationId) const
|
| 649 |
+
{
|
| 650 |
+
ReconstructApplicationContext(*hypo, sentence, applicationContext);
|
| 651 |
+
out << "Trans Opt " << translationId
|
| 652 |
+
<< " " << hypo->GetCurrSourceRange()
|
| 653 |
+
<< ": ";
|
| 654 |
+
WriteApplicationContext(out, applicationContext);
|
| 655 |
+
out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
|
| 656 |
+
<< "->" << hypo->GetCurrTargetPhrase()
|
| 657 |
+
<< " " << hypo->GetFutureScore() << hypo->GetScoreBreakdown();
|
| 658 |
+
}
|
| 659 |
+
|
| 660 |
+
// Given a hypothesis and sentence, reconstructs the 'application context' --
|
| 661 |
+
// the source RHS symbols of the SCFG rule that was applied, plus their spans.
|
| 662 |
+
void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
|
| 663 |
+
const Sentence &sentence,
|
| 664 |
+
ApplicationContext &context) const
|
| 665 |
+
{
|
| 666 |
+
context.clear();
|
| 667 |
+
const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
|
| 668 |
+
std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
|
| 669 |
+
std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
|
| 670 |
+
const Range &span = hypo.GetCurrSourceRange();
|
| 671 |
+
size_t i = span.GetStartPos();
|
| 672 |
+
while (i <= span.GetEndPos()) {
|
| 673 |
+
if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
|
| 674 |
+
// Symbol is a terminal.
|
| 675 |
+
const Word &symbol = sentence.GetWord(i);
|
| 676 |
+
context.push_back(std::make_pair(symbol, Range(i, i)));
|
| 677 |
+
++i;
|
| 678 |
+
} else {
|
| 679 |
+
// Symbol is a non-terminal.
|
| 680 |
+
const Word &symbol = (*p)->GetTargetLHS();
|
| 681 |
+
const Range &range = (*p)->GetCurrSourceRange();
|
| 682 |
+
context.push_back(std::make_pair(symbol, range));
|
| 683 |
+
i = range.GetEndPos()+1;
|
| 684 |
+
++p;
|
| 685 |
+
}
|
| 686 |
+
}
|
| 687 |
+
}
|
| 688 |
+
|
| 689 |
+
/**
 * Write the unknown source phrases reported by the parser to \p collector
 * as one line, keyed by the translation id of the input.
 * Does nothing when \p collector is NULL.
 */
void ChartManager::OutputUnknowns(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  const long translationId = m_source.GetTranslationId();
  const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();

  std::ostringstream line;
  for (size_t i = 0; i < oovs.size(); ++i) {
    line << *oovs[i];
  }
  line << std::endl;

  collector->Write(translationId, line.str());
}
|
| 705 |
+
|
| 706 |
+
/**
 * Write the tree-fragment translation report for the best hypothesis to
 * \p collector. If StaticData reports a tree-structure feature function and
 * it is found in the list of stateful feature functions, a "Full Tree" line
 * built from the best hypothesis' state for that feature is appended.
 * Does nothing when \p collector is NULL or there is no best hypothesis.
 */
void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
{
  const ChartHypothesis *best = GetBestHypothesis();
  if (!collector || !best) {
    return;
  }

  const Sentence &sentence = static_cast<const Sentence &>(m_source);
  const size_t translationId = m_source.GetTranslationId();

  std::ostringstream report;
  ApplicationContext applicationContext;
  OutputTreeFragmentsTranslationOptions(report, applicationContext, best, sentence, translationId);

  //Tree of full sentence
  const StatefulFeatureFunction *treeStructure = StaticData::Instance().GetTreeStructure();
  if (treeStructure != NULL) {
    const std::vector<const StatefulFeatureFunction*> &sff =
      StatefulFeatureFunction::GetStatefulFeatureFunctions();

    // Locate the first slot holding the tree-structure feature function.
    size_t slot = 0;
    while (slot < sff.size() && sff[slot] != treeStructure) {
      ++slot;
    }

    if (slot < sff.size()) {
      const TreeState *tree = static_cast<const TreeState*>(best->GetFFState(slot));
      report << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
    }
  }

  collector->Write(translationId, report.str());
}
|
| 738 |
+
|
| 739 |
+
void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
|
| 740 |
+
ApplicationContext &applicationContext,
|
| 741 |
+
const ChartHypothesis *hypo,
|
| 742 |
+
const Sentence &sentence,
|
| 743 |
+
long translationId) const
|
| 744 |
+
{
|
| 745 |
+
|
| 746 |
+
if (hypo != NULL) {
|
| 747 |
+
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
|
| 748 |
+
|
| 749 |
+
const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
|
| 750 |
+
|
| 751 |
+
out << " ||| ";
|
| 752 |
+
if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
|
| 753 |
+
out << " " << *property->GetValueString();
|
| 754 |
+
} else {
|
| 755 |
+
out << " " << "noTreeInfo";
|
| 756 |
+
}
|
| 757 |
+
out << std::endl;
|
| 758 |
+
}
|
| 759 |
+
|
| 760 |
+
// recursive
|
| 761 |
+
const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
| 762 |
+
std::vector<const ChartHypothesis*>::const_iterator iter;
|
| 763 |
+
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
| 764 |
+
const ChartHypothesis *prevHypo = *iter;
|
| 765 |
+
OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
| 766 |
+
}
|
| 767 |
+
}
|
| 768 |
+
|
| 769 |
+
/**
 * Render the search graph with OutputSearchGraphMoses and hand the text to
 * \p collector under the translation id of the input.
 * Does nothing when \p collector is NULL.
 */
void ChartManager::OutputSearchGraph(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  const long translationId = m_source.GetTranslationId();
  std::ostringstream graph;
  OutputSearchGraphMoses(graph);
  collector->Write(translationId, graph.str());
}
|
| 778 |
+
|
| 779 |
+
//DIMw
|
| 780 |
+
/**
 * Write the translation options of every sorted hypothesis in every chart
 * cell to \p collector. Cells are visited by increasing span width and,
 * within one width, by increasing start position.
 *
 * \p nBestList is not used by this function. \p collector and the
 * hypothesis list returned for each cell are dereferenced without a check.
 */
void ChartManager::OutputDetailedAllTranslationReport(
  OutputCollector *collector,
  const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
  const Sentence &sentence,
  long translationId) const
{
  std::ostringstream report;
  ApplicationContext applicationContext;

  const ChartCellCollection &cells = GetChartCellCollection();
  const size_t sentenceLength = GetSource().GetSize();

  for (size_t width = 1; width <= sentenceLength; ++width) {
    // width <= sentenceLength here, so the subtraction cannot wrap.
    const size_t lastStart = sentenceLength - width;
    for (size_t startPos = 0; startPos <= lastStart; ++startPos) {
      const size_t endPos = startPos + width - 1;
      const ChartCell &cell = cells.Get(Range(startPos, endPos));
      const HypoList *hyps = cell.GetAllSortedHypotheses();

      report << "Chart Cell [" << startPos << ".." << endPos << "]" << std::endl;

      size_t itemNo = 1;
      for (HypoList::const_iterator it = hyps->begin(); it != hyps->end(); ++it) {
        report << "----------------Item " << itemNo << " ---------------------"
               << std::endl;
        ++itemNo;
        OutputTranslationOptions(report, applicationContext, *it,
                                 sentence, translationId);
      }
    }
  }

  collector->Write(translationId, report.str());
}
|
| 810 |
+
|
| 811 |
+
/**
 * Write the surface string of \p hypo to \p collector under
 * \p translationId, optionally prefixed by its future score
 * (output.ReportHypoScore) and by "||| " (output.RecoverPath).
 * The first and last word of the output phrase are removed before printing.
 * When \p hypo is NULL an empty line is written, preceded by "0 " if scores
 * are reported. Does nothing when \p collector is NULL.
 */
void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
{
  if (collector == NULL) {
    return;
  }

  std::ostringstream out;
  FixPrecision(out);

  if (hypo == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);

    if (options()->output.ReportHypoScore) {
      out << "0 ";
    }

    out << std::endl;
  } else {
    VERBOSE(1,"BEST TRANSLATION: " << *hypo << std::endl);
    VERBOSE(3,"Best path: ");
    Backtrack(hypo);
    VERBOSE(3,"0" << std::endl);

    if (options()->output.ReportHypoScore) {
      out << hypo->GetFutureScore() << " ";
    }

    if (options()->output.RecoverPath) {
      out << "||| ";
    }

    Phrase outPhrase(ARRAY_SIZE_INCR);
    hypo->GetOutputPhrase(outPhrase);

    // delete 1st & last
    UTIL_THROW_IF2(outPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");

    outPhrase.RemoveWord(0);
    outPhrase.RemoveWord(outPhrase.GetSize() - 1);

    out << outPhrase.GetStringRep(options()->output.factor_order) << std::endl;
  }

  collector->Write(translationId, out.str());
}
|
| 853 |
+
|
| 854 |
+
void ChartManager::Backtrack(const ChartHypothesis *hypo) const
|
| 855 |
+
{
|
| 856 |
+
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
| 857 |
+
|
| 858 |
+
vector<const ChartHypothesis*>::const_iterator iter;
|
| 859 |
+
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
| 860 |
+
const ChartHypothesis *prevHypo = *iter;
|
| 861 |
+
|
| 862 |
+
VERBOSE(3,prevHypo->GetId() << " <= ");
|
| 863 |
+
Backtrack(prevHypo);
|
| 864 |
+
}
|
| 865 |
+
}
|
| 866 |
+
|
| 867 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartManager.h
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#pragma once
|
| 23 |
+
|
| 24 |
+
#include <vector>
|
| 25 |
+
#include <boost/unordered_map.hpp>
|
| 26 |
+
#include "ChartCell.h"
|
| 27 |
+
#include "ChartCellCollection.h"
|
| 28 |
+
#include "Range.h"
|
| 29 |
+
#include "SentenceStats.h"
|
| 30 |
+
#include "ChartTranslationOptionList.h"
|
| 31 |
+
#include "ChartParser.h"
|
| 32 |
+
#include "ChartKBestExtractor.h"
|
| 33 |
+
#include "BaseManager.h"
|
| 34 |
+
#include "moses/Syntax/KBestExtractor.h"
|
| 35 |
+
|
| 36 |
+
namespace Moses
|
| 37 |
+
{
|
| 38 |
+
|
| 39 |
+
class ChartHypothesis;
|
| 40 |
+
class ChartSearchGraphWriter;
|
| 41 |
+
|
| 42 |
+
/** Holds everything you need to decode 1 sentence with the hierachical/syntax decoder
|
| 43 |
+
*/
|
| 44 |
+
class ChartManager : public BaseManager
|
| 45 |
+
{
|
| 46 |
+
private:
|
| 47 |
+
ChartCellCollection m_hypoStackColl;
|
| 48 |
+
std::auto_ptr<SentenceStats> m_sentenceStats;
|
| 49 |
+
clock_t m_start; /**< starting time, used for logging */
|
| 50 |
+
unsigned m_hypothesisId; /* For handing out hypothesis ids to ChartHypothesis */
|
| 51 |
+
|
| 52 |
+
ChartParser m_parser;
|
| 53 |
+
|
| 54 |
+
ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
|
| 55 |
+
|
| 56 |
+
/* auxilliary functions for SearchGraphs */
|
| 57 |
+
void FindReachableHypotheses(
|
| 58 |
+
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
|
| 59 |
+
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
|
| 60 |
+
|
| 61 |
+
// output
|
| 62 |
+
void OutputNBestList(OutputCollector *collector,
|
| 63 |
+
const ChartKBestExtractor::KBestVec &nBestList,
|
| 64 |
+
long translationId) const;
|
| 65 |
+
size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
|
| 66 |
+
size_t OutputAlignmentNBest(Alignments &retAlign,
|
| 67 |
+
const Moses::ChartKBestExtractor::Derivation &derivation,
|
| 68 |
+
size_t startTarget) const;
|
| 69 |
+
size_t OutputAlignment(Alignments &retAlign,
|
| 70 |
+
const Moses::ChartHypothesis *hypo,
|
| 71 |
+
size_t startTarget) const;
|
| 72 |
+
void OutputDetailedTranslationReport(
|
| 73 |
+
OutputCollector *collector,
|
| 74 |
+
const ChartHypothesis *hypo,
|
| 75 |
+
const Sentence &sentence,
|
| 76 |
+
long translationId) const;
|
| 77 |
+
void OutputTranslationOptions(std::ostream &out,
|
| 78 |
+
ApplicationContext &applicationContext,
|
| 79 |
+
const ChartHypothesis *hypo,
|
| 80 |
+
const Sentence &sentence,
|
| 81 |
+
long translationId) const;
|
| 82 |
+
void OutputTranslationOption(std::ostream &out,
|
| 83 |
+
ApplicationContext &applicationContext,
|
| 84 |
+
const ChartHypothesis *hypo,
|
| 85 |
+
const Sentence &sentence,
|
| 86 |
+
long translationId) const;
|
| 87 |
+
void ReconstructApplicationContext(const ChartHypothesis &hypo,
|
| 88 |
+
const Sentence &sentence,
|
| 89 |
+
ApplicationContext &context) const;
|
| 90 |
+
void OutputTreeFragmentsTranslationOptions(std::ostream &out,
|
| 91 |
+
ApplicationContext &applicationContext,
|
| 92 |
+
const ChartHypothesis *hypo,
|
| 93 |
+
const Sentence &sentence,
|
| 94 |
+
long translationId) const;
|
| 95 |
+
void OutputDetailedAllTranslationReport(
|
| 96 |
+
OutputCollector *collector,
|
| 97 |
+
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
|
| 98 |
+
const Sentence &sentence,
|
| 99 |
+
long translationId) const;
|
| 100 |
+
void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
|
| 101 |
+
void Backtrack(const ChartHypothesis *hypo) const;
|
| 102 |
+
|
| 103 |
+
public:
|
| 104 |
+
ChartManager(ttasksptr const& ttask);
|
| 105 |
+
~ChartManager();
|
| 106 |
+
void Decode();
|
| 107 |
+
void AddXmlChartOptions();
|
| 108 |
+
const ChartHypothesis *GetBestHypothesis() const;
|
| 109 |
+
void CalcNBest(size_t n, std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList, bool onlyDistinct=false) const;
|
| 110 |
+
|
| 111 |
+
/** "Moses" (osg) type format */
|
| 112 |
+
void OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const;
|
| 113 |
+
|
| 114 |
+
/** Output in (modified) Kenneth hypergraph format */
|
| 115 |
+
void OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const;
|
| 116 |
+
|
| 117 |
+
//! debug data collected when decoding sentence
|
| 118 |
+
SentenceStats& GetSentenceStats() const {
|
| 119 |
+
return *m_sentenceStats;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
//DIMw
|
| 123 |
+
const ChartCellCollection& GetChartCellCollection() const {
|
| 124 |
+
return m_hypoStackColl;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
void CalcDecoderStatistics() const {
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
void ResetSentenceStats(const InputType& source) {
|
| 131 |
+
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
//! contigious hypo id for each input sentence. For debugging purposes
|
| 135 |
+
unsigned GetNextHypoId() {
|
| 136 |
+
return m_hypothesisId++;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
const ChartParser &GetParser() const {
|
| 140 |
+
return m_parser;
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
// outputs
|
| 144 |
+
void OutputBest(OutputCollector *collector) const;
|
| 145 |
+
void OutputNBest(OutputCollector *collector) const;
|
| 146 |
+
void OutputLatticeSamples(OutputCollector *collector) const {
|
| 147 |
+
}
|
| 148 |
+
void OutputAlignment(OutputCollector *collector) const;
|
| 149 |
+
void OutputDetailedTranslationReport(OutputCollector *collector) const;
|
| 150 |
+
void OutputUnknowns(OutputCollector *collector) const;
|
| 151 |
+
void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
|
| 152 |
+
void OutputWordGraph(OutputCollector *collector) const {
|
| 153 |
+
}
|
| 154 |
+
void OutputSearchGraph(OutputCollector *collector) const;
|
| 155 |
+
void OutputSearchGraphSLF() const {
|
| 156 |
+
}
|
| 157 |
+
// void OutputSearchGraphHypergraph() const;
|
| 158 |
+
|
| 159 |
+
};
|
| 160 |
+
|
| 161 |
+
}
|
| 162 |
+
|
mosesdecoder/moses/ChartParser.cpp
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "ChartParser.h"
|
| 23 |
+
#include "ChartParserCallback.h"
|
| 24 |
+
#include "ChartRuleLookupManager.h"
|
| 25 |
+
#include "StaticData.h"
|
| 26 |
+
#include "TreeInput.h"
|
| 27 |
+
#include "Sentence.h"
|
| 28 |
+
#include "DecodeGraph.h"
|
| 29 |
+
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
| 30 |
+
#include "moses/TranslationModel/PhraseDictionary.h"
|
| 31 |
+
#include "moses/TranslationTask.h"
|
| 32 |
+
|
| 33 |
+
using namespace std;
|
| 34 |
+
using namespace Moses;
|
| 35 |
+
|
| 36 |
+
namespace Moses
|
| 37 |
+
{
|
| 38 |
+
|
| 39 |
+
// Stores the translation task handle in m_ttask; no other work is done.
ChartParserUnknown::ChartParserUnknown(ttasksptr const& ttask)
  : m_ttask(ttask)
{
}
|
| 43 |
+
|
| 44 |
+
// Hands the collection of unknown-source phrases to RemoveAllInColl.
// NOTE(review): presumably this deletes the Phrase objects that Process()
// allocates with `new` and pushes onto m_unksrcs; confirm against the helper.
ChartParserUnknown::~ChartParserUnknown()
{
  RemoveAllInColl(m_unksrcs);
}
|
| 48 |
+
|
| 49 |
+
// Returns the options of the translation task this object was created for.
// The task is reached through m_ttask.lock(); the result of lock() is
// dereferenced without a check.
AllOptions::ptr const&
ChartParserUnknown::options() const
{
  return m_ttask.lock()->options();
}
|
| 55 |
+
|
| 56 |
+
void
|
| 57 |
+
ChartParserUnknown::
|
| 58 |
+
Process(const Word &sourceWord, const Range &range, ChartParserCallback &to)
|
| 59 |
+
{
|
| 60 |
+
// unknown word, add as trans opt
|
| 61 |
+
const StaticData &staticData = StaticData::Instance();
|
| 62 |
+
const UnknownWordPenaltyProducer &unknownWordPenaltyProducer
|
| 63 |
+
= UnknownWordPenaltyProducer::Instance();
|
| 64 |
+
|
| 65 |
+
size_t isDigit = 0;
|
| 66 |
+
if (options()->unk.drop) {
|
| 67 |
+
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
|
| 68 |
+
const StringPiece s = f->GetString();
|
| 69 |
+
isDigit = s.find_first_of("0123456789");
|
| 70 |
+
if (isDigit == string::npos)
|
| 71 |
+
isDigit = 0;
|
| 72 |
+
else
|
| 73 |
+
isDigit = 1;
|
| 74 |
+
// modify the starting bitmap
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
Phrase* unksrc = new Phrase(1);
|
| 78 |
+
unksrc->AddWord() = sourceWord;
|
| 79 |
+
Word &newWord = unksrc->GetWord(0);
|
| 80 |
+
newWord.SetIsOOV(true);
|
| 81 |
+
|
| 82 |
+
m_unksrcs.push_back(unksrc);
|
| 83 |
+
|
| 84 |
+
// hack. Once the OOV FF is a phrase table, get rid of this
|
| 85 |
+
PhraseDictionary *firstPt = NULL;
|
| 86 |
+
if (PhraseDictionary::GetColl().size() == 0) {
|
| 87 |
+
firstPt = PhraseDictionary::GetColl()[0];
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
//TranslationOption *transOpt;
|
| 91 |
+
if (! options()->unk.drop || isDigit) {
|
| 92 |
+
// loop
|
| 93 |
+
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs; // staticData.GetUnknownLHS();
|
| 94 |
+
UnknownLHSList::const_iterator iterLHS;
|
| 95 |
+
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
|
| 96 |
+
const string &targetLHSStr = iterLHS->first;
|
| 97 |
+
float prob = iterLHS->second;
|
| 98 |
+
|
| 99 |
+
// lhs
|
| 100 |
+
//const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
|
| 101 |
+
Word *targetLHS = new Word(true);
|
| 102 |
+
|
| 103 |
+
targetLHS->CreateFromString(Output, options()->output.factor_order,
|
| 104 |
+
targetLHSStr, true);
|
| 105 |
+
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
| 106 |
+
|
| 107 |
+
// add to dictionary
|
| 108 |
+
TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
|
| 109 |
+
Word &targetWord = targetPhrase->AddWord();
|
| 110 |
+
targetWord.CreateUnknownWord(sourceWord);
|
| 111 |
+
|
| 112 |
+
// scores
|
| 113 |
+
float unknownScore = FloorScore(TransformScore(prob));
|
| 114 |
+
|
| 115 |
+
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
|
| 116 |
+
targetPhrase->SetTargetLHS(targetLHS);
|
| 117 |
+
targetPhrase->SetAlignmentInfo("0-0");
|
| 118 |
+
targetPhrase->EvaluateInIsolation(*unksrc);
|
| 119 |
+
|
| 120 |
+
if (!options()->output.detailed_tree_transrep_filepath.empty() ||
|
| 121 |
+
options()->nbest.print_trees || staticData.GetTreeStructure() != NULL) {
|
| 122 |
+
std::string prop = "[ ";
|
| 123 |
+
prop += (*targetLHS)[0]->GetString().as_string() + " ";
|
| 124 |
+
prop += sourceWord[0]->GetString().as_string() + " ]";
|
| 125 |
+
targetPhrase->SetProperty("Tree", prop);
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
// chart rule
|
| 129 |
+
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
|
| 130 |
+
} // for (iterLHS
|
| 131 |
+
} else {
|
| 132 |
+
// drop source word. create blank trans opt
|
| 133 |
+
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
|
| 134 |
+
|
| 135 |
+
TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
|
| 136 |
+
// loop
|
| 137 |
+
const UnknownLHSList &lhsList = options()->syntax.unknown_lhs;//staticData.GetUnknownLHS();
|
| 138 |
+
UnknownLHSList::const_iterator iterLHS;
|
| 139 |
+
for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
|
| 140 |
+
const string &targetLHSStr = iterLHS->first;
|
| 141 |
+
//float prob = iterLHS->second;
|
| 142 |
+
|
| 143 |
+
Word *targetLHS = new Word(true);
|
| 144 |
+
targetLHS->CreateFromString(Output, staticData.options()->output.factor_order,
|
| 145 |
+
targetLHSStr, true);
|
| 146 |
+
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
| 147 |
+
|
| 148 |
+
targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
|
| 149 |
+
targetPhrase->EvaluateInIsolation(*unksrc);
|
| 150 |
+
|
| 151 |
+
targetPhrase->SetTargetLHS(targetLHS);
|
| 152 |
+
|
| 153 |
+
// chart rule
|
| 154 |
+
to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
|
| 155 |
+
}
|
| 156 |
+
}
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
/**
 * Set up a parser for the source input attached to \p ttask.
 *
 * Calls StaticData::InitializeForInput() for the task, builds the input path
 * matrix for the source, and then asks every phrase dictionary for one rule
 * lookup manager. Dictionary i is paired with decode graph i, which supplies
 * the maximum chart span passed to CreateRuleLookupManager().
 *
 * \param ttask translation task that owns the source input
 * \param cells chart cell collection handed to every rule lookup manager
 */
ChartParser
::ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells)
  : m_ttask(ttask)
  , m_unknown(ttask)
  , m_decodeGraphList(StaticData::Instance().GetDecodeGraphs())
  , m_source(*(ttask->GetSource().get()))
{
  StaticData::Instance().InitializeForInput(ttask);
  CreateInputPaths(m_source);

  const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
  // dictionaries and decode graphs are matched up by position below
  assert(pts.size() == m_decodeGraphList.size());

  m_ruleLookupManagers.reserve(pts.size());
  for (std::size_t idx = 0; idx < pts.size(); ++idx) {
    PhraseDictionary *pt = pts[idx];
    const std::size_t spanLimit = m_decodeGraphList[idx]->GetMaxChartSpan();
    m_ruleLookupManagers.push_back(
      pt->CreateRuleLookupManager(*this, cells, spanLimit));
  }
}
|
| 183 |
+
|
| 184 |
+
/**
 * Release everything the parser allocated: the rule lookup managers and every
 * InputPath stored in the input path matrix. Also notifies StaticData that
 * processing of this task's sentence has finished.
 */
ChartParser::~ChartParser()
{
  RemoveAllInColl(m_ruleLookupManagers);
  StaticData::Instance().CleanUpAfterSentenceProcessing(m_ttask.lock());

  // the matrix holds raw owning pointers, so each path is deleted by hand
  for (size_t row = 0; row < m_inputPathMatrix.size(); ++row) {
    const std::vector<InputPath*> &paths = m_inputPathMatrix[row];
    for (size_t col = 0; col < paths.size(); ++col) {
      delete paths[col];
    }
  }
}
|
| 200 |
+
|
| 201 |
+
void ChartParser::Create(const Range &range, ChartParserCallback &to)
|
| 202 |
+
{
|
| 203 |
+
assert(m_decodeGraphList.size() == m_ruleLookupManagers.size());
|
| 204 |
+
|
| 205 |
+
std::vector <DecodeGraph*>::const_iterator iterDecodeGraph;
|
| 206 |
+
std::vector <ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin();
|
| 207 |
+
for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) {
|
| 208 |
+
const DecodeGraph &decodeGraph = **iterDecodeGraph;
|
| 209 |
+
assert(decodeGraph.GetSize() == 1);
|
| 210 |
+
ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers;
|
| 211 |
+
size_t maxSpan = decodeGraph.GetMaxChartSpan();
|
| 212 |
+
size_t last = m_source.GetSize()-1;
|
| 213 |
+
if (maxSpan != 0) {
|
| 214 |
+
last = min(last, range.GetStartPos()+maxSpan);
|
| 215 |
+
}
|
| 216 |
+
if (maxSpan == 0 || range.GetNumWordsCovered() <= maxSpan) {
|
| 217 |
+
const InputPath &inputPath = GetInputPath(range);
|
| 218 |
+
ruleLookupManager.GetChartRuleCollection(inputPath, last, to);
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
if (range.GetNumWordsCovered() == 1
|
| 223 |
+
&& range.GetStartPos() != 0
|
| 224 |
+
&& range.GetStartPos() != m_source.GetSize()-1) {
|
| 225 |
+
bool always = options()->unk.always_create_direct_transopt;
|
| 226 |
+
if (to.Empty() || always) {
|
| 227 |
+
// create unknown words for 1 word coverage where we don't have any trans options
|
| 228 |
+
const Word &sourceWord = m_source.GetWord(range.GetStartPos());
|
| 229 |
+
m_unknown.Process(sourceWord, range, to);
|
| 230 |
+
}
|
| 231 |
+
}
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
void ChartParser::CreateInputPaths(const InputType &input)
|
| 235 |
+
{
|
| 236 |
+
size_t size = input.GetSize();
|
| 237 |
+
m_inputPathMatrix.resize(size);
|
| 238 |
+
|
| 239 |
+
UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
|
| 240 |
+
"Input must be a sentence or a tree, " <<
|
| 241 |
+
"not lattice or confusion networks");
|
| 242 |
+
|
| 243 |
+
TranslationTask const* ttask = m_ttask.lock().get();
|
| 244 |
+
for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
|
| 245 |
+
for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
|
| 246 |
+
size_t endPos = startPos + phaseSize -1;
|
| 247 |
+
vector<InputPath*> &vec = m_inputPathMatrix[startPos];
|
| 248 |
+
|
| 249 |
+
Range range(startPos, endPos);
|
| 250 |
+
Phrase subphrase(input.GetSubString(Range(startPos, endPos)));
|
| 251 |
+
const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
|
| 252 |
+
|
| 253 |
+
InputPath *node;
|
| 254 |
+
if (range.GetNumWordsCovered() == 1) {
|
| 255 |
+
node = new InputPath(ttask, subphrase, labels, range, NULL, NULL);
|
| 256 |
+
vec.push_back(node);
|
| 257 |
+
} else {
|
| 258 |
+
const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
|
| 259 |
+
node = new InputPath(ttask, subphrase, labels, range, &prevNode, NULL);
|
| 260 |
+
vec.push_back(node);
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
//m_inputPathQueue.push_back(node);
|
| 264 |
+
}
|
| 265 |
+
}
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
//! Convenience overload: look up the input path by the start and end position of \p range.
const InputPath &ChartParser::GetInputPath(const Range &range) const
{
  const size_t first = range.GetStartPos();
  const size_t last = range.GetEndPos();
  return GetInputPath(first, last);
}
|
| 272 |
+
|
| 273 |
+
/**
 * Return the input path covering [startPos, endPos].
 *
 * Paths are stored per start position, indexed by endPos - startPos.
 * Throws (via UTIL_THROW_IF2) when either index is out of range. An endPos
 * smaller than startPos wraps around to a huge offset and is rejected by the
 * second check.
 */
const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const
{
  // vector::operator[] is unchecked, so validate the row index first
  UTIL_THROW_IF2(startPos >= m_inputPathMatrix.size(),
                 "Out of bound start position: " << startPos);
  const std::vector<InputPath*> &row = m_inputPathMatrix[startPos];

  size_t offset = endPos - startPos;
  UTIL_THROW_IF2(offset >= row.size(),
                 "Out of bound: " << offset);
  return *row[offset];
}
|
| 280 |
+
|
| 281 |
+
/**
 * Mutable variant: return the input path covering [startPos, endPos].
 *
 * Paths are stored per start position, indexed by endPos - startPos.
 * Throws (via UTIL_THROW_IF2) when either index is out of range. An endPos
 * smaller than startPos wraps around to a huge offset and is rejected by the
 * second check.
 */
InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos)
{
  // vector::operator[] is unchecked, so validate the row index first
  UTIL_THROW_IF2(startPos >= m_inputPathMatrix.size(),
                 "Out of bound start position: " << startPos);
  std::vector<InputPath*> &row = m_inputPathMatrix[startPos];

  size_t offset = endPos - startPos;
  UTIL_THROW_IF2(offset >= row.size(),
                 "Out of bound: " << offset);
  return *row[offset];
}
|
| 288 |
+
/*
|
| 289 |
+
const Sentence &ChartParser::GetSentence() const {
|
| 290 |
+
const Sentence &sentence = static_cast<const Sentence&>(m_source);
|
| 291 |
+
return sentence;
|
| 292 |
+
}
|
| 293 |
+
*/
|
| 294 |
+
size_t ChartParser::GetSize() const
|
| 295 |
+
{
|
| 296 |
+
return m_source.GetSize();
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
long ChartParser::GetTranslationId() const
|
| 300 |
+
{
|
| 301 |
+
return m_source.GetTranslationId();
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
/**
 * Options of the translation task this parser belongs to.
 *
 * The weak task pointer is locked and dereferenced without a null check, so
 * the task must still be alive when this is called.
 */
AllOptions::ptr const&
ChartParser::
options() const
{
  return (*m_ttask.lock()).options();
}
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartParser.h
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- c++ -*-
|
| 2 |
+
// $Id$
|
| 3 |
+
// vim:tabstop=2
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2010 Hieu Hoang
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#pragma once
|
| 24 |
+
|
| 25 |
+
#include <list>
|
| 26 |
+
#include <vector>
|
| 27 |
+
#include "Range.h"
|
| 28 |
+
#include "StackVec.h"
|
| 29 |
+
#include "InputPath.h"
|
| 30 |
+
#include "TargetPhraseCollection.h"
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
class ChartParserCallback;
|
| 35 |
+
class ChartRuleLookupManager;
|
| 36 |
+
class InputType;
|
| 37 |
+
class Sentence;
|
| 38 |
+
class ChartCellCollectionBase;
|
| 39 |
+
class Word;
|
| 40 |
+
class Phrase;
|
| 41 |
+
// class TargetPhraseCollection;
|
| 42 |
+
class DecodeGraph;
|
| 43 |
+
|
| 44 |
+
class ChartParserUnknown
|
| 45 |
+
{
|
| 46 |
+
ttaskwptr m_ttask;
|
| 47 |
+
public:
|
| 48 |
+
ChartParserUnknown(ttasksptr const& ttask);
|
| 49 |
+
~ChartParserUnknown();
|
| 50 |
+
|
| 51 |
+
void Process(const Word &sourceWord, const Range &range, ChartParserCallback &to);
|
| 52 |
+
|
| 53 |
+
const std::vector<Phrase*> &GetUnknownSources() const {
|
| 54 |
+
return m_unksrcs;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
private:
|
| 58 |
+
std::vector<Phrase*> m_unksrcs;
|
| 59 |
+
std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
|
| 60 |
+
AllOptions::ptr const& options() const;
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
class ChartParser
|
| 64 |
+
{
|
| 65 |
+
ttaskwptr m_ttask;
|
| 66 |
+
public:
|
| 67 |
+
ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells);
|
| 68 |
+
~ChartParser();
|
| 69 |
+
|
| 70 |
+
void Create(const Range &range, ChartParserCallback &to);
|
| 71 |
+
|
| 72 |
+
//! the sentence being decoded
|
| 73 |
+
//const Sentence &GetSentence() const;
|
| 74 |
+
long GetTranslationId() const;
|
| 75 |
+
size_t GetSize() const;
|
| 76 |
+
const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
|
| 77 |
+
const InputPath &GetInputPath(const Range &range) const;
|
| 78 |
+
const std::vector<Phrase*> &GetUnknownSources() const {
|
| 79 |
+
return m_unknown.GetUnknownSources();
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
AllOptions::ptr const& options() const;
|
| 83 |
+
|
| 84 |
+
private:
|
| 85 |
+
ChartParserUnknown m_unknown;
|
| 86 |
+
std::vector <DecodeGraph*> m_decodeGraphList;
|
| 87 |
+
std::vector<ChartRuleLookupManager*> m_ruleLookupManagers;
|
| 88 |
+
InputType const& m_source; /**< source sentence to be translated */
|
| 89 |
+
|
| 90 |
+
typedef std::vector< std::vector<InputPath*> > InputPathMatrix;
|
| 91 |
+
InputPathMatrix m_inputPathMatrix;
|
| 92 |
+
|
| 93 |
+
void CreateInputPaths(const InputType &input);
|
| 94 |
+
InputPath &GetInputPath(size_t startPos, size_t endPos);
|
| 95 |
+
|
| 96 |
+
};
|
| 97 |
+
|
| 98 |
+
}
|
| 99 |
+
|
mosesdecoder/moses/ChartParserCallback.h
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "StackVec.h"
|
| 4 |
+
|
| 5 |
+
#include <list>
|
| 6 |
+
#include "TargetPhraseCollection.h"
|
| 7 |
+
|
| 8 |
+
namespace Moses
|
| 9 |
+
{
|
| 10 |
+
|
| 11 |
+
class TargetPhraseCollection;
|
| 12 |
+
class Range;
|
| 13 |
+
class TargetPhrase;
|
| 14 |
+
class InputPath;
|
| 15 |
+
class InputType;
|
| 16 |
+
class ChartCellLabel;
|
| 17 |
+
|
| 18 |
+
class ChartParserCallback
|
| 19 |
+
{
|
| 20 |
+
public:
|
| 21 |
+
virtual ~ChartParserCallback() {}
|
| 22 |
+
|
| 23 |
+
virtual void Add(const TargetPhraseCollection &, const StackVec &, const Range &) = 0;
|
| 24 |
+
|
| 25 |
+
virtual bool Empty() const = 0;
|
| 26 |
+
|
| 27 |
+
virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const Range &range) = 0;
|
| 28 |
+
|
| 29 |
+
virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;
|
| 30 |
+
|
| 31 |
+
virtual float GetBestScore(const ChartCellLabel *chartCell) const = 0;
|
| 32 |
+
|
| 33 |
+
};
|
| 34 |
+
|
| 35 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartRuleLookupManager.cpp
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "ChartRuleLookupManager.h"
|
| 2 |
+
#include "ChartParser.h"
|
| 3 |
+
|
| 4 |
+
namespace Moses
|
| 5 |
+
{
|
| 6 |
+
// Defined out of line; the base class has nothing to release.
ChartRuleLookupManager::~ChartRuleLookupManager()
{
}
|
| 8 |
+
} // namespace Moses
|
| 9 |
+
|
mosesdecoder/moses/ChartRuleLookupManager.h
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
#ifndef moses_ChartRuleLookupManager_h
|
| 22 |
+
#define moses_ChartRuleLookupManager_h
|
| 23 |
+
|
| 24 |
+
#include "ChartCellCollection.h"
|
| 25 |
+
#include "InputType.h"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
class ChartParser;
|
| 30 |
+
class ChartParserCallback;
|
| 31 |
+
class Range;
|
| 32 |
+
class Sentence;
|
| 33 |
+
|
| 34 |
+
/** Defines an interface for looking up rules in a rule table. Concrete
|
| 35 |
+
* implementation classes should correspond to specific PhraseDictionary
|
| 36 |
+
* subclasses (memory or on-disk). Since a ChartRuleLookupManager object
|
| 37 |
+
* maintains sentence-specific state, exactly one should be created for
|
| 38 |
+
* each sentence that is to be decoded.
|
| 39 |
+
*/
|
| 40 |
+
class ChartRuleLookupManager
|
| 41 |
+
{
|
| 42 |
+
public:
|
| 43 |
+
ChartRuleLookupManager(const ChartParser &parser,
|
| 44 |
+
const ChartCellCollectionBase &cellColl)
|
| 45 |
+
: m_parser(parser)
|
| 46 |
+
, m_cellCollection(cellColl) {}
|
| 47 |
+
|
| 48 |
+
virtual ~ChartRuleLookupManager();
|
| 49 |
+
|
| 50 |
+
const ChartCellLabelSet &GetTargetLabelSet(size_t begin, size_t end) const {
|
| 51 |
+
return m_cellCollection.GetBase(Range(begin, end)).GetTargetLabelSet();
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
const ChartParser &GetParser() const {
|
| 55 |
+
return m_parser;
|
| 56 |
+
}
|
| 57 |
+
//const Sentence &GetSentence() const;
|
| 58 |
+
|
| 59 |
+
const ChartCellLabel &GetSourceAt(size_t at) const {
|
| 60 |
+
return m_cellCollection.GetSourceWordLabel(at);
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
/** abstract function. Return a vector of translation options for given a range in the input sentence
|
| 64 |
+
* \param range source range for which you want the translation options
|
| 65 |
+
* \param outColl return argument
|
| 66 |
+
*/
|
| 67 |
+
virtual void GetChartRuleCollection(
|
| 68 |
+
const InputPath &inputPath,
|
| 69 |
+
size_t lastPos, // last position to consider if using lookahead
|
| 70 |
+
ChartParserCallback &outColl) = 0;
|
| 71 |
+
|
| 72 |
+
private:
|
| 73 |
+
//! Non-copyable: copy constructor and assignment operator not implemented.
|
| 74 |
+
ChartRuleLookupManager(const ChartRuleLookupManager &);
|
| 75 |
+
//! Non-copyable: copy constructor and assignment operator not implemented.
|
| 76 |
+
ChartRuleLookupManager &operator=(const ChartRuleLookupManager &);
|
| 77 |
+
|
| 78 |
+
const ChartParser &m_parser;
|
| 79 |
+
const ChartCellCollectionBase &m_cellCollection;
|
| 80 |
+
};
|
| 81 |
+
|
| 82 |
+
} // namespace Moses
|
| 83 |
+
|
| 84 |
+
#endif
|
mosesdecoder/moses/ChartTranslationOption.cpp
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "ChartTranslationOptions.h"
|
| 2 |
+
#include "InputType.h"
|
| 3 |
+
#include "InputPath.h"
|
| 4 |
+
|
| 5 |
+
namespace Moses
|
| 6 |
+
{
|
| 7 |
+
/**
 * Wrap \p targetPhrase as a chart translation option.
 *
 * The target phrase is held by reference and its score breakdown is copied.
 * The input path and rule-source pointers start out null, so GetInputPath()
 * and GetSourceRuleFromInputPath() return a well-defined value before the
 * matching setters have been called.
 */
ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
  :m_targetPhrase(targetPhrase)
  ,m_scoreBreakdown(targetPhrase.GetScoreBreakdown())
  ,m_inputPath()                // value-initialised: null pointer
  ,m_ruleSourceFromInputPath()  // value-initialised: null pointer
{
}
|
| 12 |
+
|
| 13 |
+
void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
|
| 14 |
+
const InputPath &inputPath,
|
| 15 |
+
const StackVec &stackVec)
|
| 16 |
+
{
|
| 17 |
+
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
| 18 |
+
|
| 19 |
+
for (size_t i = 0; i < ffs.size(); ++i) {
|
| 20 |
+
const FeatureFunction &ff = *ffs[i];
|
| 21 |
+
ff.EvaluateWithSourceContext(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown);
|
| 22 |
+
}
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
std::ostream& operator<<(std::ostream &out, const ChartTranslationOption &transOpt)
|
| 27 |
+
{
|
| 28 |
+
out << transOpt.m_targetPhrase << " " << transOpt.m_scoreBreakdown;
|
| 29 |
+
return out;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
}
|
| 33 |
+
|
mosesdecoder/moses/ChartTranslationOption.h
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "ScoreComponentCollection.h"
|
| 4 |
+
|
| 5 |
+
namespace Moses
|
| 6 |
+
{
|
| 7 |
+
class TargetPhrase;
|
| 8 |
+
class InputPath;
|
| 9 |
+
class InputType;
|
| 10 |
+
class StackVec;
|
| 11 |
+
|
| 12 |
+
class ChartTranslationOption
|
| 13 |
+
{
|
| 14 |
+
friend std::ostream& operator<<(std::ostream&, const ChartTranslationOption&);
|
| 15 |
+
|
| 16 |
+
protected:
|
| 17 |
+
const TargetPhrase &m_targetPhrase;
|
| 18 |
+
ScoreComponentCollection m_scoreBreakdown;
|
| 19 |
+
const InputPath *m_inputPath;
|
| 20 |
+
const std::vector<const Word*> *m_ruleSourceFromInputPath; // used by placeholders
|
| 21 |
+
|
| 22 |
+
public:
|
| 23 |
+
ChartTranslationOption(const TargetPhrase &targetPhrase);
|
| 24 |
+
|
| 25 |
+
const TargetPhrase &GetPhrase() const {
|
| 26 |
+
return m_targetPhrase;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
const InputPath *GetInputPath() const {
|
| 30 |
+
return m_inputPath;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
void SetInputPath(const InputPath *inputPath) {
|
| 34 |
+
m_inputPath = inputPath;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
const std::vector<const Word*> *GetSourceRuleFromInputPath() const {
|
| 38 |
+
return m_ruleSourceFromInputPath;
|
| 39 |
+
}
|
| 40 |
+
void SetSourceRuleFromInputPath(const std::vector<const Word*> *obj) {
|
| 41 |
+
m_ruleSourceFromInputPath = obj;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
const ScoreComponentCollection &GetScores() const {
|
| 45 |
+
return m_scoreBreakdown;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
void EvaluateWithSourceContext(const InputType &input,
|
| 49 |
+
const InputPath &inputPath,
|
| 50 |
+
const StackVec &stackVec);
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
}
|
| 54 |
+
|
mosesdecoder/moses/ChartTranslationOptionList.cpp
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010 Hieu Hoang
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include <algorithm>
|
| 21 |
+
#include <iostream>
|
| 22 |
+
#include <vector>
|
| 23 |
+
#include "StaticData.h"
|
| 24 |
+
#include "ChartTranslationOptionList.h"
|
| 25 |
+
#include "ChartTranslationOptions.h"
|
| 26 |
+
#include "ChartCellCollection.h"
|
| 27 |
+
#include "Range.h"
|
| 28 |
+
#include "InputType.h"
|
| 29 |
+
#include "InputPath.h"
|
| 30 |
+
|
| 31 |
+
using namespace std;
|
| 32 |
+
|
| 33 |
+
namespace Moses
|
| 34 |
+
{
|
| 35 |
+
|
| 36 |
+
/**
 * Create an empty list.
 * \param ruleLimit maximum number of options to keep; 0 is treated as no limit
 * \param input     not used by this constructor
 */
ChartTranslationOptionList::
ChartTranslationOptionList(size_t ruleLimit, const InputType &input)
  : m_size(0)
  , m_ruleLimit(ruleLimit)
{
  // start at +infinity so the first score added always lowers the threshold
  const float noThreshold = std::numeric_limits<float>::infinity();
  m_scoreThreshold = noThreshold;
}
|
| 43 |
+
|
| 44 |
+
//! Hands the whole collection, including slots past m_size, to RemoveAllInColl().
ChartTranslationOptionList::~ChartTranslationOptionList()
{
  RemoveAllInColl(m_collection);
}
|
| 48 |
+
|
| 49 |
+
void ChartTranslationOptionList::Clear()
|
| 50 |
+
{
|
| 51 |
+
m_size = 0;
|
| 52 |
+
m_scoreThreshold = std::numeric_limits<float>::infinity();
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
class ChartTranslationOptionOrderer
|
| 56 |
+
{
|
| 57 |
+
public:
|
| 58 |
+
bool operator()(const ChartTranslationOptions* itemA, const ChartTranslationOptions* itemB) const {
|
| 59 |
+
return itemA->GetEstimateOfBestScore() > itemB->GetEstimateOfBestScore();
|
| 60 |
+
}
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
|
| 64 |
+
const StackVec &stackVec,
|
| 65 |
+
const Range &range)
|
| 66 |
+
{
|
| 67 |
+
if (tpc.IsEmpty()) {
|
| 68 |
+
return;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
for (size_t i = 0; i < stackVec.size(); ++i) {
|
| 72 |
+
const ChartCellLabel &chartCellLabel = *stackVec[i];
|
| 73 |
+
size_t numHypos = chartCellLabel.GetStack().cube->size();
|
| 74 |
+
if (numHypos == 0) {
|
| 75 |
+
return; // empty stack. These rules can't be used
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
const TargetPhrase &targetPhrase = **(tpc.begin());
|
| 80 |
+
float score = targetPhrase.GetFutureScore();
|
| 81 |
+
for (StackVec::const_iterator p = stackVec.begin(); p != stackVec.end(); ++p) {
|
| 82 |
+
score += (*p)->GetBestScore(this);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
// If the rule limit has already been reached then don't add the option
|
| 86 |
+
// unless it is better than at least one existing option.
|
| 87 |
+
if (m_ruleLimit && m_size > m_ruleLimit && score < m_scoreThreshold) {
|
| 88 |
+
return;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
// Add the option to the list.
|
| 92 |
+
if (m_size == m_collection.size()) {
|
| 93 |
+
// m_collection has reached capacity: create a new object.
|
| 94 |
+
m_collection.push_back(new ChartTranslationOptions(tpc, stackVec,
|
| 95 |
+
range, score));
|
| 96 |
+
} else {
|
| 97 |
+
// Overwrite an unused object.
|
| 98 |
+
*(m_collection[m_size]) = ChartTranslationOptions(tpc, stackVec,
|
| 99 |
+
range, score);
|
| 100 |
+
}
|
| 101 |
+
++m_size;
|
| 102 |
+
|
| 103 |
+
// If the rule limit hasn't been exceeded then update the threshold.
|
| 104 |
+
if (!m_ruleLimit || m_size <= m_ruleLimit) {
|
| 105 |
+
m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
// Prune if bursting
|
| 109 |
+
if (m_ruleLimit && m_size == m_ruleLimit * 2) {
|
| 110 |
+
NTH_ELEMENT4(m_collection.begin(),
|
| 111 |
+
m_collection.begin() + m_ruleLimit - 1,
|
| 112 |
+
m_collection.begin() + m_size,
|
| 113 |
+
ChartTranslationOptionOrderer());
|
| 114 |
+
m_scoreThreshold = m_collection[m_ruleLimit-1]->GetEstimateOfBestScore();
|
| 115 |
+
m_size = m_ruleLimit;
|
| 116 |
+
}
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
void
|
| 120 |
+
ChartTranslationOptionList::
|
| 121 |
+
AddPhraseOOV(TargetPhrase &phrase,
|
| 122 |
+
std::list<TargetPhraseCollection::shared_ptr > &waste_memory,
|
| 123 |
+
const Range &range)
|
| 124 |
+
{
|
| 125 |
+
TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
|
| 126 |
+
tpc->Add(&phrase);
|
| 127 |
+
waste_memory.push_back(tpc);
|
| 128 |
+
StackVec empty;
|
| 129 |
+
Add(*tpc, empty, range);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
void ChartTranslationOptionList::ApplyThreshold(float const threshold)
|
| 133 |
+
{
|
| 134 |
+
if (m_ruleLimit && m_size > m_ruleLimit) {
|
| 135 |
+
// Something's gone wrong if the list has grown to m_ruleLimit * 2
|
| 136 |
+
// without being pruned.
|
| 137 |
+
assert(m_size < m_ruleLimit * 2);
|
| 138 |
+
// Reduce the list to the best m_ruleLimit options. The remaining
|
| 139 |
+
// options can be overwritten on subsequent calls to Add().
|
| 140 |
+
NTH_ELEMENT4(m_collection.begin(),
|
| 141 |
+
m_collection.begin()+m_ruleLimit,
|
| 142 |
+
m_collection.begin()+m_size,
|
| 143 |
+
ChartTranslationOptionOrderer());
|
| 144 |
+
m_size = m_ruleLimit;
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
// keep only those over best + threshold
|
| 148 |
+
|
| 149 |
+
float scoreThreshold = -std::numeric_limits<float>::infinity();
|
| 150 |
+
|
| 151 |
+
CollType::const_iterator iter;
|
| 152 |
+
for (iter = m_collection.begin(); iter != m_collection.begin()+m_size; ++iter) {
|
| 153 |
+
const ChartTranslationOptions *transOpt = *iter;
|
| 154 |
+
float score = transOpt->GetEstimateOfBestScore();
|
| 155 |
+
scoreThreshold = (score > scoreThreshold) ? score : scoreThreshold;
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
scoreThreshold += threshold; // StaticData::Instance().GetTranslationOptionThreshold();
|
| 159 |
+
|
| 160 |
+
CollType::iterator bound = std::partition(m_collection.begin(),
|
| 161 |
+
m_collection.begin()+m_size,
|
| 162 |
+
ScoreThresholdPred(scoreThreshold));
|
| 163 |
+
|
| 164 |
+
m_size = std::distance(m_collection.begin(), bound);
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell) const
|
| 168 |
+
{
|
| 169 |
+
const HypoList *stack = chartCell->GetStack().cube;
|
| 170 |
+
assert(stack);
|
| 171 |
+
assert(!stack->empty());
|
| 172 |
+
const ChartHypothesis &bestHypo = **(stack->begin());
|
| 173 |
+
return bestHypo.GetFutureScore();
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
void ChartTranslationOptionList::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
|
| 177 |
+
{
|
| 178 |
+
// NEVER iterate over ALL of the collection. Just over the first m_size
|
| 179 |
+
CollType::iterator iter;
|
| 180 |
+
for (iter = m_collection.begin(); iter != m_collection.begin() + m_size; ++iter) {
|
| 181 |
+
ChartTranslationOptions &transOpts = **iter;
|
| 182 |
+
transOpts.EvaluateWithSourceContext(input, inputPath);
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
// get rid of empty trans opts
|
| 186 |
+
size_t numDiscard = 0;
|
| 187 |
+
for (size_t i = 0; i < m_size; ++i) {
|
| 188 |
+
ChartTranslationOptions *transOpts = m_collection[i];
|
| 189 |
+
if (transOpts->GetSize() == 0) {
|
| 190 |
+
//delete transOpts;
|
| 191 |
+
++numDiscard;
|
| 192 |
+
} else if (numDiscard) {
|
| 193 |
+
SwapTranslationOptions(i - numDiscard, i);
|
| 194 |
+
//m_collection[] = transOpts;
|
| 195 |
+
}
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
size_t newSize = m_size - numDiscard;
|
| 199 |
+
m_size = newSize;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
void ChartTranslationOptionList::SwapTranslationOptions(size_t a, size_t b)
|
| 203 |
+
{
|
| 204 |
+
ChartTranslationOptions *transOptsA = m_collection[a];
|
| 205 |
+
ChartTranslationOptions *transOptsB = m_collection[b];
|
| 206 |
+
m_collection[a] = transOptsB;
|
| 207 |
+
m_collection[b] = transOptsA;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptionList &obj)
|
| 211 |
+
{
|
| 212 |
+
for (size_t i = 0; i < obj.m_collection.size(); ++i) {
|
| 213 |
+
const ChartTranslationOptions &transOpts = *obj.m_collection[i];
|
| 214 |
+
out << transOpts << endl;
|
| 215 |
+
}
|
| 216 |
+
return out;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
}
|
mosesdecoder/moses/ChartTranslationOptionList.h
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2006 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "ChartTranslationOptions.h"
|
| 23 |
+
#include "ChartParserCallback.h"
|
| 24 |
+
#include "StackVec.h"
|
| 25 |
+
|
| 26 |
+
#include <vector>
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
|
| 31 |
+
class TargetPhraseCollection;
|
| 32 |
+
class Range;
|
| 33 |
+
class InputType;
|
| 34 |
+
class InputPath;
|
| 35 |
+
class ChartCellLabel;
|
| 36 |
+
|
| 37 |
+
//! a vector of translations options for a specific range, in a specific sentence
|
| 38 |
+
class ChartTranslationOptionList : public ChartParserCallback
|
| 39 |
+
{
|
| 40 |
+
friend std::ostream& operator<<(std::ostream&, const ChartTranslationOptionList&);
|
| 41 |
+
|
| 42 |
+
public:
|
| 43 |
+
ChartTranslationOptionList(size_t ruleLimit, const InputType &input);
|
| 44 |
+
~ChartTranslationOptionList();
|
| 45 |
+
|
| 46 |
+
const ChartTranslationOptions &Get(size_t i) const {
|
| 47 |
+
return *m_collection[i];
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
//! number of translation options
|
| 51 |
+
size_t GetSize() const {
|
| 52 |
+
return m_size;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
void Add(const TargetPhraseCollection &, const StackVec &,
|
| 56 |
+
const Range &);
|
| 57 |
+
|
| 58 |
+
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const Range &range);
|
| 59 |
+
|
| 60 |
+
bool Empty() const {
|
| 61 |
+
return m_size == 0;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
float GetBestScore(const ChartCellLabel *chartCell) const;
|
| 65 |
+
|
| 66 |
+
void Clear();
|
| 67 |
+
void ApplyThreshold(float threshold);
|
| 68 |
+
void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
|
| 69 |
+
|
| 70 |
+
private:
|
| 71 |
+
typedef std::vector<ChartTranslationOptions*> CollType;
|
| 72 |
+
|
| 73 |
+
struct ScoreThresholdPred {
|
| 74 |
+
ScoreThresholdPred(float threshold) : m_thresholdScore(threshold) {}
|
| 75 |
+
bool operator()(const ChartTranslationOptions *option) {
|
| 76 |
+
return option->GetEstimateOfBestScore() >= m_thresholdScore;
|
| 77 |
+
}
|
| 78 |
+
float m_thresholdScore;
|
| 79 |
+
};
|
| 80 |
+
|
| 81 |
+
void SwapTranslationOptions(size_t a, size_t b);
|
| 82 |
+
|
| 83 |
+
CollType m_collection;
|
| 84 |
+
size_t m_size;
|
| 85 |
+
float m_scoreThreshold;
|
| 86 |
+
const size_t m_ruleLimit;
|
| 87 |
+
|
| 88 |
+
};
|
| 89 |
+
|
| 90 |
+
}
|
mosesdecoder/moses/ChartTranslationOptions.cpp
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010 Hieu Hoang
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include "ChartTranslationOptions.h"
|
| 21 |
+
#include "ChartHypothesis.h"
|
| 22 |
+
#include "ChartCellLabel.h"
|
| 23 |
+
#include "ChartTranslationOption.h"
|
| 24 |
+
#include "InputPath.h"
|
| 25 |
+
#include "StaticData.h"
|
| 26 |
+
#include "TranslationTask.h"
|
| 27 |
+
|
| 28 |
+
using namespace std;
|
| 29 |
+
|
| 30 |
+
namespace Moses
|
| 31 |
+
{
|
| 32 |
+
|
| 33 |
+
//! Wrap every target phrase of \p targetPhraseColl in its own
//! ChartTranslationOption. The stack vector is copied; \p range is kept by
//! address only, so it has to outlive this object.
ChartTranslationOptions::ChartTranslationOptions(const TargetPhraseCollection &targetPhraseColl,
    const StackVec &stackVec,
    const Range &range,
    float score)
  : m_stackVec(stackVec)
  , m_wordsRange(&range)
  , m_estimateOfBestScore(score)
{
  for (TargetPhraseCollection::const_iterator tp = targetPhraseColl.begin();
       tp != targetPhraseColl.end(); ++tp) {
    boost::shared_ptr<ChartTranslationOption> wrapped(new ChartTranslationOption(**tp));
    m_collection.push_back(wrapped);
  }
}
|
| 49 |
+
|
| 50 |
+
//! Nothing is released by hand: the options are held through
//! boost::shared_ptr and m_wordsRange is not deleted here.
ChartTranslationOptions::~ChartTranslationOptions()
{
}
|
| 54 |
+
|
| 55 |
+
//! functor to compare (chart) hypotheses by (descending) score
|
| 56 |
+
class ChartTranslationOptionScoreOrderer
|
| 57 |
+
{
|
| 58 |
+
public:
|
| 59 |
+
bool operator()(const boost::shared_ptr<ChartTranslationOption> &transOptA
|
| 60 |
+
, const boost::shared_ptr<ChartTranslationOption> &transOptB) const {
|
| 61 |
+
const ScoreComponentCollection &scoresA = transOptA->GetScores();
|
| 62 |
+
const ScoreComponentCollection &scoresB = transOptB->GetScores();
|
| 63 |
+
return scoresA.GetWeightedScore() > scoresB.GetWeightedScore();
|
| 64 |
+
}
|
| 65 |
+
};
|
| 66 |
+
|
| 67 |
+
void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
|
| 68 |
+
{
|
| 69 |
+
SetInputPath(&inputPath);
|
| 70 |
+
// if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) {
|
| 71 |
+
if (inputPath.ttask->options()->input.placeholder_factor != NOT_FOUND) {
|
| 72 |
+
CreateSourceRuleFromInputPath();
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
CollType::iterator iter;
|
| 76 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 77 |
+
ChartTranslationOption &transOpt = **iter;
|
| 78 |
+
transOpt.SetInputPath(&inputPath);
|
| 79 |
+
transOpt.EvaluateWithSourceContext(input, inputPath, m_stackVec);
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
// get rid of -inf trans opts
|
| 83 |
+
size_t numDiscard = 0;
|
| 84 |
+
for (size_t i = 0; i < m_collection.size(); ++i) {
|
| 85 |
+
ChartTranslationOption *transOpt = m_collection[i].get();
|
| 86 |
+
|
| 87 |
+
if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) {
|
| 88 |
+
++numDiscard;
|
| 89 |
+
} else if (numDiscard) {
|
| 90 |
+
m_collection[i - numDiscard] = m_collection[i];
|
| 91 |
+
}
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
size_t newSize = m_collection.size() - numDiscard;
|
| 95 |
+
m_collection.resize(newSize);
|
| 96 |
+
|
| 97 |
+
// sort if necessary
|
| 98 |
+
const StaticData &staticData = StaticData::Instance();
|
| 99 |
+
if (staticData.RequireSortingAfterSourceContext()) {
|
| 100 |
+
std::sort(m_collection.begin()
|
| 101 |
+
, m_collection.begin() + newSize
|
| 102 |
+
, ChartTranslationOptionScoreOrderer());
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
void ChartTranslationOptions::SetInputPath(const InputPath *inputPath)
|
| 108 |
+
{
|
| 109 |
+
CollType::iterator iter;
|
| 110 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 111 |
+
ChartTranslationOption &transOpt = **iter;
|
| 112 |
+
transOpt.SetInputPath(inputPath);
|
| 113 |
+
}
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
void ChartTranslationOptions::CreateSourceRuleFromInputPath()
|
| 117 |
+
{
|
| 118 |
+
if (m_collection.size() == 0) {
|
| 119 |
+
return;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
const InputPath *inputPath = m_collection.front()->GetInputPath();
|
| 123 |
+
assert(inputPath);
|
| 124 |
+
std::vector<const Word*> &ruleSourceFromInputPath = inputPath->AddRuleSourceFromInputPath();
|
| 125 |
+
|
| 126 |
+
size_t chartCellIndex = 0;
|
| 127 |
+
const ChartCellLabel *chartCellLabel = (chartCellIndex < m_stackVec.size()) ? m_stackVec[chartCellIndex] : NULL;
|
| 128 |
+
|
| 129 |
+
size_t ind = 0;
|
| 130 |
+
for (size_t sourcePos = m_wordsRange->GetStartPos(); sourcePos <= m_wordsRange->GetEndPos(); ++sourcePos, ++ind) {
|
| 131 |
+
if (chartCellLabel) {
|
| 132 |
+
if (sourcePos == chartCellLabel->GetCoverage().GetEndPos()) {
|
| 133 |
+
// end of child range. push an empty word to denote non-term
|
| 134 |
+
ruleSourceFromInputPath.push_back(NULL);
|
| 135 |
+
++chartCellIndex;
|
| 136 |
+
chartCellLabel = (chartCellIndex < m_stackVec.size()) ? m_stackVec[chartCellIndex] : NULL;
|
| 137 |
+
} else if (sourcePos >= chartCellLabel->GetCoverage().GetStartPos()) {
|
| 138 |
+
// in the range of child hypo. do nothing
|
| 139 |
+
} else {
|
| 140 |
+
// not yet reached child range. add word
|
| 141 |
+
ruleSourceFromInputPath.push_back(&inputPath->GetPhrase().GetWord(ind));
|
| 142 |
+
}
|
| 143 |
+
} else {
|
| 144 |
+
// no child in sight. add word
|
| 145 |
+
ruleSourceFromInputPath.push_back(&inputPath->GetPhrase().GetWord(ind));
|
| 146 |
+
}
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
// save it to each trans opt
|
| 150 |
+
CollType::iterator iter;
|
| 151 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 152 |
+
ChartTranslationOption &transOpt = **iter;
|
| 153 |
+
transOpt.SetSourceRuleFromInputPath(&ruleSourceFromInputPath);
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
|
| 159 |
+
{
|
| 160 |
+
for (size_t i = 0; i < obj.m_collection.size(); ++i) {
|
| 161 |
+
const ChartTranslationOption &transOpt = *obj.m_collection[i];
|
| 162 |
+
out << transOpt << endl;
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
return out;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
}
|
mosesdecoder/moses/ChartTranslationOptions.h
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010 Hieu Hoang
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "StackVec.h"
|
| 23 |
+
#include "TargetPhrase.h"
|
| 24 |
+
#include "TargetPhraseCollection.h"
|
| 25 |
+
#include "Range.h"
|
| 26 |
+
|
| 27 |
+
#include <vector>
|
| 28 |
+
#include <boost/shared_ptr.hpp>
|
| 29 |
+
#include "ChartTranslationOption.h"
|
| 30 |
+
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
class ChartTranslationOption;
|
| 34 |
+
class InputPath;
|
| 35 |
+
class InputType;
|
| 36 |
+
|
| 37 |
+
/** Similar to a DottedRule, but contains a direct reference to a list
|
| 38 |
+
* of translations and provdes an estimate of the best score. For a specific range in the input sentence
|
| 39 |
+
*/
|
| 40 |
+
class ChartTranslationOptions
|
| 41 |
+
{
|
| 42 |
+
friend std::ostream& operator<<(std::ostream&, const ChartTranslationOptions&);
|
| 43 |
+
|
| 44 |
+
public:
|
| 45 |
+
typedef std::vector<boost::shared_ptr<ChartTranslationOption> > CollType;
|
| 46 |
+
|
| 47 |
+
/** Constructor
|
| 48 |
+
\param targetPhraseColl @todo dunno
|
| 49 |
+
\param stackVec @todo dunno
|
| 50 |
+
\param range the range in the source sentence this translation option covers
|
| 51 |
+
\param score @todo dunno
|
| 52 |
+
*/
|
| 53 |
+
ChartTranslationOptions(const TargetPhraseCollection &targetPhraseColl,
|
| 54 |
+
const StackVec &stackVec,
|
| 55 |
+
const Range &range,
|
| 56 |
+
float score);
|
| 57 |
+
~ChartTranslationOptions();
|
| 58 |
+
|
| 59 |
+
static float CalcEstimateOfBestScore(const TargetPhraseCollection &,
|
| 60 |
+
const StackVec &);
|
| 61 |
+
|
| 62 |
+
size_t GetSize() const {
|
| 63 |
+
return m_collection.size();
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
//! @todo dunno
|
| 67 |
+
const StackVec &GetStackVec() const {
|
| 68 |
+
return m_stackVec;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
//! @todo isn't the translation suppose to just contain 1 target phrase, not a whole collection of them?
|
| 72 |
+
const CollType &GetTargetPhrases() const {
|
| 73 |
+
return m_collection;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
//! the range in the source sentence this translation option covers
|
| 77 |
+
const Range &GetSourceWordsRange() const {
|
| 78 |
+
return *m_wordsRange;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
/** return an estimate of the best score possible with this translation option.
|
| 82 |
+
* the estimate is the sum of the top target phrase's estimated score plus the
|
| 83 |
+
* scores of the best child hypotheses.
|
| 84 |
+
*/
|
| 85 |
+
inline float GetEstimateOfBestScore() const {
|
| 86 |
+
return m_estimateOfBestScore;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
|
| 90 |
+
|
| 91 |
+
void SetInputPath(const InputPath *inputPath);
|
| 92 |
+
|
| 93 |
+
void CreateSourceRuleFromInputPath();
|
| 94 |
+
|
| 95 |
+
private:
|
| 96 |
+
|
| 97 |
+
StackVec m_stackVec; //! vector of hypothesis list!
|
| 98 |
+
CollType m_collection;
|
| 99 |
+
|
| 100 |
+
const Range *m_wordsRange;
|
| 101 |
+
float m_estimateOfBestScore;
|
| 102 |
+
};
|
| 103 |
+
|
| 104 |
+
}
|
mosesdecoder/moses/ConfusionNet.cpp
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
| 2 |
+
// $Id$
|
| 3 |
+
|
| 4 |
+
#include "ConfusionNet.h"
|
| 5 |
+
#include <sstream>
|
| 6 |
+
|
| 7 |
+
#include "FactorCollection.h"
|
| 8 |
+
#include "Util.h"
|
| 9 |
+
#include "TranslationOptionCollectionConfusionNet.h"
|
| 10 |
+
#include "StaticData.h"
|
| 11 |
+
#include "Sentence.h"
|
| 12 |
+
#include "moses/FF/InputFeature.h"
|
| 13 |
+
#include "util/exception.hh"
|
| 14 |
+
#include "moses/TranslationTask.h"
|
| 15 |
+
namespace Moses
|
| 16 |
+
{
|
| 17 |
+
struct CNStats {
|
| 18 |
+
size_t created,destr,read,colls,words;
|
| 19 |
+
|
| 20 |
+
CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
|
| 21 |
+
~CNStats() {
|
| 22 |
+
print(std::cerr);
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
void createOne() {
|
| 26 |
+
++created;
|
| 27 |
+
}
|
| 28 |
+
void destroyOne() {
|
| 29 |
+
++destr;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
void collect(const ConfusionNet& cn) {
|
| 33 |
+
++read;
|
| 34 |
+
colls+=cn.GetSize();
|
| 35 |
+
for(size_t i=0; i<cn.GetSize(); ++i)
|
| 36 |
+
words+=cn[i].size();
|
| 37 |
+
}
|
| 38 |
+
void print(std::ostream& out) const {
|
| 39 |
+
if(created>0) {
|
| 40 |
+
out<<"confusion net statistics:\n"
|
| 41 |
+
" created:\t"<<created<<"\n"
|
| 42 |
+
" destroyed:\t"<<destr<<"\n"
|
| 43 |
+
" succ. read:\t"<<read<<"\n"
|
| 44 |
+
" columns:\t"<<colls<<"\n"
|
| 45 |
+
" words:\t"<<words<<"\n"
|
| 46 |
+
" avg. word/column:\t"<<words/(1.0*colls)<<"\n"
|
| 47 |
+
" avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
|
| 48 |
+
"\n\n";
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
CNStats stats;
|
| 54 |
+
|
| 55 |
+
size_t
|
| 56 |
+
ConfusionNet::
|
| 57 |
+
GetColumnIncrement(size_t i, size_t j) const
|
| 58 |
+
{
|
| 59 |
+
(void) i;
|
| 60 |
+
(void) j;
|
| 61 |
+
return 1;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
//! Create an empty confusion net. With a syntax search algorithm the
//! configured default input non-terminal is added to the default label set.
//! Throws if no InputFeature instance exists.
ConfusionNet::
ConfusionNet(AllOptions::ptr const& opts) : InputType(opts)
{
  stats.createOne();

  const bool syntaxSearch = is_syntax(opts->search.algo);
  if (syntaxSearch) {
    m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal);
  }
  UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified");
}
|
| 74 |
+
|
| 75 |
+
//! Only bookkeeping: bumps the "destroyed" counter of the global statistics.
ConfusionNet::
~ConfusionNet()
{
  stats.destroyOne();
}
|
| 80 |
+
|
| 81 |
+
//! Build a confusion net from a plain sentence: one column per word, each
//! holding exactly that word paired with a default-constructed ScorePair.
// NOTE(review): unlike the other constructor this one does not call
// stats.createOne(), while the destructor always calls stats.destroyOne();
// the "destroyed" counter can therefore exceed "created" -- confirm intent.
ConfusionNet::
ConfusionNet(Sentence const& s) : InputType(s.options())
{
  const size_t numWords = s.GetSize();
  data.resize(numWords);
  for(size_t pos=0; pos<numWords; ++pos) {
    data[pos].push_back(std::make_pair(s.GetWord(pos), ScorePair()));
  }
}
|
| 91 |
+
|
| 92 |
+
bool
|
| 93 |
+
ConfusionNet::
|
| 94 |
+
ReadF(std::istream& in, int format)
|
| 95 |
+
{
|
| 96 |
+
VERBOSE(2, "read confusion net with format "<<format<<"\n");
|
| 97 |
+
switch(format) {
|
| 98 |
+
case 0:
|
| 99 |
+
return ReadFormat0(in);
|
| 100 |
+
case 1:
|
| 101 |
+
return ReadFormat1(in);
|
| 102 |
+
default:
|
| 103 |
+
std::cerr << "ERROR: unknown format '"<<format
|
| 104 |
+
<<"' in ConfusionNet::Read";
|
| 105 |
+
}
|
| 106 |
+
return false;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
int
|
| 110 |
+
ConfusionNet::
|
| 111 |
+
Read(std::istream& in)
|
| 112 |
+
{
|
| 113 |
+
int rv=ReadF(in,0);
|
| 114 |
+
if(rv) stats.collect(*this);
|
| 115 |
+
return rv;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
bool
|
| 119 |
+
ConfusionNet::
|
| 120 |
+
ReadFormat0(std::istream& in)
|
| 121 |
+
{
|
| 122 |
+
Clear();
|
| 123 |
+
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
| 124 |
+
|
| 125 |
+
const InputFeature *inputFeature = InputFeature::InstancePtr();
|
| 126 |
+
size_t numInputScores = inputFeature->GetNumInputScores();
|
| 127 |
+
size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
|
| 128 |
+
|
| 129 |
+
size_t totalCount = numInputScores + numRealWordCount;
|
| 130 |
+
bool addRealWordCount = (numRealWordCount > 0);
|
| 131 |
+
|
| 132 |
+
std::string line;
|
| 133 |
+
while(getline(in,line)) {
|
| 134 |
+
std::istringstream is(line);
|
| 135 |
+
std::string word;
|
| 136 |
+
|
| 137 |
+
Column col;
|
| 138 |
+
while(is>>word) {
|
| 139 |
+
Word w;
|
| 140 |
+
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
| 141 |
+
std::vector<float> probs(totalCount, 0.0);
|
| 142 |
+
for(size_t i=0; i < numInputScores; i++) {
|
| 143 |
+
double prob;
|
| 144 |
+
if (!(is>>prob)) {
|
| 145 |
+
TRACE_ERR("ERROR: unable to parse CN input - bad link probability, "
|
| 146 |
+
<< "or wrong number of scores\n");
|
| 147 |
+
return false;
|
| 148 |
+
}
|
| 149 |
+
if(prob<0.0) {
|
| 150 |
+
VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
|
| 151 |
+
prob=0.0;
|
| 152 |
+
} else if (prob>1.0) {
|
| 153 |
+
VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
|
| 154 |
+
prob=1.0;
|
| 155 |
+
}
|
| 156 |
+
probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
|
| 157 |
+
|
| 158 |
+
}
|
| 159 |
+
// store 'real' word count in last feature if we have one more
|
| 160 |
+
// weight than we do arc scores and not epsilon
|
| 161 |
+
if (addRealWordCount && word!=EPSILON && word!="")
|
| 162 |
+
probs.back() = -1.0;
|
| 163 |
+
|
| 164 |
+
ScorePair scorePair(probs);
|
| 165 |
+
|
| 166 |
+
col.push_back(std::make_pair(w,scorePair));
|
| 167 |
+
}
|
| 168 |
+
if(col.size()) {
|
| 169 |
+
data.push_back(col);
|
| 170 |
+
ShrinkToFit(data.back());
|
| 171 |
+
} else break;
|
| 172 |
+
}
|
| 173 |
+
return !data.empty();
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
bool
|
| 177 |
+
ConfusionNet::
|
| 178 |
+
ReadFormat1(std::istream& in)
|
| 179 |
+
{
|
| 180 |
+
Clear();
|
| 181 |
+
const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
|
| 182 |
+
std::string line;
|
| 183 |
+
if(!getline(in,line)) return 0;
|
| 184 |
+
size_t s;
|
| 185 |
+
if(getline(in,line)) s=atoi(line.c_str());
|
| 186 |
+
else return 0;
|
| 187 |
+
data.resize(s);
|
| 188 |
+
for(size_t i=0; i<data.size(); ++i) {
|
| 189 |
+
if(!getline(in,line)) return 0;
|
| 190 |
+
std::istringstream is(line);
|
| 191 |
+
if(!(is>>s)) return 0;
|
| 192 |
+
std::string word;
|
| 193 |
+
double prob;
|
| 194 |
+
data[i].resize(s);
|
| 195 |
+
for(size_t j=0; j<s; ++j)
|
| 196 |
+
if(is>>word>>prob) {
|
| 197 |
+
//TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
|
| 198 |
+
data[i][j].second.denseScores = std::vector<float> (1);
|
| 199 |
+
data[i][j].second.denseScores.push_back((float) log(prob));
|
| 200 |
+
if(data[i][j].second.denseScores[0]<0) {
|
| 201 |
+
VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
|
| 202 |
+
data[i][j].second.denseScores[0]=0.0;
|
| 203 |
+
}
|
| 204 |
+
// String2Word(word,data[i][j].first,factorOrder);
|
| 205 |
+
Word& w = data[i][j].first;
|
| 206 |
+
w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
|
| 207 |
+
} else return 0;
|
| 208 |
+
}
|
| 209 |
+
return !data.empty();
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
void ConfusionNet::Print(std::ostream& out) const
|
| 213 |
+
{
|
| 214 |
+
out<<"conf net: "<<data.size()<<"\n";
|
| 215 |
+
for(size_t i=0; i<data.size(); ++i) {
|
| 216 |
+
out<<i<<" -- ";
|
| 217 |
+
for(size_t j=0; j<data[i].size(); ++j) {
|
| 218 |
+
out<<"("<<data[i][j].first.ToString()<<", ";
|
| 219 |
+
|
| 220 |
+
// dense
|
| 221 |
+
std::vector<float>::const_iterator iterDense;
|
| 222 |
+
for(iterDense = data[i][j].second.denseScores.begin();
|
| 223 |
+
iterDense < data[i][j].second.denseScores.end();
|
| 224 |
+
++iterDense) {
|
| 225 |
+
out<<", "<<*iterDense;
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
// sparse
|
| 229 |
+
std::map<StringPiece, float>::const_iterator iterSparse;
|
| 230 |
+
for(iterSparse = data[i][j].second.sparseScores.begin();
|
| 231 |
+
iterSparse != data[i][j].second.sparseScores.end();
|
| 232 |
+
++iterSparse) {
|
| 233 |
+
out << ", " << iterSparse->first << "=" << iterSparse->second;
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
out<<") ";
|
| 237 |
+
}
|
| 238 |
+
out<<"\n";
|
| 239 |
+
}
|
| 240 |
+
out<<"\n\n";
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
#ifdef _WIN32
|
| 244 |
+
#pragma warning(disable:4716)
|
| 245 |
+
#endif
|
| 246 |
+
//! Not supported for confusion nets: always throws, never returns a Phrase.
Phrase
ConfusionNet::
GetSubString(const Range&) const
{
  // (a former fallback returned Phrase(Input) instead of throwing)
  UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
}
|
| 253 |
+
|
| 254 |
+
std::string
|
| 255 |
+
ConfusionNet::
|
| 256 |
+
GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
|
| 257 |
+
{
|
| 258 |
+
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
|
| 259 |
+
return "";
|
| 260 |
+
}
|
| 261 |
+
#ifdef _WIN32
|
| 262 |
+
#pragma warning(disable:4716)
|
| 263 |
+
#endif
|
| 264 |
+
//! Not supported for confusion nets (a position holds several words):
//! always throws.
const Word& ConfusionNet::GetWord(size_t) const
{
  // the message used to name a different function, "GetFactorArray"
  UTIL_THROW2("ERROR: call to ConfusionNet::GetWord\n");
}
|
| 268 |
+
#ifdef _WIN32
|
| 269 |
+
#pragma warning(default:4716)
|
| 270 |
+
#endif
|
| 271 |
+
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
|
| 272 |
+
{
|
| 273 |
+
cn.Print(out);
|
| 274 |
+
return out;
|
| 275 |
+
}
|
| 276 |
+
|
| 277 |
+
//! Allocate the translation option collection for this confusion net with
//! plain new and return the raw pointer; nothing here releases it.
TranslationOptionCollection*
ConfusionNet::
CreateTranslationOptionCollection(ttasksptr const& ttask) const
{
  // max_trans_opt_per_cov and trans_opt_threshold used to be read from
  // ttask->options()->search here and passed along; they no longer are.
  TranslationOptionCollection *collection
    = new TranslationOptionCollectionConfusionNet(ttask, *this);
  assert(collection);
  return collection;
}
|
| 291 |
+
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
|
mosesdecoder/moses/ConfusionNet.h
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
#ifndef moses_ConfusionNet_h
|
| 4 |
+
#define moses_ConfusionNet_h
|
| 5 |
+
|
| 6 |
+
#include <vector>
|
| 7 |
+
#include <iostream>
|
| 8 |
+
#include "Word.h"
|
| 9 |
+
#include "InputType.h"
|
| 10 |
+
#include "NonTerminal.h"
|
| 11 |
+
#include "util/exception.hh"
|
| 12 |
+
|
| 13 |
+
namespace Moses
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
class FactorCollection;
|
| 17 |
+
class TranslationOptionCollection;
|
| 18 |
+
class Sentence;
|
| 19 |
+
class TranslationTask;
|
| 20 |
+
|
| 21 |
+
/** An input to the decoder where each position can be 1 of a number of words,
|
| 22 |
+
* each with an associated probability. Compared with a sentence, where each position is a word
|
| 23 |
+
*/
|
| 24 |
+
class ConfusionNet : public InputType
|
| 25 |
+
{
|
| 26 |
+
public:
|
| 27 |
+
typedef std::vector<std::pair<Word, ScorePair > > Column;
|
| 28 |
+
|
| 29 |
+
protected:
|
| 30 |
+
std::vector<Column> data;
|
| 31 |
+
NonTerminalSet m_defaultLabelSet;
|
| 32 |
+
|
| 33 |
+
bool ReadFormat0(std::istream&);
|
| 34 |
+
bool ReadFormat1(std::istream&);
|
| 35 |
+
void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
|
| 36 |
+
|
| 37 |
+
public:
|
| 38 |
+
ConfusionNet(AllOptions::ptr const& opts);
|
| 39 |
+
virtual ~ConfusionNet();
|
| 40 |
+
|
| 41 |
+
ConfusionNet(Sentence const& s);
|
| 42 |
+
|
| 43 |
+
InputTypeEnum GetType() const {
|
| 44 |
+
return ConfusionNetworkInput;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
const Column& GetColumn(size_t i) const {
|
| 48 |
+
UTIL_THROW_IF2(i >= data.size(),
|
| 49 |
+
"Out of bounds. Trying to access " << i
|
| 50 |
+
<< " when vector only contains " << data.size());
|
| 51 |
+
return data[i];
|
| 52 |
+
}
|
| 53 |
+
const Column& operator[](size_t i) const {
|
| 54 |
+
return GetColumn(i);
|
| 55 |
+
}
|
| 56 |
+
virtual size_t GetColumnIncrement(size_t i, size_t j) const; //! returns 1 for CNs
|
| 57 |
+
|
| 58 |
+
bool Empty() const {
|
| 59 |
+
return data.empty();
|
| 60 |
+
}
|
| 61 |
+
size_t GetSize() const {
|
| 62 |
+
return data.size();
|
| 63 |
+
}
|
| 64 |
+
void Clear() {
|
| 65 |
+
data.clear();
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
bool ReadF(std::istream&, int format=0);
|
| 69 |
+
virtual void Print(std::ostream&) const;
|
| 70 |
+
|
| 71 |
+
int Read(std::istream& in);
|
| 72 |
+
|
| 73 |
+
Phrase GetSubString(const Range&) const; //TODO not defined
|
| 74 |
+
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
|
| 75 |
+
const Word& GetWord(size_t pos) const;
|
| 76 |
+
|
| 77 |
+
TranslationOptionCollection*
|
| 78 |
+
CreateTranslationOptionCollection(ttasksptr const& ttask) const;
|
| 79 |
+
|
| 80 |
+
const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
|
| 81 |
+
return m_defaultLabelSet;
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
};
|
| 86 |
+
|
| 87 |
+
//! Stream-insertion operator for ConfusionNet; its definition forwards to ConfusionNet::Print().
std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn);
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
#endif
|
mosesdecoder/moses/ContextScope.h
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
| 2 |
+
// A class to store "local" information (such as task-specific caches).
|
| 3 |
+
// The idea is for each translation task to have a scope, which stores
|
| 4 |
+
// shared pointers to task-specific objects such as caches and priors.
|
| 5 |
+
// Since these objects are referenced via shared pointers, scopes can
|
| 6 |
+
// share information.
|
| 7 |
+
#pragma once
|
| 8 |
+
|
| 9 |
+
#ifdef WITH_THREADS
|
| 10 |
+
#include <boost/thread/shared_mutex.hpp>
|
| 11 |
+
#include <boost/thread/locks.hpp>
|
| 12 |
+
#include <boost/foreach.hpp>
|
| 13 |
+
#endif
|
| 14 |
+
|
| 15 |
+
// for some reason, the xmlrpc_c headers must be included AFTER the
|
| 16 |
+
// boost thread-related ones ...
|
| 17 |
+
#include "xmlrpc-c.h"
|
| 18 |
+
|
| 19 |
+
#include <map>
|
| 20 |
+
#include <boost/shared_ptr.hpp>
|
| 21 |
+
#include "TypeDef.h"
|
| 22 |
+
#include "Util.h"
|
| 23 |
+
|
| 24 |
+
namespace Moses
|
| 25 |
+
{
|
| 26 |
+
class ContextScope
|
| 27 |
+
{
|
| 28 |
+
protected:
|
| 29 |
+
typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
|
| 30 |
+
typedef scratchpad_t::iterator iter_t;
|
| 31 |
+
typedef scratchpad_t::value_type entry_t;
|
| 32 |
+
typedef scratchpad_t::const_iterator const_iter_t;
|
| 33 |
+
scratchpad_t m_scratchpad;
|
| 34 |
+
#ifdef WITH_THREADS
|
| 35 |
+
mutable boost::shared_mutex m_lock;
|
| 36 |
+
#endif
|
| 37 |
+
SPTR<std::map<std::string,float> const> m_context_weights;
|
| 38 |
+
public:
|
| 39 |
+
typedef boost::shared_ptr<ContextScope> ptr;
|
| 40 |
+
template<typename T>
|
| 41 |
+
boost::shared_ptr<void> const&
|
| 42 |
+
set(void const* const key, boost::shared_ptr<T> const& val) {
|
| 43 |
+
#ifdef WITH_THREADS
|
| 44 |
+
boost::unique_lock<boost::shared_mutex> lock(m_lock);
|
| 45 |
+
#endif
|
| 46 |
+
return (m_scratchpad[key] = val);
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
template<typename T>
|
| 50 |
+
boost::shared_ptr<T> const
|
| 51 |
+
get(void const* key, bool CreateNewIfNecessary=false) {
|
| 52 |
+
#ifdef WITH_THREADS
|
| 53 |
+
using boost::shared_mutex;
|
| 54 |
+
using boost::upgrade_lock;
|
| 55 |
+
// T const* key = reinterpret_cast<T const*>(xkey);
|
| 56 |
+
upgrade_lock<shared_mutex> lock(m_lock);
|
| 57 |
+
#endif
|
| 58 |
+
iter_t m = m_scratchpad.find(key);
|
| 59 |
+
boost::shared_ptr< T > ret;
|
| 60 |
+
if (m != m_scratchpad.end()) {
|
| 61 |
+
if (m->second == NULL && CreateNewIfNecessary) {
|
| 62 |
+
#ifdef WITH_THREADS
|
| 63 |
+
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
|
| 64 |
+
#endif
|
| 65 |
+
m->second.reset(new T);
|
| 66 |
+
}
|
| 67 |
+
ret = boost::static_pointer_cast< T >(m->second);
|
| 68 |
+
return ret;
|
| 69 |
+
}
|
| 70 |
+
if (!CreateNewIfNecessary) return ret;
|
| 71 |
+
#ifdef WITH_THREADS
|
| 72 |
+
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
|
| 73 |
+
#endif
|
| 74 |
+
ret.reset(new T);
|
| 75 |
+
m_scratchpad[key] = ret;
|
| 76 |
+
return ret;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
ContextScope() { }
|
| 80 |
+
|
| 81 |
+
ContextScope(ContextScope const& other) {
|
| 82 |
+
#ifdef WITH_THREADS
|
| 83 |
+
boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
|
| 84 |
+
boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
|
| 85 |
+
#endif
|
| 86 |
+
m_scratchpad = other.m_scratchpad;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
SPTR<std::map<std::string,float> const>
|
| 90 |
+
GetContextWeights() {
|
| 91 |
+
return m_context_weights;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
bool
|
| 95 |
+
SetContextWeights(std::string const& spec) {
|
| 96 |
+
if (m_context_weights) return false;
|
| 97 |
+
boost::unique_lock<boost::shared_mutex> lock(m_lock);
|
| 98 |
+
SPTR<std::map<std::string,float> > M(new std::map<std::string, float>);
|
| 99 |
+
|
| 100 |
+
// TO DO; This needs to be done with StringPiece.find, not Tokenize
|
| 101 |
+
// PRIORITY: low
|
| 102 |
+
std::vector<std::string> tokens = Tokenize(spec,":");
|
| 103 |
+
for (std::vector<std::string>::iterator it = tokens.begin();
|
| 104 |
+
it != tokens.end(); it++) {
|
| 105 |
+
std::vector<std::string> key_and_value = Tokenize(*it, ",");
|
| 106 |
+
(*M)[key_and_value[0]] = atof(key_and_value[1].c_str());
|
| 107 |
+
}
|
| 108 |
+
m_context_weights = M;
|
| 109 |
+
return true;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
bool
|
| 113 |
+
SetContextWeights(SPTR<std::map<std::string,float> const> const& w) {
|
| 114 |
+
if (m_context_weights) return false;
|
| 115 |
+
#ifdef WITH_THREADS
|
| 116 |
+
boost::unique_lock<boost::shared_mutex> lock(m_lock);
|
| 117 |
+
#endif
|
| 118 |
+
m_context_weights = w;
|
| 119 |
+
return true;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
};
|
| 123 |
+
|
| 124 |
+
};
|
mosesdecoder/moses/DecodeGraph.cpp
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2006 University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#include "DecodeGraph.h"
|
| 24 |
+
#include "DecodeStep.h"
|
| 25 |
+
#include "TypeDef.h"
|
| 26 |
+
#include "Util.h"
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
//! Releases the steps held in m_steps via RemoveAllInColl().
//! NOTE(review): presumably this deletes every DecodeStep added through
//! Add() -- confirm against RemoveAllInColl in Util.h.
DecodeGraph::~DecodeGraph()
{
  RemoveAllInColl(m_steps);
}
|
| 34 |
+
|
| 35 |
+
//! Add another decode step to the graph
|
| 36 |
+
void DecodeGraph::Add(DecodeStep *decodeStep)
|
| 37 |
+
{
|
| 38 |
+
m_steps.push_back(decodeStep);
|
| 39 |
+
decodeStep->SetContainer(this);
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
}
|
| 43 |
+
|
mosesdecoder/moses/DecodeGraph.h
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2006 University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#ifndef moses_DecodeGraph_h
|
| 24 |
+
#define moses_DecodeGraph_h
|
| 25 |
+
|
| 26 |
+
#include "util/exception.hh"
|
| 27 |
+
#include <list>
|
| 28 |
+
#include <iterator>
|
| 29 |
+
#include "TypeDef.h"
|
| 30 |
+
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
class DecodeStep;
|
| 35 |
+
|
| 36 |
+
//! list of DecodeSteps which factorizes the translation
|
| 37 |
+
class DecodeGraph
|
| 38 |
+
{
|
| 39 |
+
protected:
|
| 40 |
+
std::list<const DecodeStep*> m_steps;
|
| 41 |
+
size_t m_id; // contiguous unique id, starting from 0
|
| 42 |
+
size_t m_maxChartSpan;
|
| 43 |
+
size_t m_backoff;
|
| 44 |
+
|
| 45 |
+
public:
|
| 46 |
+
/**
|
| 47 |
+
* position: The position of this graph within the decode sequence.
|
| 48 |
+
**/
|
| 49 |
+
DecodeGraph(size_t id)
|
| 50 |
+
: m_id(id)
|
| 51 |
+
, m_maxChartSpan(NOT_FOUND)
|
| 52 |
+
, m_backoff(0) {
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
// for chart decoding
|
| 56 |
+
DecodeGraph(size_t id, size_t maxChartSpan)
|
| 57 |
+
: m_id(id)
|
| 58 |
+
, m_maxChartSpan(maxChartSpan) {
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
//! iterators
|
| 62 |
+
typedef std::list<const DecodeStep*>::iterator iterator;
|
| 63 |
+
typedef std::list<const DecodeStep*>::const_iterator const_iterator;
|
| 64 |
+
const_iterator begin() const {
|
| 65 |
+
return m_steps.begin();
|
| 66 |
+
}
|
| 67 |
+
const_iterator end() const {
|
| 68 |
+
return m_steps.end();
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
virtual ~DecodeGraph();
|
| 72 |
+
|
| 73 |
+
//! Add another decode step to the graph
|
| 74 |
+
void Add(DecodeStep *decodeStep);
|
| 75 |
+
|
| 76 |
+
size_t GetSize() const {
|
| 77 |
+
return m_steps.size();
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
size_t GetMaxChartSpan() const {
|
| 81 |
+
UTIL_THROW_IF2(m_maxChartSpan == NOT_FOUND, "Max chart span not specified");
|
| 82 |
+
return m_maxChartSpan;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
size_t GetBackoff() const {
|
| 86 |
+
return m_backoff;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
void SetBackoff(size_t backoff) {
|
| 90 |
+
m_backoff = backoff;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
size_t GetId() const {
|
| 94 |
+
return m_id;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
};
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
}
|
| 101 |
+
#endif
|
mosesdecoder/moses/DecodeStep.cpp
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "DecodeStep.h"
|
| 23 |
+
#include "GenerationDictionary.h"
|
| 24 |
+
#include "StaticData.h"
|
| 25 |
+
#include "moses/TranslationModel/PhraseDictionary.h"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
/** Sets up a decode step.
 *
 *  @param decodeFeature feature (dictionary) consulted in this step; it is
 *                       dereferenced, so it must not be null
 *  @param prev          preceding decode step, or null for the first step
 *  @param features      candidate feature functions; each is filed under
 *                       m_featuresToApply if it is useable with the output
 *                       factors present after this step, otherwise under
 *                       m_featuresRemaining
 */
DecodeStep::DecodeStep(DecodeFeature *decodeFeature,
                       const DecodeStep* prev,
                       const std::vector<FeatureFunction*> &features)
  : m_decodeFeature(decodeFeature)
  , m_container(NULL) // assigned later through SetContainer(); was left indeterminate before
{
  // factors already on the output side before this step
  FactorMask prevOutputFactors;
  if (prev) prevOutputFactors = prev->m_outputFactors;
  m_outputFactors = prevOutputFactors;

  // factors this step produces although they already exist
  FactorMask conflictMask = (m_outputFactors & decodeFeature->GetOutputFactorMask());
  m_outputFactors |= decodeFeature->GetOutputFactorMask();

  // factors that exist after this step but did not before
  FactorMask newOutputFactorMask = m_outputFactors ^ prevOutputFactors; //xor

  // turn both masks into lists of factor indices
  m_newOutputFactors.resize(newOutputFactorMask.count());
  m_conflictFactors.resize(conflictMask.count());
  size_t j=0, k=0;
  for (size_t i = 0; i < MAX_NUM_FACTORS; i++) {
    if (newOutputFactorMask[i]) m_newOutputFactors[j++] = i;
    if (conflictMask[i]) m_conflictFactors[k++] = i;
  }
  VERBOSE(2,"DecodeStep():\n\toutputFactors=" << m_outputFactors
          << "\n\tconflictFactors=" << conflictMask
          << "\n\tnewOutputFactors=" << newOutputFactorMask << std::endl);

  // find out which feature function can be applied in this decode step
  for (size_t i = 0; i < features.size(); ++i) {
    FeatureFunction *feature = features[i];
    if (feature->IsUseable(m_outputFactors)) {
      m_featuresToApply.push_back(feature);
    } else {
      m_featuresRemaining.push_back(feature);
    }
  }

  decodeFeature->SetContainer(this);
}
|
| 63 |
+
|
| 64 |
+
//! Destructor with an empty body: nothing is released explicitly here.
DecodeStep::~DecodeStep() {}
|
| 65 |
+
|
| 66 |
+
/** returns phrase feature (dictionary) for translation step */
|
| 67 |
+
const PhraseDictionary* DecodeStep::GetPhraseDictionaryFeature() const
|
| 68 |
+
{
|
| 69 |
+
return dynamic_cast<const PhraseDictionary*>(m_decodeFeature);
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/** returns generation feature (dictionary) for generation step */
|
| 73 |
+
const GenerationDictionary* DecodeStep::GetGenerationDictionaryFeature() const
|
| 74 |
+
{
|
| 75 |
+
return dynamic_cast<const GenerationDictionary*>(m_decodeFeature);
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
void DecodeStep::RemoveFeature(const FeatureFunction *ff)
|
| 79 |
+
{
|
| 80 |
+
for (size_t i = 0; i < m_featuresToApply.size(); ++i) {
|
| 81 |
+
if (ff == m_featuresToApply[i]) {
|
| 82 |
+
m_featuresToApply.erase(m_featuresToApply.begin() + i);
|
| 83 |
+
return;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
|
mosesdecoder/moses/DecodeStep.h
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_DecodeStep_h
|
| 23 |
+
#define moses_DecodeStep_h
|
| 24 |
+
|
| 25 |
+
#include "TypeDef.h"
|
| 26 |
+
#include "FactorTypeSet.h"
|
| 27 |
+
#include "Phrase.h"
|
| 28 |
+
|
| 29 |
+
namespace Moses
|
| 30 |
+
{
|
| 31 |
+
|
| 32 |
+
class DecodeFeature;
|
| 33 |
+
class PhraseDictionary;
|
| 34 |
+
class GenerationDictionary;
|
| 35 |
+
class TranslationOption;
|
| 36 |
+
class TranslationOptionCollection;
|
| 37 |
+
class PartialTranslOptColl;
|
| 38 |
+
class FactorCollection;
|
| 39 |
+
class InputType;
|
| 40 |
+
class FeatureFunction;
|
| 41 |
+
class DecodeGraph;
|
| 42 |
+
|
| 43 |
+
/** Specification for a decoding step.
|
| 44 |
+
* The factored translation model consists of Translation and Generation
|
| 45 |
+
* steps, which consult a Dictionary of phrase translations or word
|
| 46 |
+
* generations. This class implements the specification for one of these
|
| 47 |
+
* steps, both the DecodeType and a pointer to the Translation or Generation Feature
|
| 48 |
+
**/
|
| 49 |
+
class DecodeStep
|
| 50 |
+
{
|
| 51 |
+
protected:
|
| 52 |
+
FactorMask m_outputFactors; //! mask of what factors exist on the output side after this decode step
|
| 53 |
+
std::vector<FactorType> m_conflictFactors; //! list of the factors that may conflict during this step
|
| 54 |
+
std::vector<FactorType> m_newOutputFactors; //! list of the factors that are new in this step, may be empty
|
| 55 |
+
const DecodeFeature* m_decodeFeature;
|
| 56 |
+
const DecodeGraph *m_container;
|
| 57 |
+
|
| 58 |
+
std::vector<FeatureFunction*> m_featuresToApply, m_featuresRemaining;
|
| 59 |
+
public:
|
| 60 |
+
DecodeStep(); //! not implemented
|
| 61 |
+
DecodeStep(DecodeFeature *featurePtr,
|
| 62 |
+
const DecodeStep* prevDecodeStep,
|
| 63 |
+
const std::vector<FeatureFunction*> &features);
|
| 64 |
+
virtual ~DecodeStep();
|
| 65 |
+
|
| 66 |
+
//! mask of factors that are present after this decode step
|
| 67 |
+
const FactorMask& GetOutputFactorMask() const {
|
| 68 |
+
return m_outputFactors;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
//! returns true if this decode step must match some pre-existing factors
|
| 72 |
+
bool IsFilteringStep() const {
|
| 73 |
+
return !m_conflictFactors.empty();
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
//! returns true if this decode step produces one or more new factors
|
| 77 |
+
bool IsFactorProducingStep() const {
|
| 78 |
+
return !m_newOutputFactors.empty();
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
const std::vector<FeatureFunction*> &GetFeaturesRemaining() const {
|
| 82 |
+
return m_featuresRemaining;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
/*! returns a list (possibly empty) of the (target side) factors that
|
| 86 |
+
* are produced in this decoding step. For example, if a previous step
|
| 87 |
+
* generated factor 1, and this step generates 1,2, then only 2 will be
|
| 88 |
+
* in the returned vector. */
|
| 89 |
+
const std::vector<FactorType>& GetNewOutputFactors() const {
|
| 90 |
+
return m_newOutputFactors;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
/*! returns a list (possibly empty) of the (target side) factors that
|
| 94 |
+
* are produced BUT ALREADY EXIST and therefore must be checked for
|
| 95 |
+
* conflict or compatibility */
|
| 96 |
+
const std::vector<FactorType>& GetConflictFactors() const {
|
| 97 |
+
return m_conflictFactors;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
/*! returns phrase table feature for translation step */
|
| 101 |
+
const PhraseDictionary* GetPhraseDictionaryFeature() const;
|
| 102 |
+
|
| 103 |
+
/*! returns generation table feature for generation step */
|
| 104 |
+
const GenerationDictionary* GetGenerationDictionaryFeature() const;
|
| 105 |
+
|
| 106 |
+
void RemoveFeature(const FeatureFunction *ff);
|
| 107 |
+
|
| 108 |
+
void SetContainer(const DecodeGraph *container) {
|
| 109 |
+
m_container = container;
|
| 110 |
+
}
|
| 111 |
+
const DecodeGraph *GetContainer() const {
|
| 112 |
+
return m_container;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
};
|
| 116 |
+
|
| 117 |
+
}
|
| 118 |
+
#endif
|
mosesdecoder/moses/DecodeStepGeneration.cpp
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "DecodeStepGeneration.h"
|
| 23 |
+
#include "GenerationDictionary.h"
|
| 24 |
+
#include "TranslationOption.h"
|
| 25 |
+
#include "TranslationOptionCollection.h"
|
| 26 |
+
#include "PartialTranslOptColl.h"
|
| 27 |
+
#include "FactorCollection.h"
|
| 28 |
+
|
| 29 |
+
namespace Moses
|
| 30 |
+
{
|
| 31 |
+
using namespace std;
|
| 32 |
+
|
| 33 |
+
/** Generation decode step: all arguments are passed straight on to the
 *  DecodeStep base constructor, with dict serving as the decode feature.
 */
DecodeStepGeneration::DecodeStepGeneration(
  GenerationDictionary* dict,
  const DecodeStep* prev,
  const std::vector<FeatureFunction*> &features)
  : DecodeStep(dict, prev, features)
{
}
|
| 39 |
+
|
| 40 |
+
// helpers
|
| 41 |
+
typedef pair<Word, ScoreComponentCollection> WordPair;
|
| 42 |
+
typedef list< WordPair > WordList;
|
| 43 |
+
// 1st = word
|
| 44 |
+
// 2nd = score
|
| 45 |
+
typedef list< WordPair >::const_iterator WordListIterator;
|
| 46 |
+
|
| 47 |
+
/** used in generation: increases iterators when looping through the exponential number of generation expansions */
|
| 48 |
+
inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
|
| 49 |
+
, const vector< WordList > &wordListVector)
|
| 50 |
+
{
|
| 51 |
+
for (size_t currPos = 0 ; currPos < wordListVector.size() ; currPos++) {
|
| 52 |
+
WordListIterator &iter = wordListIterVector[currPos];
|
| 53 |
+
iter++;
|
| 54 |
+
if (iter != wordListVector[currPos].end()) {
|
| 55 |
+
// eg. 4 -> 5
|
| 56 |
+
return;
|
| 57 |
+
} else {
|
| 58 |
+
// eg 9 -> 10
|
| 59 |
+
iter = wordListVector[currPos].begin();
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
|
| 65 |
+
, const DecodeStep &decodeStep
|
| 66 |
+
, PartialTranslOptColl &outputPartialTranslOptColl
|
| 67 |
+
, TranslationOptionCollection * /* toc */
|
| 68 |
+
, bool /*adhereTableLimit*/) const
|
| 69 |
+
{
|
| 70 |
+
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
|
| 71 |
+
// word deletion
|
| 72 |
+
|
| 73 |
+
TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
|
| 74 |
+
outputPartialTranslOptColl.Add(newTransOpt);
|
| 75 |
+
|
| 76 |
+
return;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
// normal generation step
|
| 80 |
+
const GenerationDictionary* generationDictionary = decodeStep.GetGenerationDictionaryFeature();
|
| 81 |
+
|
| 82 |
+
const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase();
|
| 83 |
+
const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
|
| 84 |
+
size_t targetLength = targetPhrase.GetSize();
|
| 85 |
+
|
| 86 |
+
// generation list for each word in phrase
|
| 87 |
+
vector< WordList > wordListVector(targetLength);
|
| 88 |
+
|
| 89 |
+
// create generation list
|
| 90 |
+
int wordListVectorPos = 0;
|
| 91 |
+
for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { // going thorugh all words
|
| 92 |
+
// generatable factors for this word to be put in wordList
|
| 93 |
+
WordList &wordList = wordListVector[wordListVectorPos];
|
| 94 |
+
const Word &word = targetPhrase.GetWord(currPos);
|
| 95 |
+
|
| 96 |
+
// consult dictionary for possible generations for this word
|
| 97 |
+
const OutputWordCollection *wordColl = generationDictionary->FindWord(word);
|
| 98 |
+
|
| 99 |
+
if (wordColl == NULL) {
|
| 100 |
+
// word not found in generation dictionary
|
| 101 |
+
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
|
| 102 |
+
return; // can't be part of a phrase, special handling
|
| 103 |
+
} else {
|
| 104 |
+
// sort(*wordColl, CompareWordCollScore);
|
| 105 |
+
OutputWordCollection::const_iterator iterWordColl;
|
| 106 |
+
for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl) {
|
| 107 |
+
const Word &outputWord = (*iterWordColl).first;
|
| 108 |
+
const ScoreComponentCollection& score = (*iterWordColl).second;
|
| 109 |
+
// enter into word list generated factor(s) and its(their) score(s)
|
| 110 |
+
wordList.push_back(WordPair(outputWord, score));
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
wordListVectorPos++; // done, next word
|
| 114 |
+
}
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
// use generation list (wordList)
|
| 118 |
+
// set up iterators (total number of expansions)
|
| 119 |
+
size_t numIteration = 1;
|
| 120 |
+
vector< WordListIterator > wordListIterVector(targetLength);
|
| 121 |
+
vector< const Word* > mergeWords(targetLength);
|
| 122 |
+
for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
|
| 123 |
+
wordListIterVector[currPos] = wordListVector[currPos].begin();
|
| 124 |
+
numIteration *= wordListVector[currPos].size();
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
// go thru each possible factor for each word & create hypothesis
|
| 128 |
+
for (size_t currIter = 0 ; currIter < numIteration ; currIter++) {
|
| 129 |
+
ScoreComponentCollection generationScore; // total score for this string of words
|
| 130 |
+
|
| 131 |
+
// create vector of words with new factors for last phrase
|
| 132 |
+
for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
|
| 133 |
+
const WordPair &wordPair = *wordListIterVector[currPos];
|
| 134 |
+
mergeWords[currPos] = &(wordPair.first);
|
| 135 |
+
generationScore.PlusEquals(wordPair.second);
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
// merge with existing trans opt
|
| 139 |
+
Phrase genPhrase( mergeWords);
|
| 140 |
+
|
| 141 |
+
if (IsFilteringStep()) {
|
| 142 |
+
if (!inputPartialTranslOpt.IsCompatible(genPhrase, m_conflictFactors))
|
| 143 |
+
continue;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
|
| 147 |
+
TargetPhrase outPhrase(inPhrase);
|
| 148 |
+
outPhrase.GetScoreBreakdown().PlusEquals(generationScore);
|
| 149 |
+
|
| 150 |
+
outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
|
| 151 |
+
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply);
|
| 152 |
+
|
| 153 |
+
const Range &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
|
| 154 |
+
|
| 155 |
+
TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
|
| 156 |
+
assert(newTransOpt);
|
| 157 |
+
|
| 158 |
+
newTransOpt->SetInputPath(inputPath);
|
| 159 |
+
|
| 160 |
+
outputPartialTranslOptColl.Add(newTransOpt);
|
| 161 |
+
|
| 162 |
+
// increment iterators
|
| 163 |
+
IncrementIterators(wordListIterVector, wordListVector);
|
| 164 |
+
}
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
|
mosesdecoder/moses/DecodeStepGeneration.h
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_DecodeStepGeneration_h
|
| 23 |
+
#define moses_DecodeStepGeneration_h
|
| 24 |
+
|
| 25 |
+
#include "DecodeStep.h"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
|
| 30 |
+
class GenerationDictionary;
|
| 31 |
+
class Phrase;
|
| 32 |
+
class ScoreComponentCollection;
|
| 33 |
+
|
| 34 |
+
//! subclass of DecodeStep for generation step
|
| 35 |
+
class DecodeStepGeneration : public DecodeStep
|
| 36 |
+
{
|
| 37 |
+
public:
|
| 38 |
+
DecodeStepGeneration(GenerationDictionary* dict,
|
| 39 |
+
const DecodeStep* prev,
|
| 40 |
+
const std::vector<FeatureFunction*> &features);
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
void Process(const TranslationOption &inputPartialTranslOpt
|
| 44 |
+
, const DecodeStep &decodeStep
|
| 45 |
+
, PartialTranslOptColl &outputPartialTranslOptColl
|
| 46 |
+
, TranslationOptionCollection *toc
|
| 47 |
+
, bool adhereTableLimit) const;
|
| 48 |
+
|
| 49 |
+
private:
|
| 50 |
+
};
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
}
|
| 54 |
+
#endif
|
mosesdecoder/moses/DecodeStepTranslation.cpp
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "DecodeStepTranslation.h"
|
| 23 |
+
#include "TranslationOption.h"
|
| 24 |
+
#include "TranslationOptionCollection.h"
|
| 25 |
+
#include "PartialTranslOptColl.h"
|
| 26 |
+
#include "FactorCollection.h"
|
| 27 |
+
#include "util/exception.hh"
|
| 28 |
+
|
| 29 |
+
using namespace std;
|
| 30 |
+
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
/**
 * Build a translation decode step on top of the phrase dictionary \p pdf.
 *
 * Every feature function listed by pdf->GetFeaturesToApply() is removed
 * from this step through RemoveFeature().
 */
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* pdf,
    const DecodeStep* prev,
    const std::vector<FeatureFunction*> &features)
  : DecodeStep(pdf, prev, features)
{
  // Don't apply feature functions that come from the current phrase table;
  // that should already have been done by the phrase table itself.
  const std::vector<FeatureFunction*> &tableFeatures = pdf->GetFeaturesToApply();
  for (size_t idx = 0; idx < tableFeatures.size(); ++idx) {
    RemoveFeature(tableFeatures[idx]);
  }
}
|
| 46 |
+
|
| 47 |
+
void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
|
| 48 |
+
, const DecodeStep &decodeStep
|
| 49 |
+
, PartialTranslOptColl &outputPartialTranslOptColl
|
| 50 |
+
, TranslationOptionCollection *toc
|
| 51 |
+
, bool adhereTableLimit
|
| 52 |
+
, TargetPhraseCollection::shared_ptr phraseColl) const
|
| 53 |
+
{
|
| 54 |
+
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
|
| 55 |
+
// word deletion
|
| 56 |
+
outputPartialTranslOptColl.Add(new TranslationOption(inputPartialTranslOpt));
|
| 57 |
+
return;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
// normal trans step
|
| 61 |
+
const Range &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
|
| 62 |
+
const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
|
| 63 |
+
const PhraseDictionary* phraseDictionary =
|
| 64 |
+
decodeStep.GetPhraseDictionaryFeature();
|
| 65 |
+
const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
|
| 66 |
+
const size_t currSize = inPhrase.GetSize();
|
| 67 |
+
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
| 68 |
+
|
| 69 |
+
if (phraseColl != NULL) {
|
| 70 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 71 |
+
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
|
| 72 |
+
|
| 73 |
+
for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase) {
|
| 74 |
+
const TargetPhrase& targetPhrase = **iterTargetPhrase;
|
| 75 |
+
// const ScoreComponentCollection &transScores = targetPhrase.GetScoreBreakdown();
|
| 76 |
+
// skip if the
|
| 77 |
+
if (targetPhrase.GetSize() != currSize) continue;
|
| 78 |
+
|
| 79 |
+
TargetPhrase outPhrase(inPhrase);
|
| 80 |
+
|
| 81 |
+
if (IsFilteringStep()) {
|
| 82 |
+
if (!inputPartialTranslOpt.IsCompatible(targetPhrase, m_conflictFactors))
|
| 83 |
+
continue;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
outPhrase.Merge(targetPhrase, m_newOutputFactors);
|
| 87 |
+
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
|
| 88 |
+
|
| 89 |
+
TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
|
| 90 |
+
assert(newTransOpt != NULL);
|
| 91 |
+
|
| 92 |
+
newTransOpt->SetInputPath(inputPath);
|
| 93 |
+
|
| 94 |
+
outputPartialTranslOptColl.Add(newTransOpt );
|
| 95 |
+
|
| 96 |
+
}
|
| 97 |
+
} else if (sourceWordsRange.GetNumWordsCovered() == 1) {
|
| 98 |
+
// unknown handler
|
| 99 |
+
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
|
| 100 |
+
}
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
void
|
| 104 |
+
DecodeStepTranslation::
|
| 105 |
+
ProcessInitialTranslation(InputType const& source,
|
| 106 |
+
PartialTranslOptColl &outputPartialTranslOptColl,
|
| 107 |
+
size_t startPos, size_t endPos,
|
| 108 |
+
bool adhereTableLimit,
|
| 109 |
+
InputPath const& inputPath,
|
| 110 |
+
TargetPhraseCollection::shared_ptr phraseColl) const
|
| 111 |
+
{
|
| 112 |
+
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
| 113 |
+
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
| 114 |
+
|
| 115 |
+
const Range range(startPos, endPos);
|
| 116 |
+
|
| 117 |
+
if (phraseColl != NULL) {
|
| 118 |
+
IFVERBOSE(3) {
|
| 119 |
+
if(source.GetType() == SentenceInput)
|
| 120 |
+
TRACE_ERR("[" << source.GetSubString(range) << "; "
|
| 121 |
+
<< startPos << "-" << endPos << "]\n");
|
| 122 |
+
else
|
| 123 |
+
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 127 |
+
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
|
| 128 |
+
|
| 129 |
+
for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase) {
|
| 130 |
+
const TargetPhrase &targetPhrase = **iterTargetPhrase;
|
| 131 |
+
TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
|
| 132 |
+
|
| 133 |
+
transOpt->SetInputPath(inputPath);
|
| 134 |
+
|
| 135 |
+
outputPartialTranslOptColl.Add (transOpt);
|
| 136 |
+
|
| 137 |
+
VERBOSE(3,"\t" << targetPhrase << "\n");
|
| 138 |
+
}
|
| 139 |
+
VERBOSE(3,std::endl);
|
| 140 |
+
}
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
void
|
| 144 |
+
DecodeStepTranslation::
|
| 145 |
+
ProcessInitialTransLEGACY(InputType const& source,
|
| 146 |
+
PartialTranslOptColl &outputPartialTranslOptColl,
|
| 147 |
+
size_t startPos, size_t endPos,
|
| 148 |
+
bool adhereTableLimit,
|
| 149 |
+
InputPathList const& inputPathList) const
|
| 150 |
+
{
|
| 151 |
+
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
|
| 152 |
+
const size_t tableLimit = phraseDictionary->GetTableLimit();
|
| 153 |
+
|
| 154 |
+
const Range range(startPos, endPos);
|
| 155 |
+
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
| 156 |
+
= phraseDictionary->GetTargetPhraseCollectionLEGACY(source,range);
|
| 157 |
+
|
| 158 |
+
if (phraseColl != NULL) {
|
| 159 |
+
IFVERBOSE(3) {
|
| 160 |
+
if(source.GetType() == SentenceInput)
|
| 161 |
+
TRACE_ERR("[" << source.GetSubString(range) << "; "
|
| 162 |
+
<< startPos << "-" << endPos << "]\n");
|
| 163 |
+
else
|
| 164 |
+
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
const std::vector<Phrase> &sourcePhrases = phraseColl->GetSourcePhrases();
|
| 168 |
+
|
| 169 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 170 |
+
std::vector<Phrase>::const_iterator iterSourcePhrase;
|
| 171 |
+
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
|
| 172 |
+
|
| 173 |
+
for (iterTargetPhrase = phraseColl->begin(), iterSourcePhrase = sourcePhrases.begin()
|
| 174 |
+
; iterTargetPhrase != iterEnd
|
| 175 |
+
; ++iterTargetPhrase, ++iterSourcePhrase) {
|
| 176 |
+
assert(iterSourcePhrase != sourcePhrases.end());
|
| 177 |
+
|
| 178 |
+
const TargetPhrase &targetPhrase = **iterTargetPhrase;
|
| 179 |
+
const Phrase &sourcePhrase = *iterSourcePhrase;
|
| 180 |
+
|
| 181 |
+
const InputPath &inputPath = GetInputPathLEGACY(targetPhrase, sourcePhrase, inputPathList);
|
| 182 |
+
|
| 183 |
+
TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
|
| 184 |
+
transOpt->SetInputPath(inputPath);
|
| 185 |
+
|
| 186 |
+
outputPartialTranslOptColl.Add (transOpt);
|
| 187 |
+
|
| 188 |
+
VERBOSE(3,"\t" << targetPhrase << "\n");
|
| 189 |
+
}
|
| 190 |
+
VERBOSE(3,std::endl);
|
| 191 |
+
}
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
/**
 * Find the input path that belongs to \p sourcePhrase.
 *
 * Returns the first entry of \p inputPathList whose first non-epsilon word
 * equals the first word of \p sourcePhrase. Paths consisting solely of
 * epsilon words never match. \p targetPhrase is not consulted.
 *
 * Throws util::Exception (via UTIL_THROW) when no path matches.
 */
const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
  const TargetPhrase targetPhrase,
  const Phrase sourcePhrase,
  const InputPathList &inputPathList) const
{
  const Word &wanted = sourcePhrase.GetWord(0);

  for (InputPathList::const_iterator pathIt = inputPathList.begin();
       pathIt != inputPathList.end(); ++pathIt) {
    const InputPath &candidatePath = **pathIt;
    const Phrase &pathPhrase = candidatePath.GetPhrase();

    // Only the first non-epsilon word of the path decides the match.
    for (size_t pos = 0; pos < pathPhrase.GetSize(); ++pos) {
      const Word &pathWord = pathPhrase.GetWord(pos);
      if (pathWord.IsEpsilon()) {
        continue;
      }
      if (pathWord == wanted) {
        return candidatePath;
      }
      break;
    }
  }

  UTIL_THROW(util::Exception, "Input path not found");
}
|
| 224 |
+
|
| 225 |
+
void
|
| 226 |
+
DecodeStepTranslation::
|
| 227 |
+
ProcessLEGACY(TranslationOption const& in,
|
| 228 |
+
DecodeStep const& decodeStep,
|
| 229 |
+
PartialTranslOptColl &out,
|
| 230 |
+
TranslationOptionCollection *toc,
|
| 231 |
+
bool adhereTableLimit) const
|
| 232 |
+
{
|
| 233 |
+
if (in.GetTargetPhrase().GetSize() == 0) {
|
| 234 |
+
// word deletion
|
| 235 |
+
out.Add(new TranslationOption(in));
|
| 236 |
+
return;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
// normal trans step
|
| 240 |
+
Range const& srcRange = in.GetSourceWordsRange();
|
| 241 |
+
InputPath const& inputPath = in.GetInputPath();
|
| 242 |
+
PhraseDictionary const* pdict = decodeStep.GetPhraseDictionaryFeature();
|
| 243 |
+
TargetPhrase const& inPhrase = in.GetTargetPhrase();
|
| 244 |
+
size_t const currSize = inPhrase.GetSize();
|
| 245 |
+
size_t const tableLimit = pdict->GetTableLimit();
|
| 246 |
+
|
| 247 |
+
TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
|
| 248 |
+
= pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
|
| 249 |
+
|
| 250 |
+
if (phraseColl != NULL) {
|
| 251 |
+
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
|
| 252 |
+
iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
|
| 253 |
+
? phraseColl->begin() + tableLimit : phraseColl->end());
|
| 254 |
+
|
| 255 |
+
for (iterTargetPhrase = phraseColl->begin();
|
| 256 |
+
iterTargetPhrase != iterEnd;
|
| 257 |
+
++iterTargetPhrase) {
|
| 258 |
+
TargetPhrase const& targetPhrase = **iterTargetPhrase;
|
| 259 |
+
if (targetPhrase.GetSize() != currSize ||
|
| 260 |
+
(IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
|
| 261 |
+
continue;
|
| 262 |
+
|
| 263 |
+
TargetPhrase outPhrase(inPhrase);
|
| 264 |
+
outPhrase.Merge(targetPhrase, m_newOutputFactors);
|
| 265 |
+
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
|
| 266 |
+
|
| 267 |
+
TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
|
| 268 |
+
assert(newTransOpt != NULL);
|
| 269 |
+
|
| 270 |
+
newTransOpt->SetInputPath(inputPath);
|
| 271 |
+
|
| 272 |
+
out.Add(newTransOpt);
|
| 273 |
+
|
| 274 |
+
}
|
| 275 |
+
}
|
| 276 |
+
}
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
|