Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- mosesdecoder/moses/AlignmentInfo.cpp +170 -0
- mosesdecoder/moses/BitmapContainer.h +275 -0
- mosesdecoder/moses/ChartCell.cpp +233 -0
- mosesdecoder/moses/ChartCellLabelSet.h +147 -0
- mosesdecoder/moses/ChartHypothesisCollection.h +108 -0
- mosesdecoder/moses/ChartManager.cpp +867 -0
- mosesdecoder/moses/ChartParser.h +99 -0
- mosesdecoder/moses/ChartTranslationOptionList.h +90 -0
- mosesdecoder/moses/Factor.h +100 -0
- mosesdecoder/moses/FactorCollection.cpp +105 -0
- mosesdecoder/moses/FactorCollection.h +132 -0
- mosesdecoder/moses/FactorTypeSet.cpp +73 -0
- mosesdecoder/moses/FactorTypeSet.h +54 -0
- mosesdecoder/moses/FilePtr.h +89 -0
- mosesdecoder/moses/HypergraphOutput.h +107 -0
- mosesdecoder/moses/HypothesisStack.cpp +30 -0
- mosesdecoder/moses/HypothesisStackNormal.cpp +294 -0
- mosesdecoder/moses/IOWrapper.cpp +272 -0
- mosesdecoder/moses/InputFileStream.h +48 -0
- mosesdecoder/moses/LatticeMBR.cpp +680 -0
- mosesdecoder/moses/NonTerminal.cpp +21 -0
- mosesdecoder/moses/PartialTranslOptColl.cpp +112 -0
- mosesdecoder/moses/RuleCubeQueue.cpp +69 -0
- mosesdecoder/moses/RuleCubeQueue.h +66 -0
- mosesdecoder/moses/SquareMatrix.h +86 -0
- mosesdecoder/moses/StackVec.h +34 -0
- mosesdecoder/moses/SyntacticLanguageModelFiles.h +103 -0
- mosesdecoder/moses/SyntacticLanguageModelState.h +311 -0
- mosesdecoder/moses/Syntax/F2S/DerivationWriter.h +36 -0
- mosesdecoder/moses/Syntax/F2S/Forest.h +51 -0
- mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.cpp +85 -0
- mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.h +40 -0
- mosesdecoder/moses/Syntax/F2S/HyperPath.cpp +20 -0
- mosesdecoder/moses/Syntax/F2S/HyperPath.h +34 -0
- mosesdecoder/moses/Syntax/F2S/HyperPathLoader.h +66 -0
- mosesdecoder/moses/Syntax/F2S/HyperTree.cpp +70 -0
- mosesdecoder/moses/Syntax/F2S/HyperTree.h +92 -0
- mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.cpp +169 -0
- mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.h +41 -0
- mosesdecoder/moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h +34 -0
- mosesdecoder/moses/Syntax/F2S/PVertexToStackMap.h +20 -0
- mosesdecoder/moses/Syntax/F2S/RuleMatcherCallback.h +51 -0
- mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h +203 -0
- mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree.h +78 -0
- mosesdecoder/moses/Syntax/F2S/TopologicalSorter.cpp +55 -0
- mosesdecoder/moses/Syntax/F2S/TopologicalSorter.h +34 -0
- mosesdecoder/moses/Syntax/F2S/TreeFragmentTokenizer.cpp +100 -0
- mosesdecoder/moses/Syntax/Manager.h +70 -0
- mosesdecoder/moses/Syntax/PVertex.h +25 -0
- mosesdecoder/moses/Syntax/RuleTable.h +24 -0
mosesdecoder/moses/AlignmentInfo.cpp
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
#include <algorithm>
|
| 20 |
+
#include <set>
|
| 21 |
+
#include "AlignmentInfo.h"
|
| 22 |
+
#include "TypeDef.h"
|
| 23 |
+
#include "StaticData.h"
|
| 24 |
+
#include "Util.h"
|
| 25 |
+
#include "util/exception.hh"
|
| 26 |
+
|
| 27 |
+
namespace Moses
|
| 28 |
+
{
|
| 29 |
+
|
| 30 |
+
//! Construct from an existing set of alignment points. The set is copied into
//! m_collection and the non-terminal index maps are then built from it.
AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
|
| 31 |
+
: m_collection(pairs)
|
| 32 |
+
{
|
| 33 |
+
BuildNonTermIndexMaps();
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
/** Construct from a flat byte sequence of alignment points.
 *  \param aln consecutive position pairs: aln[0]-aln[1], aln[2]-aln[3], ...
 *             The length is expected to be even (checked by assert).
 *
 *  Each pair is widened to size_t and inserted into m_collection, after which
 *  the non-terminal index maps are built.
 */
AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
{
  assert(aln.size() % 2 == 0);
  // Bound the loop on the *second* element of each pair: when assertions are
  // compiled out (NDEBUG) an odd-length input must not read past the end.
  for (size_t i = 0; i + 1 < aln.size(); i += 2) {
    m_collection.insert(std::make_pair(size_t(aln[i]), size_t(aln[i + 1])));
  }
  BuildNonTermIndexMaps();
}
|
| 43 |
+
|
| 44 |
+
/** Construct from a textual alignment.
 *  The string is tokenized on " "; every token is then tokenized on "-" into
 *  size_t values and must yield exactly two of them, otherwise
 *  UTIL_THROW_IF2 raises an error naming the offending token.
 *
 *  NOTE(review): unlike the other two constructors this one does not call
 *  BuildNonTermIndexMaps() directly; whether Add() keeps the index maps up to
 *  date is not visible here - confirm this is intentional.
 */
AlignmentInfo::AlignmentInfo(const std::string &str)
{
  const std::vector<std::string> tokens = Tokenize(str, " ");
  for (size_t t = 0; t < tokens.size(); ++t) {
    const std::string &token = tokens[t];
    std::vector<size_t> point = Tokenize<size_t>(token, "-");
    UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << token);
    Add(point[0], point[1]);
  }
}
|
| 54 |
+
|
| 55 |
+
void AlignmentInfo::BuildNonTermIndexMaps()
|
| 56 |
+
{
|
| 57 |
+
if (m_collection.empty()) {
|
| 58 |
+
return;
|
| 59 |
+
}
|
| 60 |
+
const_iterator p = begin();
|
| 61 |
+
size_t maxIndex = p->second;
|
| 62 |
+
for (++p; p != end(); ++p) {
|
| 63 |
+
if (p->second > maxIndex) {
|
| 64 |
+
maxIndex = p->second;
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
|
| 68 |
+
m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
|
| 69 |
+
size_t i = 0;
|
| 70 |
+
for (p = begin(); p != end(); ++p) {
|
| 71 |
+
if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
|
| 72 |
+
// 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
|
| 73 |
+
m_nonTermIndexMap.clear();
|
| 74 |
+
m_nonTermIndexMap2.clear();
|
| 75 |
+
return;
|
| 76 |
+
}
|
| 77 |
+
m_nonTermIndexMap[p->second] = i++;
|
| 78 |
+
m_nonTermIndexMap2[p->second] = p->first;
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
|
| 83 |
+
{
|
| 84 |
+
std::set<size_t> ret;
|
| 85 |
+
CollType::const_iterator iter;
|
| 86 |
+
for (iter = begin(); iter != end(); ++iter) {
|
| 87 |
+
// const std::pair<size_t,size_t> &align = *iter;
|
| 88 |
+
if (iter->first == sourcePos) {
|
| 89 |
+
ret.insert(iter->second);
|
| 90 |
+
}
|
| 91 |
+
}
|
| 92 |
+
return ret;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
|
| 96 |
+
{
|
| 97 |
+
std::set<size_t> ret;
|
| 98 |
+
CollType::const_iterator iter;
|
| 99 |
+
for (iter = begin(); iter != end(); ++iter) {
|
| 100 |
+
// const std::pair<size_t,size_t> &align = *iter;
|
| 101 |
+
if (iter->second == targetPos) {
|
| 102 |
+
ret.insert(iter->first);
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
return ret;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
/** Strict "less than" on alignment points: order by the second component,
 *  and by the first component when the second components are equal.
 *  \return true iff *a sorts before *b
 */
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if (a->second != b->second) {
    return a->second < b->second;
  }
  return a->first < b->first;
}
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
std::vector< const std::pair<size_t,size_t>* >
|
| 120 |
+
AlignmentInfo::
|
| 121 |
+
GetSortedAlignments(WordAlignmentSort SortOrder) const
|
| 122 |
+
{
|
| 123 |
+
std::vector< const std::pair<size_t,size_t>* > ret;
|
| 124 |
+
|
| 125 |
+
CollType::const_iterator iter;
|
| 126 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 127 |
+
const std::pair<size_t,size_t> &alignPair = *iter;
|
| 128 |
+
ret.push_back(&alignPair);
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
switch (SortOrder) {
|
| 132 |
+
case NoSort:
|
| 133 |
+
break;
|
| 134 |
+
|
| 135 |
+
case TargetOrder:
|
| 136 |
+
std::sort(ret.begin(), ret.end(), compare_target);
|
| 137 |
+
break;
|
| 138 |
+
|
| 139 |
+
default:
|
| 140 |
+
UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
|
| 141 |
+
<< SortOrder);
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
return ret;
|
| 145 |
+
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
|
| 149 |
+
{
|
| 150 |
+
std::set<size_t> sourcePoses;
|
| 151 |
+
|
| 152 |
+
CollType::const_iterator iter;
|
| 153 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 154 |
+
size_t sourcePos = iter->first;
|
| 155 |
+
sourcePoses.insert(sourcePos);
|
| 156 |
+
}
|
| 157 |
+
std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
|
| 158 |
+
return ret;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
|
| 162 |
+
{
|
| 163 |
+
AlignmentInfo::const_iterator iter;
|
| 164 |
+
for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter) {
|
| 165 |
+
out << iter->first << "-" << iter->second << " ";
|
| 166 |
+
}
|
| 167 |
+
return out;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
}
|
mosesdecoder/moses/BitmapContainer.h
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_BitmapContainer_h
|
| 23 |
+
#define moses_BitmapContainer_h
|
| 24 |
+
|
| 25 |
+
#include <queue>
|
| 26 |
+
#include <set>
|
| 27 |
+
#include <vector>
|
| 28 |
+
|
| 29 |
+
#include "Hypothesis.h"
|
| 30 |
+
#include "HypothesisStackCubePruning.h"
|
| 31 |
+
#include "SquareMatrix.h"
|
| 32 |
+
#include "TranslationOption.h"
|
| 33 |
+
#include "TypeDef.h"
|
| 34 |
+
#include "Bitmap.h"
|
| 35 |
+
|
| 36 |
+
#include <boost/unordered_set.hpp>
|
| 37 |
+
|
| 38 |
+
namespace Moses
|
| 39 |
+
{
|
| 40 |
+
|
| 41 |
+
class BitmapContainer;
|
| 42 |
+
class BackwardsEdge;
|
| 43 |
+
class Hypothesis;
|
| 44 |
+
class HypothesisStackCubePruning;
|
| 45 |
+
class HypothesisQueueItem;
|
| 46 |
+
class QueueItemOrderer;
|
| 47 |
+
class TranslationOptionList;
|
| 48 |
+
|
| 49 |
+
//! Sequence of hypothesis pointers (type of BitmapContainer::m_hypotheses).
typedef std::vector< Hypothesis* > HypothesisSet;
|
| 50 |
+
//! Set of edge pointers; uses std::set's default ordering on the pointer values.
typedef std::set< BackwardsEdge* > BackwardsEdgeSet;
|
| 51 |
+
//! Priority queue of queue-item pointers, ordered by QueueItemOrderer.
typedef std::priority_queue< HypothesisQueueItem*, std::vector< HypothesisQueueItem* >, QueueItemOrderer> HypothesisQueue;
|
| 52 |
+
|
| 53 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 54 |
+
// Hypothesis Priority Queue Code
|
| 55 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 56 |
+
|
| 57 |
+
//! 1 item in the priority queue for stack decoding (phrase-based)
|
| 58 |
+
class HypothesisQueueItem
|
| 59 |
+
{
|
| 60 |
+
private:
|
| 61 |
+
size_t m_hypothesis_pos, m_translation_pos;
|
| 62 |
+
Hypothesis *m_hypothesis;
|
| 63 |
+
BackwardsEdge *m_edge;
|
| 64 |
+
boost::shared_ptr<TargetPhrase> m_target_phrase;
|
| 65 |
+
|
| 66 |
+
HypothesisQueueItem();
|
| 67 |
+
|
| 68 |
+
public:
|
| 69 |
+
HypothesisQueueItem(const size_t hypothesis_pos
|
| 70 |
+
, const size_t translation_pos
|
| 71 |
+
, Hypothesis *hypothesis
|
| 72 |
+
, BackwardsEdge *edge
|
| 73 |
+
, const TargetPhrase *target_phrase = NULL)
|
| 74 |
+
: m_hypothesis_pos(hypothesis_pos)
|
| 75 |
+
, m_translation_pos(translation_pos)
|
| 76 |
+
, m_hypothesis(hypothesis)
|
| 77 |
+
, m_edge(edge) {
|
| 78 |
+
if (target_phrase != NULL) {
|
| 79 |
+
m_target_phrase.reset(new TargetPhrase(*target_phrase));
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
~HypothesisQueueItem() {
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
int GetHypothesisPos() {
|
| 87 |
+
return m_hypothesis_pos;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
int GetTranslationPos() {
|
| 91 |
+
return m_translation_pos;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
Hypothesis *GetHypothesis() {
|
| 95 |
+
return m_hypothesis;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
BackwardsEdge *GetBackwardsEdge() {
|
| 99 |
+
return m_edge;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
boost::shared_ptr<TargetPhrase> GetTargetPhrase() {
|
| 103 |
+
return m_target_phrase;
|
| 104 |
+
}
|
| 105 |
+
};
|
| 106 |
+
|
| 107 |
+
//! Allows comparison of two HypothesisQueueItem objects by the corresponding scores.
|
| 108 |
+
class QueueItemOrderer
|
| 109 |
+
{
|
| 110 |
+
public:
|
| 111 |
+
bool operator()(HypothesisQueueItem* itemA, HypothesisQueueItem* itemB) const {
|
| 112 |
+
float scoreA = itemA->GetHypothesis()->GetFutureScore();
|
| 113 |
+
float scoreB = itemB->GetHypothesis()->GetFutureScore();
|
| 114 |
+
|
| 115 |
+
if (scoreA < scoreB) {
|
| 116 |
+
return true;
|
| 117 |
+
} else if (scoreA > scoreB) {
|
| 118 |
+
return false;
|
| 119 |
+
} else {
|
| 120 |
+
// Equal scores: break ties by comparing target phrases (if they exist)
|
| 121 |
+
// *Important*: these are pointers to copies of the target phrases from the
|
| 122 |
+
// hypotheses. This class is used to keep priority queues ordered in the
|
| 123 |
+
// background, so comparisons made as those data structures are cleaned up
|
| 124 |
+
// may occur *after* the target phrases in hypotheses have been cleaned up,
|
| 125 |
+
// leading to segfaults if relying on hypotheses to provide target phrases.
|
| 126 |
+
boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase();
|
| 127 |
+
boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase();
|
| 128 |
+
if (!phrA || !phrB) {
|
| 129 |
+
// Fallback: scoreA < scoreB == false, non-deterministic sort
|
| 130 |
+
return false;
|
| 131 |
+
}
|
| 132 |
+
return (phrA->Compare(*phrB) > 0);
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
+
};
|
| 136 |
+
|
| 137 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 138 |
+
// Hypothesis Orderer Code
|
| 139 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 140 |
+
// Allows to compare two Hypothesis objects by the corresponding scores.
|
| 141 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 142 |
+
|
| 143 |
+
class HypothesisScoreOrderer
|
| 144 |
+
{
|
| 145 |
+
private:
|
| 146 |
+
bool m_deterministic;
|
| 147 |
+
|
| 148 |
+
public:
|
| 149 |
+
HypothesisScoreOrderer(const bool deterministic = false)
|
| 150 |
+
: m_deterministic(deterministic) {}
|
| 151 |
+
|
| 152 |
+
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
|
| 153 |
+
|
| 154 |
+
float scoreA = hypoA->GetFutureScore();
|
| 155 |
+
float scoreB = hypoB->GetFutureScore();
|
| 156 |
+
|
| 157 |
+
if (scoreA > scoreB) {
|
| 158 |
+
return true;
|
| 159 |
+
} else if (scoreA < scoreB) {
|
| 160 |
+
return false;
|
| 161 |
+
} else {
|
| 162 |
+
if (m_deterministic) {
|
| 163 |
+
// Equal scores: break ties by comparing target phrases
|
| 164 |
+
return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
|
| 165 |
+
}
|
| 166 |
+
// Fallback: scoreA > scoreB == false, non-deterministic sort
|
| 167 |
+
return false;
|
| 168 |
+
}
|
| 169 |
+
}
|
| 170 |
+
};
|
| 171 |
+
|
| 172 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 173 |
+
// Backwards Edge Code
|
| 174 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 175 |
+
// Encodes an edge pointing to a BitmapContainer.
|
| 176 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 177 |
+
|
| 178 |
+
// Declaration only: every member function is defined outside this header.
class BackwardsEdge
|
| 179 |
+
{
|
| 180 |
+
private:
|
| 181 |
+
// BitmapContainer may access the private and protected members below.
friend class BitmapContainer;
|
| 182 |
+
bool m_initialized;
|
| 183 |
+
|
| 184 |
+
// The four references below are bound at construction and cannot be reseated.
const BitmapContainer &m_prevBitmapContainer;
|
| 185 |
+
BitmapContainer &m_parent;
|
| 186 |
+
const TranslationOptionList &m_translations;
|
| 187 |
+
const SquareMatrix &m_estimatedScores;
|
| 188 |
+
float m_estimatedScore;
|
| 189 |
+
|
| 190 |
+
bool m_deterministic;
|
| 191 |
+
|
| 192 |
+
std::vector< const Hypothesis* > m_hypotheses;
|
| 193 |
+
// NOTE(review): keys are int while SeenPosition()/SetSeenPosition() take two
// size_t values; how (x, y) is encoded into one int is not visible here - confirm.
boost::unordered_set< int > m_seenPosition;
|
| 194 |
+
|
| 195 |
+
// We don't want to instantiate "empty" objects.
|
| 196 |
+
BackwardsEdge();
|
| 197 |
+
|
| 198 |
+
Hypothesis *CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt);
|
| 199 |
+
bool SeenPosition(const size_t x, const size_t y);
|
| 200 |
+
void SetSeenPosition(const size_t x, const size_t y);
|
| 201 |
+
|
| 202 |
+
protected:
|
| 203 |
+
void Initialize();
|
| 204 |
+
|
| 205 |
+
public:
|
| 206 |
+
// Parameter names mirror the reference members above; 'source' has no
// same-named member in this class.
BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
| 207 |
+
, BitmapContainer &parent
|
| 208 |
+
, const TranslationOptionList &translations
|
| 209 |
+
, const SquareMatrix &estimatedScores
|
| 210 |
+
, const InputType& source
|
| 211 |
+
, const bool deterministic = false);
|
| 212 |
+
~BackwardsEdge();
|
| 213 |
+
|
| 214 |
+
bool GetInitialized();
|
| 215 |
+
const BitmapContainer &GetBitmapContainer() const;
|
| 216 |
+
int GetDistortionPenalty();
|
| 217 |
+
void PushSuccessors(const size_t x, const size_t y);
|
| 218 |
+
};
|
| 219 |
+
|
| 220 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 221 |
+
// Bitmap Container Code
|
| 222 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 223 |
+
// A BitmapContainer encodes an ordered set of hypotheses and a set of edges
|
| 224 |
+
// pointing to the "generating" BitmapContainers. It also stores a priority
|
| 225 |
+
// queue that contains expanded hypotheses from the connected edges.
|
| 226 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 227 |
+
|
| 228 |
+
// Declaration only, except for the inline GetWordsBitmap(); all other member
// functions are defined outside this header.
class BitmapContainer
|
| 229 |
+
{
|
| 230 |
+
private:
|
| 231 |
+
// Bitmap and stack are held by reference, bound at construction.
const Bitmap &m_bitmap;
|
| 232 |
+
HypothesisStackCubePruning &m_stack;
|
| 233 |
+
HypothesisSet m_hypotheses;
|
| 234 |
+
BackwardsEdgeSet m_edges;
|
| 235 |
+
HypothesisQueue m_queue;
|
| 236 |
+
size_t m_numStackInsertions;
|
| 237 |
+
bool m_deterministic;
|
| 238 |
+
|
| 239 |
+
// We always require a corresponding bitmap to be supplied.
|
| 240 |
+
BitmapContainer();
|
| 241 |
+
// Copy constructor is private, so outside code cannot copy a container.
BitmapContainer(const BitmapContainer &);
|
| 242 |
+
public:
|
| 243 |
+
BitmapContainer(const Bitmap &bitmap
|
| 244 |
+
, HypothesisStackCubePruning &stack
|
| 245 |
+
, bool deterministic = false);
|
| 246 |
+
|
| 247 |
+
// The destructor will also delete all the edges that are
|
| 248 |
+
// connected to this BitmapContainer.
|
| 249 |
+
~BitmapContainer();
|
| 250 |
+
|
| 251 |
+
// Queue interface. NOTE(review): positions are int here but size_t in
// HypothesisQueueItem's constructor - confirm the conversion is intended.
void Enqueue(int hypothesis_pos, int translation_pos, Hypothesis *hypothesis, BackwardsEdge *edge);
|
| 252 |
+
HypothesisQueueItem *Dequeue(bool keepValue=false);
|
| 253 |
+
HypothesisQueueItem *Top() const;
|
| 254 |
+
size_t Size();
|
| 255 |
+
bool Empty() const;
|
| 256 |
+
|
| 257 |
+
// Returns the bitmap reference this container was constructed with.
const Bitmap &GetWordsBitmap() const {
|
| 258 |
+
return m_bitmap;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
const HypothesisSet &GetHypotheses() const;
|
| 262 |
+
size_t GetHypothesesSize() const;
|
| 263 |
+
const BackwardsEdgeSet &GetBackwardsEdges();
|
| 264 |
+
|
| 265 |
+
void InitializeEdges();
|
| 266 |
+
void ProcessBestHypothesis();
|
| 267 |
+
void EnsureMinStackHyps(const size_t minNumHyps);
|
| 268 |
+
void AddHypothesis(Hypothesis *hypothesis);
|
| 269 |
+
void AddBackwardsEdge(BackwardsEdge *edge);
|
| 270 |
+
void SortHypotheses();
|
| 271 |
+
};
|
| 272 |
+
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
#endif
|
mosesdecoder/moses/ChartCell.cpp
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include "ChartCell.h"
|
| 24 |
+
#include "ChartCellCollection.h"
|
| 25 |
+
#include "HypergraphOutput.h"
|
| 26 |
+
#include "RuleCubeQueue.h"
|
| 27 |
+
#include "RuleCube.h"
|
| 28 |
+
#include "Range.h"
|
| 29 |
+
#include "Util.h"
|
| 30 |
+
#include "ChartTranslationOptions.h"
|
| 31 |
+
#include "ChartTranslationOptionList.h"
|
| 32 |
+
#include "ChartManager.h"
|
| 33 |
+
#include "util/exception.hh"
|
| 34 |
+
|
| 35 |
+
using namespace std;
|
| 36 |
+
|
| 37 |
+
namespace Moses
|
| 38 |
+
{
|
| 39 |
+
|
| 40 |
+
//! Builds the cell's coverage from startPos and endPos, then constructs the
//! target label set from that coverage (so m_coverage must be initialised first).
ChartCellBase::ChartCellBase(size_t startPos, size_t endPos) :
|
| 41 |
+
m_coverage(startPos, endPos),
|
| 42 |
+
m_targetLabelSet(m_coverage) {}
|
| 43 |
+
|
| 44 |
+
//! Empty body: no explicit cleanup is performed here.
ChartCellBase::~ChartCellBase() {}
|
| 45 |
+
|
| 46 |
+
/** Constructor
|
| 47 |
+
* \param startPos,endPos range of this cell (forwarded to ChartCellBase)
|
| 48 |
+
* \param manager reference back to the manager; also the source of the n-best setting cached below
|
| 49 |
+
*/
|
| 50 |
+
ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
|
| 51 |
+
ChartCellBase(startPos, endPos), m_manager(manager)
|
| 52 |
+
{
|
| 53 |
+
// Cache the n-best flag once; CleanupArcList() checks it on every call.
m_nBestIsEnabled = manager.options()->nbest.enabled;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
//! Empty body: no explicit cleanup is performed here.
ChartCell::~ChartCell() {}
|
| 57 |
+
|
| 58 |
+
/** Add the given hypothesis to the cell.
|
| 59 |
+
* Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
|
| 60 |
+
* This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
|
| 61 |
+
* \param hypo Hypothesis to be added
|
| 62 |
+
*/
|
| 63 |
+
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
|
| 64 |
+
{
|
| 65 |
+
const Word &targetLHS = hypo->GetTargetLHS();
|
| 66 |
+
MapType::iterator m = m_hypoColl.find(targetLHS);
|
| 67 |
+
if (m == m_hypoColl.end()) {
|
| 68 |
+
std::pair<Word, ChartHypothesisCollection>
|
| 69 |
+
e(targetLHS, ChartHypothesisCollection(*m_manager.options()));
|
| 70 |
+
m = m_hypoColl.insert(e).first;
|
| 71 |
+
}
|
| 72 |
+
return m->second.AddHypothesis(hypo, m_manager);
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
/** Prune each collection in this cell to a particular size */
|
| 76 |
+
void ChartCell::PruneToSize()
|
| 77 |
+
{
|
| 78 |
+
MapType::iterator iter;
|
| 79 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 80 |
+
ChartHypothesisCollection &coll = iter->second;
|
| 81 |
+
coll.PruneToSize(m_manager);
|
| 82 |
+
}
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
/** Decoding at span level: fill chart cell with hypotheses
|
| 86 |
+
* (implementation of cube pruning)
|
| 87 |
+
* \param transOptList list of applicable rules to create hypotheses for the cell
|
| 88 |
+
* \param allChartCells entire chart - needed to look up underlying hypotheses
|
| 89 |
+
*/
|
| 90 |
+
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
|
| 91 |
+
, const ChartCellCollection &allChartCells)
|
| 92 |
+
{
|
| 93 |
+
// priority queue for applicable rules with selected hypotheses
|
| 94 |
+
RuleCubeQueue queue(m_manager);
|
| 95 |
+
|
| 96 |
+
// add all trans opt into queue. using only 1st child node.
|
| 97 |
+
for (size_t i = 0; i < transOptList.GetSize(); ++i) {
|
| 98 |
+
const ChartTranslationOptions &transOpt = transOptList.Get(i);
|
| 99 |
+
RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
|
| 100 |
+
queue.Add(ruleCube);
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
// pluck things out of queue and add to hypo collection
|
| 104 |
+
const size_t popLimit = m_manager.options()->cube.pop_limit;
|
| 105 |
+
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
|
| 106 |
+
ChartHypothesis *hypo = queue.Pop();
|
| 107 |
+
AddHypothesis(hypo);
|
| 108 |
+
}
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
//! call SortHypotheses() in each hypo collection in this cell
|
| 112 |
+
void ChartCell::SortHypotheses()
|
| 113 |
+
{
|
| 114 |
+
UTIL_THROW_IF2(!m_targetLabelSet.Empty(), "Already sorted");
|
| 115 |
+
|
| 116 |
+
MapType::iterator iter;
|
| 117 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 118 |
+
ChartHypothesisCollection &coll = iter->second;
|
| 119 |
+
|
| 120 |
+
if (coll.GetSize()) {
|
| 121 |
+
coll.SortHypotheses();
|
| 122 |
+
m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
|
| 123 |
+
}
|
| 124 |
+
}
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
/** Return the highest scoring hypothesis out of all the hypo collection in this cell */
|
| 128 |
+
const ChartHypothesis *ChartCell::GetBestHypothesis() const
|
| 129 |
+
{
|
| 130 |
+
const ChartHypothesis *ret = NULL;
|
| 131 |
+
float bestScore = -std::numeric_limits<float>::infinity();
|
| 132 |
+
|
| 133 |
+
MapType::const_iterator iter;
|
| 134 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 135 |
+
const HypoList &sortedList = iter->second.GetSortedHypotheses();
|
| 136 |
+
if (sortedList.size() > 0) {
|
| 137 |
+
const ChartHypothesis *hypo = sortedList[0];
|
| 138 |
+
if (hypo->GetFutureScore() > bestScore) {
|
| 139 |
+
bestScore = hypo->GetFutureScore();
|
| 140 |
+
ret = hypo;
|
| 141 |
+
}
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
return ret;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
//! call CleanupArcList() in each hypo collection in this cell
|
| 149 |
+
void ChartCell::CleanupArcList()
|
| 150 |
+
{
|
| 151 |
+
// only necessary if n-best calculations are enabled
|
| 152 |
+
if (!m_nBestIsEnabled) return;
|
| 153 |
+
|
| 154 |
+
MapType::iterator iter;
|
| 155 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 156 |
+
ChartHypothesisCollection &coll = iter->second;
|
| 157 |
+
coll.CleanupArcList();
|
| 158 |
+
}
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
//! debug info - size of each hypo collection in this cell
|
| 162 |
+
void ChartCell::OutputSizes(std::ostream &out) const
|
| 163 |
+
{
|
| 164 |
+
MapType::const_iterator iter;
|
| 165 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 166 |
+
const Word &targetLHS = iter->first;
|
| 167 |
+
const ChartHypothesisCollection &coll = iter->second;
|
| 168 |
+
|
| 169 |
+
out << targetLHS << "=" << coll.GetSize() << " ";
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
//! debug info - total number of hypos in all hypo collection in this cell
|
| 174 |
+
size_t ChartCell::GetSize() const
|
| 175 |
+
{
|
| 176 |
+
size_t ret = 0;
|
| 177 |
+
MapType::const_iterator iter;
|
| 178 |
+
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
| 179 |
+
const ChartHypothesisCollection &coll = iter->second;
|
| 180 |
+
|
| 181 |
+
ret += coll.GetSize();
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
return ret;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
/** Concatenate the sorted hypothesis lists of all collections in this cell.
 *
 *  Each collection's list is appended as a whole, in the iteration order of
 *  the collection map; the combined list is not re-sorted here.
 *
 *  \return a list allocated with new; the caller is responsible for
 *          deleting it
 */
const HypoList *ChartCell::GetAllSortedHypotheses() const
{
  HypoList *merged = new HypoList();

  for (MapType::const_iterator it = m_hypoColl.begin(); it != m_hypoColl.end(); ++it) {
    const HypoList &part = it->second.GetSortedHypotheses();
    merged->insert(merged->end(), part.begin(), part.end());
  }
  return merged;
}
|
| 199 |
+
|
| 200 |
+
//! call WriteSearchGraph() for each hypo collection
|
| 201 |
+
void ChartCell::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
|
| 202 |
+
{
|
| 203 |
+
MapType::const_iterator iterOutside;
|
| 204 |
+
for (iterOutside = m_hypoColl.begin(); iterOutside != m_hypoColl.end(); ++iterOutside) {
|
| 205 |
+
const ChartHypothesisCollection &coll = iterOutside->second;
|
| 206 |
+
coll.WriteSearchGraph(writer, reachable);
|
| 207 |
+
}
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
std::ostream& operator<<(std::ostream &out, const ChartCell &cell)
|
| 211 |
+
{
|
| 212 |
+
ChartCell::MapType::const_iterator iterOutside;
|
| 213 |
+
for (iterOutside = cell.m_hypoColl.begin(); iterOutside != cell.m_hypoColl.end(); ++iterOutside) {
|
| 214 |
+
const Word &targetLHS = iterOutside->first;
|
| 215 |
+
cerr << targetLHS << ":" << endl;
|
| 216 |
+
|
| 217 |
+
const ChartHypothesisCollection &coll = iterOutside->second;
|
| 218 |
+
cerr << coll;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
/*
|
| 222 |
+
ChartCell::HCType::const_iterator iter;
|
| 223 |
+
for (iter = cell.m_hypos.begin(); iter != cell.m_hypos.end(); ++iter)
|
| 224 |
+
{
|
| 225 |
+
const ChartHypothesis &hypo = **iter;
|
| 226 |
+
out << hypo << endl;
|
| 227 |
+
}
|
| 228 |
+
*/
|
| 229 |
+
|
| 230 |
+
return out;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
} // namespace
|
mosesdecoder/moses/ChartCellLabelSet.h
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "ChartCellLabel.h"
|
| 23 |
+
#include "NonTerminal.h"
|
| 24 |
+
#include "moses/FactorCollection.h"
|
| 25 |
+
|
| 26 |
+
#include <boost/functional/hash.hpp>
|
| 27 |
+
#include <boost/unordered_map.hpp>
|
| 28 |
+
#include <boost/version.hpp>
|
| 29 |
+
|
| 30 |
+
namespace Moses
|
| 31 |
+
{
|
| 32 |
+
|
| 33 |
+
class ChartHypothesisCollection;
|
| 34 |
+
|
| 35 |
+
/** @todo I have no idea what's in here
|
| 36 |
+
*/
|
| 37 |
+
class ChartCellLabelSet
|
| 38 |
+
{
|
| 39 |
+
private:
|
| 40 |
+
|
| 41 |
+
typedef std::vector<ChartCellLabel*> MapType;
|
| 42 |
+
|
| 43 |
+
public:
|
| 44 |
+
typedef MapType::const_iterator const_iterator;
|
| 45 |
+
typedef MapType::iterator iterator;
|
| 46 |
+
|
| 47 |
+
ChartCellLabelSet(const Range &coverage)
|
| 48 |
+
: m_coverage(coverage)
|
| 49 |
+
, m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
|
| 50 |
+
, m_size(0) { }
|
| 51 |
+
|
| 52 |
+
~ChartCellLabelSet() {
|
| 53 |
+
RemoveAllInColl(m_map);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
// TODO: skip empty elements when iterating, or deprecate this
|
| 57 |
+
const_iterator begin() const {
|
| 58 |
+
return m_map.begin();
|
| 59 |
+
}
|
| 60 |
+
const_iterator end() const {
|
| 61 |
+
return m_map.end();
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
iterator mutable_begin() {
|
| 65 |
+
return m_map.begin();
|
| 66 |
+
}
|
| 67 |
+
iterator mutable_end() {
|
| 68 |
+
return m_map.end();
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
void AddWord(const Word &w) {
|
| 72 |
+
size_t idx = w[0]->GetId();
|
| 73 |
+
if (! ChartCellExists(idx)) {
|
| 74 |
+
m_size++;
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
m_map[idx] = new ChartCellLabel(m_coverage, w);
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
// Stack is a HypoList or whatever the search algorithm uses.
|
| 82 |
+
void AddConstituent(const Word &w, const HypoList *stack) {
|
| 83 |
+
size_t idx = w[0]->GetId();
|
| 84 |
+
if (ChartCellExists(idx)) {
|
| 85 |
+
ChartCellLabel::Stack & s = m_map[idx]->MutableStack();
|
| 86 |
+
s.cube = stack;
|
| 87 |
+
} else {
|
| 88 |
+
ChartCellLabel::Stack s;
|
| 89 |
+
s.cube = stack;
|
| 90 |
+
m_size++;
|
| 91 |
+
m_map[idx] = new ChartCellLabel(m_coverage, w, s);
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
// grow vector if necessary
|
| 96 |
+
bool ChartCellExists(size_t idx) {
|
| 97 |
+
try {
|
| 98 |
+
if (m_map.at(idx) != NULL) {
|
| 99 |
+
return true;
|
| 100 |
+
}
|
| 101 |
+
} catch (const std::out_of_range& oor) {
|
| 102 |
+
m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL);
|
| 103 |
+
}
|
| 104 |
+
return false;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
bool Empty() const {
|
| 108 |
+
return m_size == 0;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
size_t GetSize() const {
|
| 112 |
+
return m_size;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
const ChartCellLabel *Find(const Word &w) const {
|
| 116 |
+
size_t idx = w[0]->GetId();
|
| 117 |
+
try {
|
| 118 |
+
return m_map.at(idx);
|
| 119 |
+
} catch (const std::out_of_range& oor) {
|
| 120 |
+
return NULL;
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
const ChartCellLabel *Find(size_t idx) const {
|
| 125 |
+
try {
|
| 126 |
+
return m_map.at(idx);
|
| 127 |
+
} catch (const std::out_of_range& oor) {
|
| 128 |
+
return NULL;
|
| 129 |
+
}
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
ChartCellLabel::Stack &FindOrInsert(const Word &w) {
|
| 133 |
+
size_t idx = w[0]->GetId();
|
| 134 |
+
if (! ChartCellExists(idx)) {
|
| 135 |
+
m_size++;
|
| 136 |
+
m_map[idx] = new ChartCellLabel(m_coverage, w);
|
| 137 |
+
}
|
| 138 |
+
return m_map[idx]->MutableStack();
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
private:
|
| 142 |
+
const Range &m_coverage;
|
| 143 |
+
MapType m_map;
|
| 144 |
+
size_t m_size;
|
| 145 |
+
};
|
| 146 |
+
|
| 147 |
+
}
|
mosesdecoder/moses/ChartHypothesisCollection.h
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
#pragma once
|
| 22 |
+
|
| 23 |
+
#include <set>
|
| 24 |
+
#include "ChartHypothesis.h"
|
| 25 |
+
#include "RuleCube.h"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
|
| 31 |
+
class ChartSearchGraphWriter;
|
| 32 |
+
struct AllOptions;
|
| 33 |
+
|
| 34 |
+
//! functor to compare (chart) hypotheses by (descending) score
|
| 35 |
+
class ChartHypothesisScoreOrderer
|
| 36 |
+
{
|
| 37 |
+
public:
|
| 38 |
+
bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const {
|
| 39 |
+
return hypoA->GetFutureScore() > hypoB->GetFutureScore();
|
| 40 |
+
}
|
| 41 |
+
};
|
| 42 |
+
|
| 43 |
+
/** Contains a set of unique hypos that have the same HS non-term.
|
| 44 |
+
* ie. 1 of these for each target LHS in each cell
|
| 45 |
+
*/
|
| 46 |
+
class ChartHypothesisCollection
|
| 47 |
+
{
|
| 48 |
+
friend std::ostream& operator<<(std::ostream&, const ChartHypothesisCollection&);
|
| 49 |
+
|
| 50 |
+
protected:
|
| 51 |
+
//typedef std::set<ChartHypothesis*, ChartHypothesisRecombinationOrderer> HCType;
|
| 52 |
+
typedef boost::unordered_set< ChartHypothesis*, UnorderedComparer<ChartHypothesis>, UnorderedComparer<ChartHypothesis> > HCType;
|
| 53 |
+
HCType m_hypos;
|
| 54 |
+
HypoList m_hyposOrdered;
|
| 55 |
+
|
| 56 |
+
float m_bestScore; /**< score of the best hypothesis in collection */
|
| 57 |
+
float m_beamWidth; /**< minimum score due to threashold pruning */
|
| 58 |
+
size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
|
| 59 |
+
bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
|
| 60 |
+
|
| 61 |
+
std::pair<HCType::iterator, bool> Add(ChartHypothesis *hypo, ChartManager &manager);
|
| 62 |
+
|
| 63 |
+
public:
|
| 64 |
+
typedef HCType::iterator iterator;
|
| 65 |
+
typedef HCType::const_iterator const_iterator;
|
| 66 |
+
//! iterators
|
| 67 |
+
const_iterator begin() const {
|
| 68 |
+
return m_hypos.begin();
|
| 69 |
+
}
|
| 70 |
+
const_iterator end() const {
|
| 71 |
+
return m_hypos.end();
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
ChartHypothesisCollection(AllOptions const& opts);
|
| 75 |
+
~ChartHypothesisCollection();
|
| 76 |
+
bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);
|
| 77 |
+
|
| 78 |
+
void Detach(const HCType::iterator &iter);
|
| 79 |
+
void Remove(const HCType::iterator &iter);
|
| 80 |
+
|
| 81 |
+
void PruneToSize(ChartManager &manager);
|
| 82 |
+
|
| 83 |
+
size_t GetSize() const {
|
| 84 |
+
return m_hypos.size();
|
| 85 |
+
}
|
| 86 |
+
size_t GetHypo() const {
|
| 87 |
+
return m_hypos.size();
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
void SortHypotheses();
|
| 91 |
+
void CleanupArcList();
|
| 92 |
+
|
| 93 |
+
//! return vector of hypothesis that has been sorted by score
|
| 94 |
+
const HypoList &GetSortedHypotheses() const {
|
| 95 |
+
return m_hyposOrdered;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
//! return the best total score of all hypos in this collection
|
| 99 |
+
float GetBestScore() const {
|
| 100 |
+
return m_bestScore;
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
|
| 104 |
+
|
| 105 |
+
};
|
| 106 |
+
|
| 107 |
+
} // namespace
|
| 108 |
+
|
mosesdecoder/moses/ChartManager.cpp
ADDED
|
@@ -0,0 +1,867 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <cstdio>
|
| 23 |
+
#include "ChartManager.h"
|
| 24 |
+
#include "ChartCell.h"
|
| 25 |
+
#include "ChartHypothesis.h"
|
| 26 |
+
#include "ChartKBestExtractor.h"
|
| 27 |
+
#include "ChartTranslationOptions.h"
|
| 28 |
+
#include "HypergraphOutput.h"
|
| 29 |
+
#include "StaticData.h"
|
| 30 |
+
#include "DecodeStep.h"
|
| 31 |
+
#include "TreeInput.h"
|
| 32 |
+
#include "moses/FF/StatefulFeatureFunction.h"
|
| 33 |
+
#include "moses/FF/WordPenaltyProducer.h"
|
| 34 |
+
#include "moses/OutputCollector.h"
|
| 35 |
+
#include "moses/ChartKBestExtractor.h"
|
| 36 |
+
#include "moses/HypergraphOutput.h"
|
| 37 |
+
#include "moses/TranslationTask.h"
|
| 38 |
+
|
| 39 |
+
using namespace std;
|
| 40 |
+
|
| 41 |
+
namespace Moses
|
| 42 |
+
{
|
| 43 |
+
|
| 44 |
+
/* constructor. Initialize everything prior to decoding a particular sentence.
|
| 45 |
+
* \param source the sentence to be decoded
|
| 46 |
+
* \param system which particular set of models to use.
|
| 47 |
+
*/
|
| 48 |
+
ChartManager::ChartManager(ttasksptr const& ttask)
|
| 49 |
+
: BaseManager(ttask)
|
| 50 |
+
, m_hypoStackColl(m_source, *this)
|
| 51 |
+
, m_start(clock())
|
| 52 |
+
, m_hypothesisId(0)
|
| 53 |
+
, m_parser(ttask, m_hypoStackColl)
|
| 54 |
+
, m_translationOptionList(ttask->options()->syntax.rule_limit, m_source)
|
| 55 |
+
{ }
|
| 56 |
+
|
| 57 |
+
//! Destructor. Reports (at verbosity 1) the clock() time elapsed since
//! construction, converted to seconds.
ChartManager::~ChartManager()
{
  const clock_t finish = clock();
  const float elapsedTicks = (finish - m_start);
  const float elapsedSeconds = elapsedTicks / (float)CLOCKS_PER_SEC;
  VERBOSE(1, "Translation took " << elapsedSeconds << " seconds" << endl);
}
|
| 65 |
+
|
| 66 |
+
//! decode the sentence. This contains the main laps. Basically, the CKY++ algorithm
|
| 67 |
+
void ChartManager::Decode()
|
| 68 |
+
{
|
| 69 |
+
|
| 70 |
+
VERBOSE(1,"Translating: " << m_source << endl);
|
| 71 |
+
|
| 72 |
+
ResetSentenceStats(m_source);
|
| 73 |
+
|
| 74 |
+
VERBOSE(2,"Decoding: " << endl);
|
| 75 |
+
//ChartHypothesis::ResetHypoCount();
|
| 76 |
+
|
| 77 |
+
AddXmlChartOptions();
|
| 78 |
+
|
| 79 |
+
// MAIN LOOP
|
| 80 |
+
size_t size = m_source.GetSize();
|
| 81 |
+
for (int startPos = size-1; startPos >= 0; --startPos) {
|
| 82 |
+
for (size_t width = 1; width <= size-startPos; ++width) {
|
| 83 |
+
size_t endPos = startPos + width - 1;
|
| 84 |
+
Range range(startPos, endPos);
|
| 85 |
+
|
| 86 |
+
// create trans opt
|
| 87 |
+
m_translationOptionList.Clear();
|
| 88 |
+
m_parser.Create(range, m_translationOptionList);
|
| 89 |
+
m_translationOptionList.ApplyThreshold(options()->search.trans_opt_threshold);
|
| 90 |
+
|
| 91 |
+
const InputPath &inputPath = m_parser.GetInputPath(range);
|
| 92 |
+
m_translationOptionList.EvaluateWithSourceContext(m_source, inputPath);
|
| 93 |
+
|
| 94 |
+
// decode
|
| 95 |
+
ChartCell &cell = m_hypoStackColl.Get(range);
|
| 96 |
+
cell.Decode(m_translationOptionList, m_hypoStackColl);
|
| 97 |
+
|
| 98 |
+
m_translationOptionList.Clear();
|
| 99 |
+
cell.PruneToSize();
|
| 100 |
+
cell.CleanupArcList();
|
| 101 |
+
cell.SortHypotheses();
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
IFVERBOSE(1) {
|
| 106 |
+
|
| 107 |
+
for (size_t startPos = 0; startPos < size; ++startPos) {
|
| 108 |
+
cerr.width(3);
|
| 109 |
+
cerr << startPos << " ";
|
| 110 |
+
}
|
| 111 |
+
cerr << endl;
|
| 112 |
+
for (size_t width = 1; width <= size; width++) {
|
| 113 |
+
for( size_t space = 0; space < width-1; space++ ) {
|
| 114 |
+
cerr << " ";
|
| 115 |
+
}
|
| 116 |
+
for (size_t startPos = 0; startPos <= size-width; ++startPos) {
|
| 117 |
+
Range range(startPos, startPos+width-1);
|
| 118 |
+
cerr.width(3);
|
| 119 |
+
cerr << m_hypoStackColl.Get(range).GetSize() << " ";
|
| 120 |
+
}
|
| 121 |
+
cerr << endl;
|
| 122 |
+
}
|
| 123 |
+
}
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
/** add specific translation options and hypotheses according to the XML override translation scheme.
|
| 127 |
+
* Doesn't seem to do anything about walls and zones.
|
| 128 |
+
* @todo check walls & zones. Check that the implementation doesn't leak, xml options sometimes does if you're not careful
|
| 129 |
+
*/
|
| 130 |
+
void ChartManager::AddXmlChartOptions()
|
| 131 |
+
{
|
| 132 |
+
const std::vector <ChartTranslationOptions*> xmlChartOptionsList
|
| 133 |
+
= m_source.GetXmlChartTranslationOptions();
|
| 134 |
+
IFVERBOSE(2) {
|
| 135 |
+
cerr << "AddXmlChartOptions " << xmlChartOptionsList.size() << endl;
|
| 136 |
+
}
|
| 137 |
+
if (xmlChartOptionsList.size() == 0) return;
|
| 138 |
+
|
| 139 |
+
typedef std::vector<ChartTranslationOptions*>::const_iterator citer;
|
| 140 |
+
for(citer i = xmlChartOptionsList.begin(); i != xmlChartOptionsList.end(); ++i) {
|
| 141 |
+
ChartTranslationOptions* opt = *i;
|
| 142 |
+
|
| 143 |
+
const Range &range = opt->GetSourceWordsRange();
|
| 144 |
+
|
| 145 |
+
RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
|
| 146 |
+
ChartHypothesis* hypo = new ChartHypothesis(*opt, *item, *this);
|
| 147 |
+
hypo->EvaluateWhenApplied();
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
ChartCell &cell = m_hypoStackColl.Get(range);
|
| 151 |
+
cell.AddHypothesis(hypo);
|
| 152 |
+
}
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
//! get best complete translation from the top chart cell.
|
| 156 |
+
const ChartHypothesis *ChartManager::GetBestHypothesis() const
|
| 157 |
+
{
|
| 158 |
+
size_t size = m_source.GetSize();
|
| 159 |
+
|
| 160 |
+
if (size == 0) // empty source
|
| 161 |
+
return NULL;
|
| 162 |
+
else {
|
| 163 |
+
Range range(0, size-1);
|
| 164 |
+
const ChartCell &lastCell = m_hypoStackColl.Get(range);
|
| 165 |
+
return lastCell.GetBestHypothesis();
|
| 166 |
+
}
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
/** Calculate the n-best paths through the output hypergraph.
|
| 170 |
+
* Return the list of paths with the variable ret
|
| 171 |
+
* \param n how may paths to return
|
| 172 |
+
* \param ret return argument
|
| 173 |
+
* \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
|
| 174 |
+
*/
|
| 175 |
+
void ChartManager::CalcNBest(
|
| 176 |
+
std::size_t n,
|
| 177 |
+
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
|
| 178 |
+
bool onlyDistinct) const
|
| 179 |
+
{
|
| 180 |
+
nBestList.clear();
|
| 181 |
+
if (n == 0 || m_source.GetSize() == 0) {
|
| 182 |
+
return;
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
// Get the list of top-level hypotheses, sorted by score.
|
| 186 |
+
Range range(0, m_source.GetSize()-1);
|
| 187 |
+
const ChartCell &lastCell = m_hypoStackColl.Get(range);
|
| 188 |
+
boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
|
| 189 |
+
lastCell.GetAllSortedHypotheses());
|
| 190 |
+
if (!topLevelHypos) {
|
| 191 |
+
return;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
ChartKBestExtractor extractor;
|
| 195 |
+
|
| 196 |
+
if (!onlyDistinct) {
|
| 197 |
+
// Return the n-best list as is, including duplicate translations.
|
| 198 |
+
extractor.Extract(*topLevelHypos, n, nBestList);
|
| 199 |
+
return;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
// Determine how many derivations to extract. If the n-best list is
|
| 203 |
+
// restricted to distinct translations then this limit should be bigger
|
| 204 |
+
// than n. The n-best factor determines how much bigger the limit should be,
|
| 205 |
+
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
|
| 206 |
+
// too many translations are identical.
|
| 207 |
+
const std::size_t nBestFactor = options()->nbest.factor;
|
| 208 |
+
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
|
| 209 |
+
|
| 210 |
+
// Extract the derivations.
|
| 211 |
+
ChartKBestExtractor::KBestVec bigList;
|
| 212 |
+
bigList.reserve(numDerivations);
|
| 213 |
+
extractor.Extract(*topLevelHypos, numDerivations, bigList);
|
| 214 |
+
|
| 215 |
+
// Copy derivations into nBestList, skipping ones with repeated translations.
|
| 216 |
+
std::set<Phrase> distinct;
|
| 217 |
+
for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
|
| 218 |
+
nBestList.size() < n && p != bigList.end(); ++p) {
|
| 219 |
+
boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
|
| 220 |
+
Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
|
| 221 |
+
if (distinct.insert(translation).second) {
|
| 222 |
+
nBestList.push_back(derivation);
|
| 223 |
+
}
|
| 224 |
+
}
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
void ChartManager::WriteSearchGraph(const ChartSearchGraphWriter& writer) const
|
| 228 |
+
{
|
| 229 |
+
|
| 230 |
+
size_t size = m_source.GetSize();
|
| 231 |
+
|
| 232 |
+
// which hypotheses are reachable?
|
| 233 |
+
std::map<unsigned,bool> reachable;
|
| 234 |
+
Range fullRange(0, size-1);
|
| 235 |
+
const ChartCell &lastCell = m_hypoStackColl.Get(fullRange);
|
| 236 |
+
const ChartHypothesis *hypo = lastCell.GetBestHypothesis();
|
| 237 |
+
|
| 238 |
+
if (hypo == NULL) {
|
| 239 |
+
// no hypothesis
|
| 240 |
+
return;
|
| 241 |
+
}
|
| 242 |
+
size_t winners = 0;
|
| 243 |
+
size_t losers = 0;
|
| 244 |
+
|
| 245 |
+
FindReachableHypotheses( hypo, reachable, &winners, &losers);
|
| 246 |
+
writer.WriteHeader(winners, losers);
|
| 247 |
+
|
| 248 |
+
for (size_t width = 1; width <= size; ++width) {
|
| 249 |
+
for (size_t startPos = 0; startPos <= size-width; ++startPos) {
|
| 250 |
+
size_t endPos = startPos + width - 1;
|
| 251 |
+
Range range(startPos, endPos);
|
| 252 |
+
TRACE_ERR(" " << range << "=");
|
| 253 |
+
|
| 254 |
+
const ChartCell &cell = m_hypoStackColl.Get(range);
|
| 255 |
+
cell.WriteSearchGraph(writer, reachable);
|
| 256 |
+
}
|
| 257 |
+
}
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
void ChartManager::FindReachableHypotheses(
|
| 261 |
+
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable, size_t* winners, size_t* losers) const
|
| 262 |
+
{
|
| 263 |
+
// do not recurse, if already visited
|
| 264 |
+
if (reachable.find(hypo->GetId()) != reachable.end()) {
|
| 265 |
+
return;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
// recurse
|
| 269 |
+
reachable[ hypo->GetId() ] = true;
|
| 270 |
+
if (hypo->GetWinningHypothesis() == hypo) {
|
| 271 |
+
(*winners)++;
|
| 272 |
+
} else {
|
| 273 |
+
(*losers)++;
|
| 274 |
+
}
|
| 275 |
+
const std::vector<const ChartHypothesis*> &previous = hypo->GetPrevHypos();
|
| 276 |
+
for(std::vector<const ChartHypothesis*>::const_iterator i = previous.begin(); i != previous.end(); ++i) {
|
| 277 |
+
FindReachableHypotheses( *i, reachable, winners, losers );
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
// also loop over recombined hypotheses (arcs)
|
| 281 |
+
const ChartArcList *arcList = hypo->GetArcList();
|
| 282 |
+
if (arcList) {
|
| 283 |
+
ChartArcList::const_iterator iterArc;
|
| 284 |
+
for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
|
| 285 |
+
const ChartHypothesis &arc = **iterArc;
|
| 286 |
+
FindReachableHypotheses( &arc, reachable, winners, losers );
|
| 287 |
+
}
|
| 288 |
+
}
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
void
|
| 292 |
+
ChartManager::
|
| 293 |
+
OutputSearchGraphAsHypergraph(std::ostream& out) const
|
| 294 |
+
{
|
| 295 |
+
ChartSearchGraphWriterHypergraph writer(options(), &out);
|
| 296 |
+
WriteSearchGraph(writer);
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
|
| 300 |
+
{
|
| 301 |
+
ChartSearchGraphWriterMoses writer(options(), &outputSearchGraphStream,
|
| 302 |
+
m_source.GetTranslationId());
|
| 303 |
+
WriteSearchGraph(writer);
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
/** Output the best hypothesis through \p collector.
 *
 * Does nothing when \p collector is NULL or when there is no best
 * hypothesis (see GetBestHypothesis()).
 *
 * \param collector destination for the output; may be NULL
 */
void ChartManager::OutputBest(OutputCollector *collector) const
{
  // Look the best hypothesis up once and reuse it; a second lookup inside
  // the branch would only repeat the search and shadow this variable.
  const ChartHypothesis *bestHypo = GetBestHypothesis();
  if (collector && bestHypo) {
    const size_t translationId = m_source.GetTranslationId();
    OutputBestHypo(collector, bestHypo, translationId);
  }
}
|
| 315 |
+
|
| 316 |
+
/**
 * Computes the n-best list and passes it to OutputNBestList().
 *
 * Does nothing when the configured n-best size is zero.
 *
 * \param collector destination forwarded to OutputNBestList()
 */
void ChartManager::OutputNBest(OutputCollector *collector) const
{
  size_t nBestSize = options()->nbest.nbest_size;
  if (nBestSize == 0) {
    return;
  }

  const size_t translationId = m_source.GetTranslationId();

  VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
          << options()->nbest.output_file_path << endl);

  ChartKBestExtractor::KBestVec derivations;
  CalcNBest(nBestSize, derivations, options()->nbest.only_distinct);
  OutputNBestList(collector, derivations, translationId);

  IFVERBOSE(2) {
    PrintUserTime("N-Best Hypotheses Generation Time:");
  }
}
|
| 333 |
+
|
| 334 |
+
/**
 * Formats every derivation of an n-best list as one line of text and hands
 * the accumulated text to the collector.
 *
 * Each line consists of the translation id, the surface string (with the
 * first and last word, <s> and </s>, removed), the feature scores and the
 * derivation score, separated by " ||| ".  Word alignments and the output
 * tree are appended when the corresponding n-best options are set.
 *
 * \param collector     receives the text; must not be NULL
 * \param nBestList     derivations to print, in the order given
 * \param translationId id written at the start of every line and passed to
 *                      collector->Write()
 */
void ChartManager::OutputNBestList(OutputCollector *collector,
                                   const ChartKBestExtractor::KBestVec &nBestList,
                                   long translationId) const
{
  // The collector is dereferenced right below, so it has to be checked here
  // rather than just before the final Write().
  assert(collector);

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  // These options do not change while the list is printed; read them once.
  NBestOptions const& nbo = options()->nbest;
  const bool includeWordAlignment = nbo.include_alignment_info;
  const bool printNBestTrees = nbo.print_trees;
  const bool withLabels = nbo.include_feature_labels;

  for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const ChartKBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
    out << " ||| ";
    boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
    scoreBreakdown->OutputAllFeatureScores(out, withLabels);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (printNBestTrees) {
      TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    out << std::endl;
  }

  collector->Write(translationId, out.str());
}
|
| 395 |
+
|
| 396 |
+
/**
 * Returns the number of source words covered by the hypothesis, reduced by
 * (covered words - 1) for each of its previous hypotheses.
 *
 * \param hypo hypothesis to measure; dereferenced without a NULL check
 */
size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
{
  typedef std::vector<const ChartHypothesis*> ChildVec;

  size_t symbolCount = hypo->GetCurrSourceRange().GetNumWordsCovered();

  const ChildVec &children = hypo->GetPrevHypos();
  for (ChildVec::const_iterator child = children.begin();
       child != children.end(); ++child) {
    const size_t childWords = (*child)->GetCurrSourceRange().GetNumWordsCovered();
    symbolCount -= (childWords - 1);
  }

  return symbolCount;
}
|
| 406 |
+
|
| 407 |
+
/**
 * Recursively collects the word alignment points of a k-best derivation.
 *
 * Walks the target phrase of the derivation's head hypothesis.  For every
 * non-terminal it recurses into the matching subderivation; afterwards the
 * terminal alignments of this rule are added to retAlign, using offsets that
 * were passed through ShiftOffsets() with the rule's source start position
 * and with startTarget.
 *
 * \param retAlign    set of (source, target) points; added to in place
 * \param derivation  derivation whose alignments are collected
 * \param startTarget target-side position at which this derivation starts
 * \return number of target words produced by this derivation
 *
 * Raises (UTIL_THROW_IF2) when the non-terminal alignment does not match the
 * number of subderivations, when a non-terminal position lies outside the
 * non-terminal index map, or when an alignment point is inserted twice.
 */
size_t ChartManager::OutputAlignmentNBest(
  Alignments &retAlign,
  const Moses::ChartKBestExtractor::Derivation &derivation,
  size_t startTarget) const
{
  const ChartHypothesis &hypo = derivation.edge.head->hypothesis;

  size_t totalTargetSize = 0;
  size_t startSource = hypo.GetCurrSourceRange().GetStartPos();

  const TargetPhrase &tp = hypo.GetCurrTargetPhrase();

  size_t thisSourceSize = CalcSourceSize(&hypo);

  // position of each terminal word in translation rule, irrespective of alignment
  // if non-term, number is undefined
  vector<size_t> sourceOffsets(thisSourceSize, 0);
  vector<size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  // (The former 'targetInd' counter was incremented but never read and has
  // been dropped.)
  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      size_t sourceInd = targetPos2SourceInd[targetPos];
      size_t sourcePos = sourceInd2pos[sourceInd];

      const Moses::ChartKBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // calc source size
      size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      size_t currStartTarget = startTarget + totalTargetSize;
      size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                          currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();

  // add to output arg, offsetting by source & target
  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<size_t,size_t> &align = *iter;
    size_t relSource = align.first;
    size_t relTarget = align.second;
    size_t absSource = sourceOffsets[relSource];
    size_t absTarget = targetOffsets[relTarget];

    pair<size_t, size_t> alignPoint(absSource, absTarget);
    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    // every alignment point is expected to be new
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
|
| 485 |
+
|
| 486 |
+
/**
 * Writes the word alignment of the best hypothesis to the collector as a
 * single line of "source-target " pairs.
 *
 * When there is no best hypothesis only the line terminator is written.
 * Nothing is written when the collector is NULL.
 *
 * \param collector destination of the alignment line; may be NULL
 */
void ChartManager::OutputAlignment(OutputCollector *collector) const
{
  if (collector == NULL) {
    return;
  }

  ostringstream line;

  const ChartHypothesis *best = GetBestHypothesis();
  if (best) {
    Alignments points;
    OutputAlignment(points, best, 0);

    // output alignments
    for (Alignments::const_iterator pt = points.begin(); pt != points.end(); ++pt) {
      line << pt->first << "-" << pt->second << " ";
    }
  }
  line << endl;

  collector->Write(m_source.GetTranslationId(), line.str());
}
|
| 511 |
+
|
| 512 |
+
/**
 * Recursively collects the word alignment points of a hypothesis.
 *
 * Walks the target phrase of the hypothesis.  For every non-terminal it
 * recurses into the matching previous hypothesis; afterwards the terminal
 * alignments of this rule are added to retAlign, using offsets that were
 * passed through ShiftOffsets() with the rule's source start position and
 * with startTarget.
 *
 * \param retAlign    set of (source, target) points; added to in place
 * \param hypo        hypothesis to process; dereferenced without a NULL check
 * \param startTarget target-side position at which this hypothesis starts
 * \return number of target words produced by this hypothesis
 *
 * Raises (UTIL_THROW_IF2) when the non-terminal alignment does not match the
 * number of previous hypotheses, when a non-terminal position lies outside
 * the non-terminal index map, or when an alignment point is inserted twice.
 */
size_t ChartManager::OutputAlignment(Alignments &retAlign,
                                     const Moses::ChartHypothesis *hypo,
                                     size_t startTarget) const
{
  size_t totalTargetSize = 0;
  size_t startSource = hypo->GetCurrSourceRange().GetStartPos();

  const TargetPhrase &tp = hypo->GetCurrTargetPhrase();

  size_t thisSourceSize = CalcSourceSize(hypo);

  // position of each terminal word in translation rule, irrespective of alignment
  // if non-term, number is undefined
  vector<size_t> sourceOffsets(thisSourceSize, 0);
  vector<size_t> targetOffsets(tp.GetSize(), 0);

  const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();

  const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != prevHypos.size(), "Error");

  // (The former 'targetInd' counter was incremented but never read and has
  // been dropped.)
  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      size_t sourceInd = targetPos2SourceInd[targetPos];
      size_t sourcePos = sourceInd2pos[sourceInd];

      const ChartHypothesis *prevHypo = prevHypos[sourceInd];

      // calc source size
      size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      size_t currStartTarget = startTarget + totalTargetSize;
      size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();

  // add to output arg, offsetting by source & target
  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<size_t,size_t> &align = *iter;
    size_t relSource = align.first;
    size_t relTarget = align.second;
    size_t absSource = sourceOffsets[relSource];
    size_t absTarget = targetOffsets[relTarget];

    pair<size_t, size_t> alignPoint(absSource, absTarget);
    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    // every alignment point is expected to be new
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
|
| 587 |
+
|
| 588 |
+
/**
 * Writes the detailed translation report for the best hypothesis.
 *
 * Forwards to the four-argument overload with the source cast to a Sentence
 * and its translation id.  Does nothing when the collector is NULL.
 *
 * \param collector destination of the report; may be NULL
 */
void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  OutputDetailedTranslationReport(collector,
                                  GetBestHypothesis(),
                                  static_cast<const Sentence&>(m_source),
                                  m_source.GetTranslationId());
}
|
| 597 |
+
|
| 598 |
+
/**
 * Writes the translation options used along the given hypothesis to the
 * collector.  When a file path for the "all translations" report is
 * configured, it additionally computes the n-best list and calls
 * OutputDetailedAllTranslationReport().
 *
 * \param collector     destination of the report; dereferenced without a
 *                      NULL check
 * \param hypo          root hypothesis; nothing is written when NULL
 * \param sentence      sentence used to reconstruct the application contexts
 * \param translationId id passed to collector->Write()
 */
void ChartManager::OutputDetailedTranslationReport(
  OutputCollector *collector,
  const ChartHypothesis *hypo,
  const Sentence &sentence,
  long translationId) const
{
  if (!hypo) {
    return;
  }

  std::ostringstream report;
  ApplicationContext applicationContext;

  OutputTranslationOptions(report, applicationContext, hypo, sentence, translationId);
  collector->Write(translationId, report.str());

  //DIMw
  if (options()->output.detailed_all_transrep_filepath.size() == 0) {
    return;
  }

  // The second report always uses the manager's own source, not the
  // 'sentence' parameter.
  const Sentence &sourceSentence = static_cast<const Sentence &>(m_source);
  size_t nBestSize = options()->nbest.nbest_size;
  std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > derivations;
  CalcNBest(nBestSize, derivations, options()->nbest.only_distinct);
  OutputDetailedAllTranslationReport(collector, derivations, sourceSentence, translationId);
}
|
| 623 |
+
|
| 624 |
+
void ChartManager::OutputTranslationOptions(std::ostream &out,
|
| 625 |
+
ApplicationContext &applicationContext,
|
| 626 |
+
const ChartHypothesis *hypo,
|
| 627 |
+
const Sentence &sentence,
|
| 628 |
+
long translationId) const
|
| 629 |
+
{
|
| 630 |
+
if (hypo != NULL) {
|
| 631 |
+
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
|
| 632 |
+
out << std::endl;
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
// recursive
|
| 636 |
+
const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
| 637 |
+
std::vector<const ChartHypothesis*>::const_iterator iter;
|
| 638 |
+
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
| 639 |
+
const ChartHypothesis *prevHypo = *iter;
|
| 640 |
+
OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
| 641 |
+
}
|
| 642 |
+
}
|
| 643 |
+
|
| 644 |
+
void ChartManager::OutputTranslationOption(std::ostream &out,
|
| 645 |
+
ApplicationContext &applicationContext,
|
| 646 |
+
const ChartHypothesis *hypo,
|
| 647 |
+
const Sentence &sentence,
|
| 648 |
+
long translationId) const
|
| 649 |
+
{
|
| 650 |
+
ReconstructApplicationContext(*hypo, sentence, applicationContext);
|
| 651 |
+
out << "Trans Opt " << translationId
|
| 652 |
+
<< " " << hypo->GetCurrSourceRange()
|
| 653 |
+
<< ": ";
|
| 654 |
+
WriteApplicationContext(out, applicationContext);
|
| 655 |
+
out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
|
| 656 |
+
<< "->" << hypo->GetCurrTargetPhrase()
|
| 657 |
+
<< " " << hypo->GetFutureScore() << hypo->GetScoreBreakdown();
|
| 658 |
+
}
|
| 659 |
+
|
| 660 |
+
// Given a hypothesis and sentence, reconstructs the 'application context' --
|
| 661 |
+
// the source RHS symbols of the SCFG rule that was applied, plus their spans.
|
| 662 |
+
void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
|
| 663 |
+
const Sentence &sentence,
|
| 664 |
+
ApplicationContext &context) const
|
| 665 |
+
{
|
| 666 |
+
context.clear();
|
| 667 |
+
const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
|
| 668 |
+
std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
|
| 669 |
+
std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
|
| 670 |
+
const Range &span = hypo.GetCurrSourceRange();
|
| 671 |
+
size_t i = span.GetStartPos();
|
| 672 |
+
while (i <= span.GetEndPos()) {
|
| 673 |
+
if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
|
| 674 |
+
// Symbol is a terminal.
|
| 675 |
+
const Word &symbol = sentence.GetWord(i);
|
| 676 |
+
context.push_back(std::make_pair(symbol, Range(i, i)));
|
| 677 |
+
++i;
|
| 678 |
+
} else {
|
| 679 |
+
// Symbol is a non-terminal.
|
| 680 |
+
const Word &symbol = (*p)->GetTargetLHS();
|
| 681 |
+
const Range &range = (*p)->GetCurrSourceRange();
|
| 682 |
+
context.push_back(std::make_pair(symbol, range));
|
| 683 |
+
i = range.GetEndPos()+1;
|
| 684 |
+
++p;
|
| 685 |
+
}
|
| 686 |
+
}
|
| 687 |
+
}
|
| 688 |
+
|
| 689 |
+
/**
 * Writes the parser's unknown source phrases, streamed one after another and
 * followed by a line terminator, to the collector.
 *
 * \param collector destination of the line; nothing is done when NULL
 */
void ChartManager::OutputUnknowns(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  long translationId = m_source.GetTranslationId();
  const std::vector<Phrase*> &unknowns = GetParser().GetUnknownSources();

  std::ostringstream line;
  for (size_t k = 0; k < unknowns.size(); ++k) {
    line << *unknowns[k];
  }
  line << std::endl;

  collector->Write(translationId, line.str());
}
|
| 705 |
+
|
| 706 |
+
/**
 * Writes the tree-fragment translation report of the best hypothesis to the
 * collector.  When StaticData provides a tree-structure feature function and
 * it is found in the list of stateful feature functions, a "Full Tree" line
 * built from the hypothesis' state for that feature is appended.
 *
 * \param collector destination of the report; nothing is done when it or the
 *                  best hypothesis is NULL
 */
void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
{
  const ChartHypothesis *best = GetBestHypothesis();
  if (collector == NULL || best == NULL) {
    return;
  }

  std::ostringstream report;
  ApplicationContext applicationContext;

  const Sentence &sentence = static_cast<const Sentence &>(m_source);
  const size_t translationId = m_source.GetTranslationId();

  OutputTreeFragmentsTranslationOptions(report, applicationContext, best, sentence, translationId);

  //Tree of full sentence
  const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
  if (treeStructure != NULL) {
    const vector<const StatefulFeatureFunction*>& features = StatefulFeatureFunction::GetStatefulFeatureFunctions();
    // locate the index of the tree-structure feature; only the first match
    // is reported
    size_t idx = 0;
    while (idx < features.size() && features[idx] != treeStructure) {
      idx++;
    }
    if (idx < features.size()) {
      const TreeState* tree = static_cast<const TreeState*>(best->GetFFState(idx));
      report << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
    }
  }

  collector->Write(translationId, report.str());
}
|
| 738 |
+
|
| 739 |
+
void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
|
| 740 |
+
ApplicationContext &applicationContext,
|
| 741 |
+
const ChartHypothesis *hypo,
|
| 742 |
+
const Sentence &sentence,
|
| 743 |
+
long translationId) const
|
| 744 |
+
{
|
| 745 |
+
|
| 746 |
+
if (hypo != NULL) {
|
| 747 |
+
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
|
| 748 |
+
|
| 749 |
+
const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
|
| 750 |
+
|
| 751 |
+
out << " ||| ";
|
| 752 |
+
if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
|
| 753 |
+
out << " " << *property->GetValueString();
|
| 754 |
+
} else {
|
| 755 |
+
out << " " << "noTreeInfo";
|
| 756 |
+
}
|
| 757 |
+
out << std::endl;
|
| 758 |
+
}
|
| 759 |
+
|
| 760 |
+
// recursive
|
| 761 |
+
const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
| 762 |
+
std::vector<const ChartHypothesis*>::const_iterator iter;
|
| 763 |
+
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
| 764 |
+
const ChartHypothesis *prevHypo = *iter;
|
| 765 |
+
OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
| 766 |
+
}
|
| 767 |
+
}
|
| 768 |
+
|
| 769 |
+
/**
 * Renders the search graph with OutputSearchGraphMoses() into a string and
 * writes it to the collector under the source's translation id.
 *
 * \param collector destination of the graph; nothing is done when NULL
 */
void ChartManager::OutputSearchGraph(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  long translationId = m_source.GetTranslationId();
  std::ostringstream graph;
  OutputSearchGraphMoses(graph);
  collector->Write(translationId, graph.str());
}
|
| 778 |
+
|
| 779 |
+
//DIMw
|
| 780 |
+
/**
 * Writes, for every chart cell (by increasing span width, then start
 * position), a header line followed by the translation options of each of
 * the cell's sorted hypotheses, and sends the text to the collector.
 *
 * \param collector     destination of the report; dereferenced without a
 *                      NULL check
 * \param nBestList     not used by this function
 * \param sentence      sentence used to reconstruct the application contexts
 * \param translationId id printed on each line and passed to
 *                      collector->Write()
 */
void ChartManager::OutputDetailedAllTranslationReport(
  OutputCollector *collector,
  const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
  const Sentence &sentence,
  long translationId) const
{
  std::ostringstream report;
  ApplicationContext applicationContext;

  const ChartCellCollection& cells = GetChartCellCollection();
  const size_t sourceLength = GetSource().GetSize();

  for (size_t width = 1; width <= sourceLength; ++width) {
    for (size_t first = 0; first <= sourceLength-width; ++first) {
      const size_t last = first + width - 1;
      Range span(first, last);
      const ChartCell& cell = cells.Get(span);
      const HypoList* hypos = cell.GetAllSortedHypotheses();

      report << "Chart Cell [" << first << ".." << last << "]" << endl;

      size_t itemNo = 1;
      for (HypoList::const_iterator h = hypos->begin(); h != hypos->end(); ++h) {
        report << "----------------Item " << itemNo << " ---------------------"
               << endl;
        ++itemNo;
        OutputTranslationOptions(report, applicationContext, *h,
                                 sentence, translationId);
      }
    }
  }

  collector->Write(translationId, report.str());
}
|
| 810 |
+
|
| 811 |
+
/**
 * Writes the output line for the best hypothesis to the collector.
 *
 * With a hypothesis, the line holds the optional future score, the optional
 * "||| " marker and the output phrase with its first and last word removed.
 * Without one, the line holds only the optional "0 " score.
 *
 * \param collector     destination of the line; nothing is done when NULL
 * \param hypo          best hypothesis, or NULL when there is none
 * \param translationId id passed to collector->Write()
 */
void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
{
  if (!collector) {
    return;
  }

  std::ostringstream out;
  FixPrecision(out);

  if (hypo == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << endl);

    if (options()->output.ReportHypoScore) {
      out << "0 ";
    }

    out << endl;
  } else {
    VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
    VERBOSE(3,"Best path: ");
    Backtrack(hypo);
    VERBOSE(3,"0" << std::endl);

    if (options()->output.ReportHypoScore) {
      out << hypo->GetFutureScore() << " ";
    }

    if (options()->output.RecoverPath) {
      out << "||| ";
    }

    Phrase yield(ARRAY_SIZE_INCR);
    hypo->GetOutputPhrase(yield);

    // delete 1st & last
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");

    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize() - 1);

    string text = yield.GetStringRep(options()->output.factor_order);
    out << text << endl;
  }

  collector->Write(translationId, out.str());
}
|
| 853 |
+
|
| 854 |
+
void ChartManager::Backtrack(const ChartHypothesis *hypo) const
|
| 855 |
+
{
|
| 856 |
+
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
| 857 |
+
|
| 858 |
+
vector<const ChartHypothesis*>::const_iterator iter;
|
| 859 |
+
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
| 860 |
+
const ChartHypothesis *prevHypo = *iter;
|
| 861 |
+
|
| 862 |
+
VERBOSE(3,prevHypo->GetId() << " <= ");
|
| 863 |
+
Backtrack(prevHypo);
|
| 864 |
+
}
|
| 865 |
+
}
|
| 866 |
+
|
| 867 |
+
} // namespace Moses
|
mosesdecoder/moses/ChartParser.h
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// -*- c++ -*-
|
| 2 |
+
// $Id$
|
| 3 |
+
// vim:tabstop=2
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2010 Hieu Hoang
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#pragma once
|
| 24 |
+
|
| 25 |
+
#include <list>
|
| 26 |
+
#include <vector>
|
| 27 |
+
#include "Range.h"
|
| 28 |
+
#include "StackVec.h"
|
| 29 |
+
#include "InputPath.h"
|
| 30 |
+
#include "TargetPhraseCollection.h"
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
class ChartParserCallback;
|
| 35 |
+
class ChartRuleLookupManager;
|
| 36 |
+
class InputType;
|
| 37 |
+
class Sentence;
|
| 38 |
+
class ChartCellCollectionBase;
|
| 39 |
+
class Word;
|
| 40 |
+
class Phrase;
|
| 41 |
+
// class TargetPhraseCollection;
|
| 42 |
+
class DecodeGraph;
|
| 43 |
+
|
| 44 |
+
class ChartParserUnknown
|
| 45 |
+
{
|
| 46 |
+
ttaskwptr m_ttask;
|
| 47 |
+
public:
|
| 48 |
+
ChartParserUnknown(ttasksptr const& ttask);
|
| 49 |
+
~ChartParserUnknown();
|
| 50 |
+
|
| 51 |
+
void Process(const Word &sourceWord, const Range &range, ChartParserCallback &to);
|
| 52 |
+
|
| 53 |
+
const std::vector<Phrase*> &GetUnknownSources() const {
|
| 54 |
+
return m_unksrcs;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
private:
|
| 58 |
+
std::vector<Phrase*> m_unksrcs;
|
| 59 |
+
std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
|
| 60 |
+
AllOptions::ptr const& options() const;
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
class ChartParser
|
| 64 |
+
{
|
| 65 |
+
ttaskwptr m_ttask;
|
| 66 |
+
public:
|
| 67 |
+
ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells);
|
| 68 |
+
~ChartParser();
|
| 69 |
+
|
| 70 |
+
void Create(const Range &range, ChartParserCallback &to);
|
| 71 |
+
|
| 72 |
+
//! the sentence being decoded
|
| 73 |
+
//const Sentence &GetSentence() const;
|
| 74 |
+
long GetTranslationId() const;
|
| 75 |
+
size_t GetSize() const;
|
| 76 |
+
const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
|
| 77 |
+
const InputPath &GetInputPath(const Range &range) const;
|
| 78 |
+
const std::vector<Phrase*> &GetUnknownSources() const {
|
| 79 |
+
return m_unknown.GetUnknownSources();
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
AllOptions::ptr const& options() const;
|
| 83 |
+
|
| 84 |
+
private:
|
| 85 |
+
ChartParserUnknown m_unknown;
|
| 86 |
+
std::vector <DecodeGraph*> m_decodeGraphList;
|
| 87 |
+
std::vector<ChartRuleLookupManager*> m_ruleLookupManagers;
|
| 88 |
+
InputType const& m_source; /**< source sentence to be translated */
|
| 89 |
+
|
| 90 |
+
typedef std::vector< std::vector<InputPath*> > InputPathMatrix;
|
| 91 |
+
InputPathMatrix m_inputPathMatrix;
|
| 92 |
+
|
| 93 |
+
void CreateInputPaths(const InputType &input);
|
| 94 |
+
InputPath &GetInputPath(size_t startPos, size_t endPos);
|
| 95 |
+
|
| 96 |
+
};
|
| 97 |
+
|
| 98 |
+
}
|
| 99 |
+
|
mosesdecoder/moses/ChartTranslationOptionList.h
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2006 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "ChartTranslationOptions.h"
|
| 23 |
+
#include "ChartParserCallback.h"
|
| 24 |
+
#include "StackVec.h"
|
| 25 |
+
|
| 26 |
+
#include <vector>
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
|
| 31 |
+
class TargetPhraseCollection;
|
| 32 |
+
class Range;
|
| 33 |
+
class InputType;
|
| 34 |
+
class InputPath;
|
| 35 |
+
class ChartCellLabel;
|
| 36 |
+
|
| 37 |
+
//! a vector of translations options for a specific range, in a specific sentence
|
| 38 |
+
class ChartTranslationOptionList : public ChartParserCallback
|
| 39 |
+
{
|
| 40 |
+
friend std::ostream& operator<<(std::ostream&, const ChartTranslationOptionList&);
|
| 41 |
+
|
| 42 |
+
public:
|
| 43 |
+
ChartTranslationOptionList(size_t ruleLimit, const InputType &input);
|
| 44 |
+
~ChartTranslationOptionList();
|
| 45 |
+
|
| 46 |
+
const ChartTranslationOptions &Get(size_t i) const {
|
| 47 |
+
return *m_collection[i];
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
//! number of translation options
|
| 51 |
+
size_t GetSize() const {
|
| 52 |
+
return m_size;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
void Add(const TargetPhraseCollection &, const StackVec &,
|
| 56 |
+
const Range &);
|
| 57 |
+
|
| 58 |
+
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const Range &range);
|
| 59 |
+
|
| 60 |
+
bool Empty() const {
|
| 61 |
+
return m_size == 0;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
float GetBestScore(const ChartCellLabel *chartCell) const;
|
| 65 |
+
|
| 66 |
+
void Clear();
|
| 67 |
+
void ApplyThreshold(float threshold);
|
| 68 |
+
void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
|
| 69 |
+
|
| 70 |
+
private:
|
| 71 |
+
typedef std::vector<ChartTranslationOptions*> CollType;
|
| 72 |
+
|
| 73 |
+
struct ScoreThresholdPred {
|
| 74 |
+
ScoreThresholdPred(float threshold) : m_thresholdScore(threshold) {}
|
| 75 |
+
bool operator()(const ChartTranslationOptions *option) {
|
| 76 |
+
return option->GetEstimateOfBestScore() >= m_thresholdScore;
|
| 77 |
+
}
|
| 78 |
+
float m_thresholdScore;
|
| 79 |
+
};
|
| 80 |
+
|
| 81 |
+
void SwapTranslationOptions(size_t a, size_t b);
|
| 82 |
+
|
| 83 |
+
CollType m_collection;
|
| 84 |
+
size_t m_size;
|
| 85 |
+
float m_scoreThreshold;
|
| 86 |
+
const size_t m_ruleLimit;
|
| 87 |
+
|
| 88 |
+
};
|
| 89 |
+
|
| 90 |
+
}
|
mosesdecoder/moses/Factor.h
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#pragma once
|
| 23 |
+
|
| 24 |
+
#include <ostream>
|
| 25 |
+
#include <string>
|
| 26 |
+
#include "TypeDef.h"
|
| 27 |
+
#include "Util.h"
|
| 28 |
+
#include "util/string_piece.hh"
|
| 29 |
+
|
| 30 |
+
namespace Moses
|
| 31 |
+
{
|
| 32 |
+
|
| 33 |
+
struct FactorFriend;
|
| 34 |
+
class FactorCollection;
|
| 35 |
+
|
| 36 |
+
/** Represents a factor (word, POS, etc).
|
| 37 |
+
* A Factor has a contiguous identifier and string value.
|
| 38 |
+
*/
|
| 39 |
+
class Factor
|
| 40 |
+
{
|
| 41 |
+
friend std::ostream& operator<<(std::ostream&, const Factor&);
|
| 42 |
+
|
| 43 |
+
// only these classes are allowed to instantiate this class
|
| 44 |
+
friend class FactorCollection;
|
| 45 |
+
friend struct FactorFriend;
|
| 46 |
+
|
| 47 |
+
// FactorCollection writes here.
|
| 48 |
+
// This is mutable so the pointer can be changed to pool-backed memory.
|
| 49 |
+
mutable StringPiece m_string;
|
| 50 |
+
size_t m_id;
|
| 51 |
+
|
| 52 |
+
//! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
|
| 53 |
+
Factor() {}
|
| 54 |
+
|
| 55 |
+
// Needed for STL containers. They'll delegate through FactorFriend, which is never exposed publicly.
|
| 56 |
+
Factor(const Factor &factor) : m_string(factor.m_string), m_id(factor.m_id) {}
|
| 57 |
+
|
| 58 |
+
// Not implemented. Shouldn't be called.
|
| 59 |
+
Factor &operator=(const Factor &factor);
|
| 60 |
+
|
| 61 |
+
public:
|
| 62 |
+
//! original string representation of the factor
|
| 63 |
+
StringPiece GetString() const {
|
| 64 |
+
return m_string;
|
| 65 |
+
}
|
| 66 |
+
//! contiguous ID
|
| 67 |
+
inline size_t GetId() const {
|
| 68 |
+
return m_id;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
/** transitive comparison between 2 factors.
|
| 72 |
+
* -1 = less than
|
| 73 |
+
* +1 = more than
|
| 74 |
+
* 0 = same
|
| 75 |
+
*/
|
| 76 |
+
inline int Compare(const Factor &compare) const {
|
| 77 |
+
if (this < &compare)
|
| 78 |
+
return -1;
|
| 79 |
+
if (this > &compare)
|
| 80 |
+
return 1;
|
| 81 |
+
return 0;
|
| 82 |
+
}
|
| 83 |
+
//! transitive comparison used for adding objects into FactorCollection
|
| 84 |
+
inline bool operator<(const Factor &compare) const {
|
| 85 |
+
return this < &compare;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
// quick equality comparison. Not used
|
| 89 |
+
inline bool operator==(const Factor &compare) const {
|
| 90 |
+
return this == &compare;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
TO_STRING();
|
| 94 |
+
|
| 95 |
+
};
|
| 96 |
+
|
| 97 |
+
size_t hash_value(const Factor &f);
|
| 98 |
+
|
| 99 |
+
}
|
| 100 |
+
|
mosesdecoder/moses/FactorCollection.cpp
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <boost/version.hpp>
|
| 23 |
+
#ifdef WITH_THREADS
|
| 24 |
+
#include <boost/thread/locks.hpp>
|
| 25 |
+
#endif
|
| 26 |
+
#include <ostream>
|
| 27 |
+
#include <string>
|
| 28 |
+
#include "FactorCollection.h"
|
| 29 |
+
#include "Util.h"
|
| 30 |
+
#include "util/pool.hh"
|
| 31 |
+
|
| 32 |
+
using namespace std;
|
| 33 |
+
|
| 34 |
+
namespace Moses
|
| 35 |
+
{
|
| 36 |
+
FactorCollection FactorCollection::s_instance;
|
| 37 |
+
|
| 38 |
+
/** Return the factor whose string equals factorString, inserting it first if
 *  it is not yet stored.
 *
 *  @param factorString  text of the factor; on insertion the bytes are copied
 *                       into m_string_backing and the stored factor is
 *                       re-pointed at that copy
 *  @param isNonTerminal selects which id counter and which set are used
 *  @return pointer to the factor stored in the set
 *
 *  Throws through UTIL_THROW_IF2 once the non-terminal counter reaches
 *  moses_MaxNumNonterminals.
 */
const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool isNonTerminal)
{
  FactorFriend to_ins;
  to_ins.in.m_string = factorString;
  // NOTE(review): the set choice looks swapped relative to the member names,
  // but GetFactor uses the same mapping, so lookups stay consistent.
  Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
  // If we're threaded, hope a read-only lock is sufficient.
#ifdef WITH_THREADS
  {
    // read-lock scope. Hashing and equality only look at m_string, so the
    // candidate does not need its id for this lookup.
    boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
    Set::const_iterator i = set.find(to_ins);
    if (i != set.end()) return &i->in;
  }
  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif // WITH_THREADS
  // Read the id counter only now: in threaded builds the exclusive lock is
  // held here, so no other thread can bump the counter between this read and
  // the insert below. Reading it earlier could hand the same id to two
  // different factors.
  to_ins.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
  std::pair<Set::iterator, bool> ret(set.insert(to_ins));
  if (ret.second) {
    // Newly inserted: give the stored factor its own copy of the text.
    ret.first->in.m_string.set(
      memcpy(m_string_backing.Allocate(factorString.size()), factorString.data(), factorString.size()),
      factorString.size());
    if (isNonTerminal) {
      m_factorIdNonTerminal++;
      UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals, "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile");
    } else {
      m_factorId++;
    }
  }
  return &ret.first->in;
}
|
| 68 |
+
|
| 69 |
+
/** Look up a factor by string without inserting it.
 *
 *  @param factorString  text to search for
 *  @param isNonTerminal selects which set is searched (same mapping as AddFactor)
 *  @return pointer to the stored factor, or NULL when it is not present
 */
const Factor *FactorCollection::GetFactor(const StringPiece &factorString, bool isNonTerminal)
{
  FactorFriend to_find;
  to_find.in.m_string = factorString;
  Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
#ifdef WITH_THREADS
  // Shared lock held for the rest of the function.
  boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif // WITH_THREADS
  // The counters are modified by AddFactor under the exclusive lock, so they
  // are only read once the shared lock is held. The id does not take part in
  // hashing or equality; it is filled in only to keep the probe fully set.
  to_find.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
  Set::const_iterator i = set.find(to_find);
  if (i != set.end()) return &i->in;
  return NULL;
}
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
FactorCollection::~FactorCollection() {}
|
| 88 |
+
|
| 89 |
+
TO_STRING_BODY(FactorCollection);
|
| 90 |
+
|
| 91 |
+
// friend
|
| 92 |
+
ostream& operator<<(ostream& out, const FactorCollection& factorCollection)
|
| 93 |
+
{
|
| 94 |
+
#ifdef WITH_THREADS
|
| 95 |
+
boost::shared_lock<boost::shared_mutex> lock(factorCollection.m_accessLock);
|
| 96 |
+
#endif
|
| 97 |
+
for (FactorCollection::Set::const_iterator i = factorCollection.m_set.begin(); i != factorCollection.m_set.end(); ++i) {
|
| 98 |
+
out << i->in;
|
| 99 |
+
}
|
| 100 |
+
return out;
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
|
mosesdecoder/moses/FactorCollection.h
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_FactorCollection_h
|
| 23 |
+
#define moses_FactorCollection_h
|
| 24 |
+
|
| 25 |
+
// reserve space for non-terminal symbols (ensuring consecutive numbering, and allowing quick lookup by ID)
|
| 26 |
+
#ifndef moses_MaxNumNonterminals
|
| 27 |
+
#define moses_MaxNumNonterminals 10000
|
| 28 |
+
#endif
|
| 29 |
+
|
| 30 |
+
#ifdef WITH_THREADS
|
| 31 |
+
#include <boost/thread/shared_mutex.hpp>
|
| 32 |
+
#endif
|
| 33 |
+
|
| 34 |
+
#include "util/murmur_hash.hh"
|
| 35 |
+
#include <boost/unordered_set.hpp>
|
| 36 |
+
|
| 37 |
+
#include <functional>
|
| 38 |
+
#include <string>
|
| 39 |
+
|
| 40 |
+
#include "util/string_piece.hh"
|
| 41 |
+
#include "util/pool.hh"
|
| 42 |
+
#include "Factor.h"
|
| 43 |
+
|
| 44 |
+
class System;
|
| 45 |
+
|
| 46 |
+
namespace Moses
|
| 47 |
+
{
|
| 48 |
+
|
| 49 |
+
/** We don't want Factor to be copyable by anybody. But we also want to store
|
| 50 |
+
* it in an STL container. The solution is that Factor's copy constructor is
|
| 51 |
+
* private and friended to FactorFriend. The STL containers can delegate
|
| 52 |
+
* copying, so friending the container isn't sufficient. STL containers see
|
| 53 |
+
* FactorFriend's public copy constructor and everybody else sees Factor's
|
| 54 |
+
* private copy constructor.
|
| 55 |
+
*/
|
| 56 |
+
struct FactorFriend {
|
| 57 |
+
Factor in;
|
| 58 |
+
};
|
| 59 |
+
|
| 60 |
+
/** collection of factors
|
| 61 |
+
*
|
| 62 |
+
* All Factors in moses are accessed and created by a FactorCollection.
|
| 63 |
+
* By enforcing this strict creation processes (ie, forbidding factors
|
| 64 |
+
* from being created on the stack, etc), their memory addresses can
|
| 65 |
+
* be used as keys to uniquely identify them.
|
| 66 |
+
* Only 1 FactorCollection object should be created.
|
| 67 |
+
*/
|
| 68 |
+
class FactorCollection
|
| 69 |
+
{
|
| 70 |
+
friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
|
| 71 |
+
friend class ::System;
|
| 72 |
+
|
| 73 |
+
struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
|
| 74 |
+
std::size_t operator()(const FactorFriend &factor) const {
|
| 75 |
+
return util::MurmurHashNative(factor.in.m_string.data(), factor.in.m_string.size());
|
| 76 |
+
}
|
| 77 |
+
};
|
| 78 |
+
struct EqualsFactor : public std::binary_function<const FactorFriend &, const FactorFriend &, bool> {
|
| 79 |
+
bool operator()(const FactorFriend &left, const FactorFriend &right) const {
|
| 80 |
+
return left.in.GetString() == right.in.GetString();
|
| 81 |
+
}
|
| 82 |
+
};
|
| 83 |
+
typedef boost::unordered_set<FactorFriend, HashFactor, EqualsFactor> Set;
|
| 84 |
+
Set m_set;
|
| 85 |
+
Set m_setNonTerminal;
|
| 86 |
+
|
| 87 |
+
util::Pool m_string_backing;
|
| 88 |
+
|
| 89 |
+
static FactorCollection s_instance;
|
| 90 |
+
#ifdef WITH_THREADS
|
| 91 |
+
//reader-writer lock
|
| 92 |
+
mutable boost::shared_mutex m_accessLock;
|
| 93 |
+
#endif
|
| 94 |
+
|
| 95 |
+
size_t m_factorIdNonTerminal; /**< unique, contiguous ids, starting from 0, for each non-terminal factor */
|
| 96 |
+
size_t m_factorId; /**< unique, contiguous ids, starting from moses_MaxNumNonterminals, for each terminal factor */
|
| 97 |
+
|
| 98 |
+
//! constructor. only the 1 static variable can be created
|
| 99 |
+
FactorCollection()
|
| 100 |
+
: m_factorIdNonTerminal(0)
|
| 101 |
+
, m_factorId(moses_MaxNumNonterminals) {
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
public:
|
| 105 |
+
static FactorCollection& Instance() {
|
| 106 |
+
return s_instance;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
~FactorCollection();
|
| 110 |
+
|
| 111 |
+
/** returns a factor with the same direction, factorType and factorString.
|
| 112 |
+
* If a factor already exist in the collection, return the existing factor, if not create a new 1
|
| 113 |
+
*/
|
| 114 |
+
const Factor *AddFactor(const StringPiece &factorString, bool isNonTerminal = false);
|
| 115 |
+
|
| 116 |
+
size_t GetNumNonTerminals() {
|
| 117 |
+
return m_factorIdNonTerminal;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
const Factor *GetFactor(const StringPiece &factorString, bool isNonTerminal = false);
|
| 121 |
+
|
| 122 |
+
// TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
|
| 123 |
+
const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString, bool isNonTerminal = false) {
|
| 124 |
+
return AddFactor(factorString, isNonTerminal);
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
TO_STRING();
|
| 128 |
+
|
| 129 |
+
};
|
| 130 |
+
|
| 131 |
+
}
|
| 132 |
+
#endif
|
mosesdecoder/moses/FactorTypeSet.cpp
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "FactorTypeSet.h"
|
| 23 |
+
|
| 24 |
+
using namespace std;
|
| 25 |
+
|
| 26 |
+
namespace Moses
|
| 27 |
+
{
|
| 28 |
+
// Build a mask with one bit switched on per entry of `factors`.
FactorMask::FactorMask(const vector<FactorType> &factors)
{
  for (size_t idx = 0; idx < factors.size(); ++idx) {
    this->set(factors[idx]);
  }
}
|
| 35 |
+
|
| 36 |
+
bool FactorMask::IsUseable(const FactorMask &other) const
|
| 37 |
+
{
|
| 38 |
+
for (size_t i = 0; i < other.size(); ++i) {
|
| 39 |
+
if (other[i]) {
|
| 40 |
+
if (!this->operator[](i) ) {
|
| 41 |
+
return false;
|
| 42 |
+
}
|
| 43 |
+
}
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
return true;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
TO_STRING_BODY(FactorMask);
|
| 50 |
+
|
| 51 |
+
// friend
|
| 52 |
+
std::ostream& operator<<(std::ostream& out, const FactorMask& fm)
|
| 53 |
+
{
|
| 54 |
+
out << "FactorMask<";
|
| 55 |
+
bool first = true;
|
| 56 |
+
for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
|
| 57 |
+
if (fm[currFactor]) {
|
| 58 |
+
if (first) {
|
| 59 |
+
first = false;
|
| 60 |
+
} else {
|
| 61 |
+
out << ",";
|
| 62 |
+
}
|
| 63 |
+
out << currFactor;
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
out << ">";
|
| 67 |
+
|
| 68 |
+
return out;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
|
mosesdecoder/moses/FactorTypeSet.h
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_FactorTypeSet_h
|
| 23 |
+
#define moses_FactorTypeSet_h
|
| 24 |
+
|
| 25 |
+
#include <iostream>
|
| 26 |
+
#include <bitset>
|
| 27 |
+
#include <vector>
|
| 28 |
+
#include "TypeDef.h"
|
| 29 |
+
#include "Util.h"
|
| 30 |
+
|
| 31 |
+
namespace Moses
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
/** set of unique FactorTypes. Used to store what factor types are used in phrase tables etc
|
| 35 |
+
*/
|
| 36 |
+
class FactorMask : public std::bitset<MAX_NUM_FACTORS>
|
| 37 |
+
{
|
| 38 |
+
friend std::ostream& operator<<(std::ostream&, const FactorMask&);
|
| 39 |
+
|
| 40 |
+
public:
|
| 41 |
+
//! construct object from list of FactorType.
|
| 42 |
+
explicit FactorMask(const std::vector<FactorType> &factors);
|
| 43 |
+
//! default constructor
|
| 44 |
+
inline FactorMask() {}
|
| 45 |
+
//! copy constructor
|
| 46 |
+
FactorMask(const std::bitset<MAX_NUM_FACTORS>& rhs) : std::bitset<MAX_NUM_FACTORS>(rhs) { }
|
| 47 |
+
|
| 48 |
+
bool IsUseable(const FactorMask &other) const;
|
| 49 |
+
|
| 50 |
+
TO_STRING();
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
}
|
| 54 |
+
#endif
|
mosesdecoder/moses/FilePtr.h
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/* ---------------------------------------------------------------- */
|
| 4 |
+
/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
|
| 5 |
+
/* Richard Zens */
|
| 6 |
+
/* ---------------------------------------------------------------- */
|
| 7 |
+
|
| 8 |
+
#ifndef moses_FilePtr_h
|
| 9 |
+
#define moses_FilePtr_h
|
| 10 |
+
|
| 11 |
+
#include "File.h"
|
| 12 |
+
|
| 13 |
+
namespace Moses
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
/** smart pointer for on-demand loading from file
|
| 17 |
+
* requirement: T has a constructor T(FILE*)
|
| 18 |
+
*/
|
| 19 |
+
template<typename T> class FilePtr
|
| 20 |
+
{
|
| 21 |
+
public:
|
| 22 |
+
typedef T* Ptr;
|
| 23 |
+
private:
|
| 24 |
+
FILE* f;
|
| 25 |
+
OFF_T pos;
|
| 26 |
+
mutable Ptr t;
|
| 27 |
+
public:
|
| 28 |
+
FilePtr(FILE* f_=0,OFF_T p=0) : f(f_),pos(p),t(0) {}
|
| 29 |
+
~FilePtr() {}
|
| 30 |
+
|
| 31 |
+
void set(FILE* f_,OFF_T p) {
|
| 32 |
+
f=f_;
|
| 33 |
+
pos=p;
|
| 34 |
+
}
|
| 35 |
+
void free() {
|
| 36 |
+
delete t;
|
| 37 |
+
t=0;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
T& operator* () {
|
| 41 |
+
load();
|
| 42 |
+
return *t;
|
| 43 |
+
}
|
| 44 |
+
Ptr operator->() {
|
| 45 |
+
load();
|
| 46 |
+
return t;
|
| 47 |
+
}
|
| 48 |
+
operator Ptr () {
|
| 49 |
+
load();
|
| 50 |
+
return t;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
const T& operator* () const {
|
| 54 |
+
load();
|
| 55 |
+
return *t;
|
| 56 |
+
}
|
| 57 |
+
Ptr operator->() const {
|
| 58 |
+
load();
|
| 59 |
+
return t;
|
| 60 |
+
}
|
| 61 |
+
operator Ptr () const {
|
| 62 |
+
load();
|
| 63 |
+
return t;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
// direct access to pointer, use with care!
|
| 67 |
+
Ptr getPtr() {
|
| 68 |
+
return t;
|
| 69 |
+
}
|
| 70 |
+
Ptr getPtr() const {
|
| 71 |
+
return t;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
operator bool() const {
|
| 75 |
+
return (f && pos!=InvalidOffT);
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
void load() const {
|
| 79 |
+
if(t) return;
|
| 80 |
+
if(f && pos!=InvalidOffT) {
|
| 81 |
+
fSeek(f,pos);
|
| 82 |
+
t=new T(f);
|
| 83 |
+
}
|
| 84 |
+
}
|
| 85 |
+
};
|
| 86 |
+
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
#endif
|
mosesdecoder/moses/HypergraphOutput.h
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2014- University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#ifndef moses_Hypergraph_Output_h
|
| 24 |
+
#define moses_Hypergraph_Output_h
|
| 25 |
+
|
| 26 |
+
#include <ostream>
|
| 27 |
+
#include "moses/parameters/AllOptions.h"
|
| 28 |
+
|
| 29 |
+
/**
|
| 30 |
+
* Manage the output of hypergraphs.
|
| 31 |
+
**/
|
| 32 |
+
|
| 33 |
+
namespace Moses
|
| 34 |
+
{
|
| 35 |
+
|
| 36 |
+
class ChartHypothesisCollection;
|
| 37 |
+
|
| 38 |
+
//! Writes hypergraphs for a manager of type M.
template<class M>
class HypergraphOutput
{
public:
  /** Initialise output directory and create weights file */
  HypergraphOutput(size_t precision);

  /** Write this hypergraph to file */
  void Write(const M& manager) const;

private:
  size_t      m_precision;
  std::string m_hypergraphDir;
  std::string m_compression;
  bool        m_appendSuffix;
};
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
/**
|
| 58 |
+
* ABC for different types of search graph output for chart Moses.
|
| 59 |
+
**/
|
| 60 |
+
class ChartSearchGraphWriter
|
| 61 |
+
{
|
| 62 |
+
protected:
|
| 63 |
+
AllOptions::ptr m_options;
|
| 64 |
+
ChartSearchGraphWriter(AllOptions::ptr const& opts) : m_options(opts) { }
|
| 65 |
+
public:
|
| 66 |
+
virtual void WriteHeader(size_t winners, size_t losers) const = 0;
|
| 67 |
+
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
|
| 68 |
+
const std::map<unsigned, bool> &reachable) const = 0;
|
| 69 |
+
|
| 70 |
+
};
|
| 71 |
+
|
| 72 |
+
/** "Moses" format (osg style) */
|
| 73 |
+
class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter
|
| 74 |
+
{
|
| 75 |
+
public:
|
| 76 |
+
ChartSearchGraphWriterMoses(AllOptions::ptr const& opts,
|
| 77 |
+
std::ostream* out, size_t lineNumber)
|
| 78 |
+
: ChartSearchGraphWriter(opts), m_out(out), m_lineNumber(lineNumber) {}
|
| 79 |
+
virtual void WriteHeader(size_t, size_t) const {
|
| 80 |
+
/* do nothing */
|
| 81 |
+
}
|
| 82 |
+
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
|
| 83 |
+
const std::map<unsigned, bool> &reachable) const;
|
| 84 |
+
|
| 85 |
+
private:
|
| 86 |
+
std::ostream* m_out;
|
| 87 |
+
size_t m_lineNumber;
|
| 88 |
+
};
|
| 89 |
+
|
| 90 |
+
/** Modified version of Kenneth's lazy hypergraph format */
|
| 91 |
+
class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter
|
| 92 |
+
{
|
| 93 |
+
public:
|
| 94 |
+
ChartSearchGraphWriterHypergraph(AllOptions::ptr const& opts, std::ostream* out)
|
| 95 |
+
: ChartSearchGraphWriter(opts), m_out(out), m_nodeId(0) { }
|
| 96 |
+
virtual void WriteHeader(size_t winners, size_t losers) const;
|
| 97 |
+
virtual void WriteHypos(const ChartHypothesisCollection& hypos,
|
| 98 |
+
const std::map<unsigned, bool> &reachable) const;
|
| 99 |
+
|
| 100 |
+
private:
|
| 101 |
+
std::ostream* m_out;
|
| 102 |
+
mutable size_t m_nodeId;
|
| 103 |
+
mutable std::map<size_t,size_t> m_hypoIdToNodeId;
|
| 104 |
+
};
|
| 105 |
+
|
| 106 |
+
}
|
| 107 |
+
#endif
|
mosesdecoder/moses/HypothesisStack.cpp
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
#include "HypothesisStack.h"
|
| 3 |
+
|
| 4 |
+
namespace Moses
|
| 5 |
+
{
|
| 6 |
+
// Destroys every hypothesis still held by the stack.
HypothesisStack::~HypothesisStack()
{
  // Remove() erases the element it is given, so always restart from the
  // current first element instead of advancing an iterator.
  for (;;) {
    if (m_hypos.begin() == m_hypos.end()) {
      break;
    }
    Remove(m_hypos.begin());
  }
}
|
| 13 |
+
|
| 14 |
+
/** Remove hypothesis pointed to by iterator but don't delete the object. */
|
| 15 |
+
void HypothesisStack::Detach(const HypothesisStack::iterator &iter)
|
| 16 |
+
{
|
| 17 |
+
m_hypos.erase(iter);
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
/** Take the hypothesis at `iter` out of the stack and delete the object. */
void HypothesisStack::Remove(const HypothesisStack::iterator &iter)
{
  // Fetch the pointer before detaching: the iterator is erased by Detach().
  Hypothesis *doomed = *iter;
  Detach(iter);
  delete doomed;
}
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
}
|
| 30 |
+
|
mosesdecoder/moses/HypothesisStackNormal.cpp
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include <algorithm>
|
| 23 |
+
#include <set>
|
| 24 |
+
#include <queue>
|
| 25 |
+
#include "HypothesisStackNormal.h"
|
| 26 |
+
#include "TypeDef.h"
|
| 27 |
+
#include "Util.h"
|
| 28 |
+
#include "Manager.h"
|
| 29 |
+
#include "util/exception.hh"
|
| 30 |
+
|
| 31 |
+
using namespace std;
|
| 32 |
+
|
| 33 |
+
namespace Moses
|
| 34 |
+
{
|
| 35 |
+
/** Create an empty stack attached to the given manager. */
HypothesisStackNormal::HypothesisStackNormal(Manager& manager)
  : HypothesisStack(manager)
{
  // Both score bounds start at minus infinity.
  const float minusInfinity = -std::numeric_limits<float>::infinity();
  m_worstScore = minusInfinity;
  m_bestScore = minusInfinity;

  // Cache whether n-best output is enabled in the decoder options.
  m_nBestIsEnabled = manager.options()->nbest.enabled;
}
|
| 42 |
+
|
| 43 |
+
/** remove all hypotheses from the collection */
|
| 44 |
+
void HypothesisStackNormal::RemoveAll()
|
| 45 |
+
{
|
| 46 |
+
while (m_hypos.begin() != m_hypos.end()) {
|
| 47 |
+
Remove(m_hypos.begin());
|
| 48 |
+
}
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
/** Insert a hypothesis into the stack and update the score bounds.
 *
 *  \param hypo hypothesis to insert
 *  \return the result of the underlying set insertion: the iterator points at
 *          the inserted hypothesis, or at the already stored equivalent one
 *          when the bool is false (in which case nothing else is changed).
 *
 *  After a successful insertion the stack is pruned lazily, i.e. only once it
 *  has grown beyond roughly twice m_maxHypoStackSize (plus room reserved for
 *  stack diversity).
 */
pair<HypothesisStackNormal::iterator, bool> HypothesisStackNormal::Add(Hypothesis *hypo)
{
  std::pair<iterator, bool> ret = m_hypos.insert(hypo);
  if (ret.second) {
    // equivalent hypo doesn't exist yet
    VERBOSE(3,"added hyp to stack");

    // Update best score, if this hypothesis is new best
    if (hypo->GetFutureScore() > m_bestScore) {
      VERBOSE(3,", best on stack");
      m_bestScore = hypo->GetFutureScore();
      // this may also affect the worst score
      if ( m_bestScore + m_beamWidth > m_worstScore )
        m_worstScore = m_bestScore + m_beamWidth;
    }
    // update best/worst score for stack diversity 1
    if ( m_minHypoStackDiversity == 1 &&
         hypo->GetFutureScore() > GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) {
      SetWorstScoreForBitmap( hypo->GetWordsBitmap().GetID(), hypo->GetFutureScore() );
    }

    VERBOSE(3,", now size " << m_hypos.size());

    // prune only if stack is twice as big as needed (lazy pruning)
    size_t toleratedSize = 2*m_maxHypoStackSize-1;
    // add in room for stack diversity
    if (m_minHypoStackDiversity) {
      // The extra room is diversity * 2^max_distortion. Shifting by a negative
      // amount, or by at least the bit width, is undefined behaviour, so those
      // cases (and any overflow) saturate at the largest size_t instead.
      // NOTE(review): a negative distortion limit is treated as "no bound on
      // the extra room" -- confirm this matches the intended meaning.
      const size_t maxSize = std::numeric_limits<size_t>::max();
      const long bitWidth = std::numeric_limits<size_t>::digits;
      const size_t diversity = m_minHypoStackDiversity;
      const long maxDistortion = m_manager.options()->reordering.max_distortion;

      size_t extraRoom;
      if (maxDistortion < 0 || maxDistortion >= bitWidth
          || diversity > (maxSize >> maxDistortion)) {
        extraRoom = maxSize;
      } else {
        extraRoom = diversity << maxDistortion;
      }

      if (extraRoom > maxSize - toleratedSize) {
        toleratedSize = maxSize;
      } else {
        toleratedSize += extraRoom;
      }
    }

    if (m_hypos.size() > toleratedSize) {
      PruneToSize(m_maxHypoStackSize);
    } else {
      VERBOSE(3,std::endl);
    }
  }

  return ret;
}
|
| 92 |
+
|
| 93 |
+
/** Add a hypothesis to the stack, discarding or recombining it when appropriate.
 *
 *  The hypothesis is deleted here when its future score is minus infinity,
 *  when it is too bad for the stack, or when it loses a recombination and
 *  n-best is disabled. When n-best is enabled the loser of a recombination is
 *  attached to the winner through AddArc() instead of being deleted.
 *
 *  \param hypo hypothesis to add; the caller must not use it after a discard
 *  \return true only when the hypothesis was inserted without meeting an
 *          equivalent one; false when it was discarded or recombined (also
 *          when it replaced the stored hypothesis).
 */
bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
{
  if (hypo->GetFutureScore() == - std::numeric_limits<float>::infinity()) {
    m_manager.GetSentenceStats().AddDiscarded();
    VERBOSE(3,"discarded, constraint" << std::endl);
    delete hypo;
    return false;
  }

  // too bad for stack. don't bother adding hypo into collection
  if (m_manager.options()->search.disable_discarding == false
      && hypo->GetFutureScore() < m_worstScore
      && ! ( m_minHypoStackDiversity > 0
             && hypo->GetFutureScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) ) {
    m_manager.GetSentenceStats().AddDiscarded();
    VERBOSE(3,"discarded, too bad for stack" << std::endl);
    delete hypo;
    return false;
  }

  // over threshold, try to add to collection
  std::pair<iterator, bool> addRet = Add(hypo);
  if (addRet.second) {
    // nothing found. add to collection
    return true;
  }

  // equiv hypo exists, recombine with other hypo
  iterator &iterExisting = addRet.first;
  // check the iterator before it is dereferenced, not after
  assert(iterExisting != m_hypos.end());
  Hypothesis *hypoExisting = *iterExisting;

  m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);

  // found existing hypo with same target ending.
  // keep the best 1
  if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
    // incoming hypo is better than the one we have
    VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
    if (m_nBestIsEnabled) {
      // keep the old hypothesis alive as an arc of the new one
      hypo->AddArc(hypoExisting);
      Detach(iterExisting);
    } else {
      Remove(iterExisting);
    }

    // the slot is free now, so this insertion is expected to succeed
    bool added = Add(hypo).second;
    if (!added) {
      iterExisting = m_hypos.find(hypo);
      UTIL_THROW2("Offending hypo = " << **iterExisting);
    }
    return false;
  } else {
    // already storing the best hypo. discard current hypo
    VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl);
    if (m_nBestIsEnabled) {
      hypoExisting->AddArc(hypo);
    } else {
      delete hypo;
    }
    return false;
  }
}
|
| 156 |
+
|
| 157 |
+
void HypothesisStackNormal::PruneToSize(size_t newSize)
|
| 158 |
+
{
|
| 159 |
+
if ( newSize == 0) return; // no limit
|
| 160 |
+
if ( size() <= newSize ) return; // ok, if not over the limit
|
| 161 |
+
|
| 162 |
+
// we need to store a temporary list of hypotheses
|
| 163 |
+
vector< Hypothesis* > hypos = GetSortedListNOTCONST();
|
| 164 |
+
bool* included = (bool*) malloc(sizeof(bool) * hypos.size());
|
| 165 |
+
for(size_t i=0; i<hypos.size(); i++) included[i] = false;
|
| 166 |
+
|
| 167 |
+
// clear out original set
|
| 168 |
+
for( iterator iter = m_hypos.begin(); iter != m_hypos.end(); ) {
|
| 169 |
+
iterator removeHyp = iter++;
|
| 170 |
+
Detach(removeHyp);
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
// add best hyps for each coverage according to minStackDiversity
|
| 174 |
+
if ( m_minHypoStackDiversity > 0 ) {
|
| 175 |
+
map< WordsBitmapID, size_t > diversityCount;
|
| 176 |
+
for(size_t i=0; i<hypos.size(); i++) {
|
| 177 |
+
Hypothesis *hyp = hypos[i];
|
| 178 |
+
WordsBitmapID coverage = hyp->GetWordsBitmap().GetID();;
|
| 179 |
+
if (diversityCount.find( coverage ) == diversityCount.end())
|
| 180 |
+
diversityCount[ coverage ] = 0;
|
| 181 |
+
|
| 182 |
+
if (diversityCount[ coverage ] < m_minHypoStackDiversity) {
|
| 183 |
+
m_hypos.insert( hyp );
|
| 184 |
+
included[i] = true;
|
| 185 |
+
diversityCount[ coverage ]++;
|
| 186 |
+
if (diversityCount[ coverage ] == m_minHypoStackDiversity)
|
| 187 |
+
SetWorstScoreForBitmap( coverage, hyp->GetFutureScore());
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
// only add more if stack not full after satisfying minStackDiversity
|
| 193 |
+
if ( size() < newSize ) {
|
| 194 |
+
|
| 195 |
+
// add best remaining hypotheses
|
| 196 |
+
for(size_t i=0; i<hypos.size()
|
| 197 |
+
&& size() < newSize
|
| 198 |
+
&& hypos[i]->GetFutureScore() > m_bestScore+m_beamWidth; i++) {
|
| 199 |
+
if (! included[i]) {
|
| 200 |
+
m_hypos.insert( hypos[i] );
|
| 201 |
+
included[i] = true;
|
| 202 |
+
if (size() == newSize)
|
| 203 |
+
m_worstScore = hypos[i]->GetFutureScore();
|
| 204 |
+
}
|
| 205 |
+
}
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
// delete hypotheses that have not been included
|
| 209 |
+
for(size_t i=0; i<hypos.size(); i++) {
|
| 210 |
+
if (! included[i]) {
|
| 211 |
+
delete hypos[i];
|
| 212 |
+
m_manager.GetSentenceStats().AddPruning();
|
| 213 |
+
}
|
| 214 |
+
}
|
| 215 |
+
free(included);
|
| 216 |
+
|
| 217 |
+
// some reporting....
|
| 218 |
+
VERBOSE(3,", pruned to size " << size() << endl);
|
| 219 |
+
IFVERBOSE(3) {
|
| 220 |
+
TRACE_ERR("stack now contains: ");
|
| 221 |
+
for(iterator iter = m_hypos.begin(); iter != m_hypos.end(); iter++) {
|
| 222 |
+
Hypothesis *hypo = *iter;
|
| 223 |
+
TRACE_ERR( hypo->GetId() << " (" << hypo->GetFutureScore() << ") ");
|
| 224 |
+
}
|
| 225 |
+
TRACE_ERR( endl);
|
| 226 |
+
}
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
/** Return the stored hypothesis with the highest future score, or NULL when
 *  the stack is empty. On equal scores the one met first in iteration order wins.
 */
const Hypothesis *HypothesisStackNormal::GetBestHypothesis() const
{
  if (m_hypos.empty()) {
    return NULL;
  }

  const_iterator pos = m_hypos.begin();
  Hypothesis *winner = *pos;
  for (++pos; pos != m_hypos.end(); ++pos) {
    Hypothesis *candidate = *pos;
    if (candidate->GetFutureScore() > winner->GetFutureScore()) {
      winner = candidate;
    }
  }
  return winner;
}
|
| 243 |
+
|
| 244 |
+
/** Return all hypotheses of the stack as a vector ordered by
 *  CompareHypothesisTotalScore. The stack itself is left untouched.
 */
vector<const Hypothesis*> HypothesisStackNormal::GetSortedList() const
{
  vector<const Hypothesis*> sorted(m_hypos.begin(), m_hypos.end());
  std::sort(sorted.begin(), sorted.end(), CompareHypothesisTotalScore());
  return sorted;
}
|
| 253 |
+
|
| 254 |
+
/** Same as GetSortedList(), but hands out mutable hypothesis pointers. */
vector<Hypothesis*> HypothesisStackNormal::GetSortedListNOTCONST()
{
  vector<Hypothesis*> sorted(m_hypos.begin(), m_hypos.end());
  std::sort(sorted.begin(), sorted.end(), CompareHypothesisTotalScore());
  return sorted;
}
|
| 263 |
+
|
| 264 |
+
void HypothesisStackNormal::CleanupArcList()
|
| 265 |
+
{
|
| 266 |
+
// only necessary if n-best calculations are enabled
|
| 267 |
+
if (!m_nBestIsEnabled) return;
|
| 268 |
+
|
| 269 |
+
iterator iter;
|
| 270 |
+
for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
|
| 271 |
+
Hypothesis *mainHypo = *iter;
|
| 272 |
+
mainHypo->CleanupArcList(this->m_manager.options()->nbest.nbest_size, this->m_manager.options()->NBestDistinct());
|
| 273 |
+
}
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
TO_STRING_BODY(HypothesisStackNormal);
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
// friend
|
| 280 |
+
std::ostream& operator<<(std::ostream& out, const HypothesisStackNormal& hypoColl)
|
| 281 |
+
{
|
| 282 |
+
HypothesisStackNormal::const_iterator iter;
|
| 283 |
+
|
| 284 |
+
for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter) {
|
| 285 |
+
const Hypothesis &hypo = **iter;
|
| 286 |
+
out << hypo << endl;
|
| 287 |
+
|
| 288 |
+
}
|
| 289 |
+
return out;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
}
|
| 294 |
+
|
mosesdecoder/moses/IOWrapper.cpp
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (c) 2006 University of Edinburgh
|
| 6 |
+
All rights reserved.
|
| 7 |
+
|
| 8 |
+
Redistribution and use in source and binary forms, with or without modification,
|
| 9 |
+
are permitted provided that the following conditions are met:
|
| 10 |
+
|
| 11 |
+
* Redistributions of source code must retain the above copyright notice,
|
| 12 |
+
this list of conditions and the following disclaimer.
|
| 13 |
+
* Redistributions in binary form must reproduce the above copyright notice,
|
| 14 |
+
this list of conditions and the following disclaimer in the documentation
|
| 15 |
+
and/or other materials provided with the distribution.
|
| 16 |
+
* Neither the name of the University of Edinburgh nor the names of its contributors
|
| 17 |
+
may be used to endorse or promote products derived from this software
|
| 18 |
+
without specific prior written permission.
|
| 19 |
+
|
| 20 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| 21 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
| 22 |
+
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 23 |
+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
| 24 |
+
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| 25 |
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| 26 |
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| 27 |
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
| 28 |
+
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| 29 |
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 30 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 31 |
+
***********************************************************************/
|
| 32 |
+
|
| 33 |
+
#include <iostream>
|
| 34 |
+
#include <stack>
|
| 35 |
+
#include <boost/algorithm/string.hpp>
|
| 36 |
+
#include <boost/foreach.hpp>
|
| 37 |
+
|
| 38 |
+
#include "moses/Syntax/KBestExtractor.h"
|
| 39 |
+
#include "moses/Syntax/PVertex.h"
|
| 40 |
+
#include "moses/Syntax/S2T/DerivationWriter.h"
|
| 41 |
+
|
| 42 |
+
#include "moses/Hypothesis.h"
|
| 43 |
+
#include "moses/TrellisPathList.h"
|
| 44 |
+
#include "moses/StaticData.h"
|
| 45 |
+
#include "moses/InputFileStream.h"
|
| 46 |
+
#include "moses/FF/StatefulFeatureFunction.h"
|
| 47 |
+
#include "moses/TreeInput.h"
|
| 48 |
+
#include "moses/ForestInput.h"
|
| 49 |
+
#include "moses/ConfusionNet.h"
|
| 50 |
+
#include "moses/WordLattice.h"
|
| 51 |
+
#include "moses/ChartManager.h"
|
| 52 |
+
|
| 53 |
+
#include "IOWrapper.h"
|
| 54 |
+
|
| 55 |
+
#include <boost/filesystem.hpp>
|
| 56 |
+
#include <boost/iostreams/device/file.hpp>
|
| 57 |
+
#include <boost/iostreams/filter/bzip2.hpp>
|
| 58 |
+
#include <boost/iostreams/filter/gzip.hpp>
|
| 59 |
+
#include <boost/iostreams/filtering_stream.hpp>
|
| 60 |
+
|
| 61 |
+
using namespace std;
|
| 62 |
+
|
| 63 |
+
namespace Moses
|
| 64 |
+
{
|
| 65 |
+
|
| 66 |
+
/** Set up all input and output channels of the decoder.
 *
 *  Takes a private copy of \p opts, opens the input (stdin when no
 *  "input-file" parameter is given), creates the output collectors that are
 *  switched on by parameters, builds the file name pattern for hypergraph
 *  output, and opens the three simulated post-editing streams when "spe-src"
 *  is given.
 *
 *  Throws (via UTIL_THROW_IF2) when context-sensitive decoding is requested
 *  for non-sentence input, when the hypergraph compression type is unknown,
 *  or when "spe-src" is given without "spe-trg" and "spe-aln".
 */
IOWrapper::IOWrapper(AllOptions const& opts)
  : m_options(new AllOptions(opts))
  , m_nBestStream(NULL)
  , m_surpressSingleBestOutput(false)
  , m_look_ahead(0)
  , m_look_back(0)
  , m_buffered_ahead(0)
  , spe_src(NULL)
  , spe_trg(NULL)
  , spe_aln(NULL)
{
  const StaticData &staticData = StaticData::Instance();
  Parameter const& P = staticData.GetParameter();

  // context buffering for context-sensitive decoding
  m_look_ahead = m_options->context.look_ahead;
  m_look_back = m_options->context.look_back;
  m_inputType = m_options->input.input_type;

  UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput,
                 "Context-sensitive decoding currently works only with sentence input.");

  m_currentLine = m_options->output.start_translation_id;
  m_inputFactorOrder = &m_options->input.factor_order;

  size_t nBestSize = m_options->nbest.nbest_size;
  string nBestFilePath = m_options->nbest.output_file_path;

  // input: a named file, or stdin when no file is given
  P.SetParameter<string>(m_inputFilePath, "input-file", "");
  if (m_inputFilePath.empty()) {
    m_inputFile = NULL;
    m_inputStream = &cin;
  } else {
    VERBOSE(2,"IO from File" << endl);
    m_inputFile = new InputFileStream(m_inputFilePath);
    m_inputStream = m_inputFile;
  }

  if (nBestSize > 0) {
    m_nBestOutputCollector.reset(new Moses::OutputCollector(nBestFilePath));
    // n-best list goes to stdout, so the single best output must not
    if (m_nBestOutputCollector->OutputIsCout()) {
      m_surpressSingleBestOutput = true;
    }
  }

  // optional outputs: a collector is only created when a path is configured
  std::string path;
  P.SetParameter<std::string>(path, "output-search-graph-extended", "");
  if (!path.size()) P.SetParameter<std::string>(path, "output-search-graph", "");
  if (path.size()) m_searchGraphOutputCollector.reset(new OutputCollector(path));

  P.SetParameter<std::string>(path, "output-unknowns", "");
  if (path.size()) m_unknownsCollector.reset(new OutputCollector(path));

  P.SetParameter<std::string>(path, "alignment-output-file", "");
  if (path.size()) m_alignmentInfoCollector.reset(new OutputCollector(path));

  P.SetParameter<string>(path, "translation-details", "");
  if (path.size()) m_detailedTranslationCollector.reset(new OutputCollector(path));

  P.SetParameter<string>(path, "tree-translation-details", "");
  if (path.size()) m_detailTreeFragmentsOutputCollector.reset(new OutputCollector(path));

  P.SetParameter<string>(path, "output-word-graph", "");
  if (path.size()) m_wordGraphCollector.reset(new OutputCollector(path));

  size_t latticeSamplesSize = staticData.options()->output.lattice_sample_size;
  string latticeSamplesFile = staticData.options()->output.lattice_sample_filepath;
  if (latticeSamplesSize) {
    m_latticeSamplesCollector.reset(new OutputCollector(latticeSamplesFile));
    if (m_latticeSamplesCollector->OutputIsCout()) {
      m_surpressSingleBestOutput = true;
    }
  }

  if (!m_surpressSingleBestOutput) {
    m_singleBestOutputCollector.reset(new Moses::OutputCollector(&std::cout));
  }

  // setup file pattern for hypergraph output
  char const* key = "output-search-graph-hypergraph";
  PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
  std::string& fmt = m_hypergraph_output_filepattern;
  // first, determine the output directory
  if (p && p->size() > 2) {
    fmt = p->at(2);
    // an empty directory argument means the current directory; this also
    // keeps the rbegin() access below away from an empty string
    if (fmt.empty()) fmt = ".";
  } else if (nBestFilePath.size() && nBestFilePath != "-" &&
             ! boost::starts_with(nBestFilePath, "/dev/stdout")) {
    fmt = boost::filesystem::path(nBestFilePath).parent_path().string();
    if (fmt.empty()) fmt = ".";
  } else fmt = boost::filesystem::current_path().string() + "/hypergraph";
  if (*fmt.rbegin() != '/') fmt += "/";
  std::string extension = (p && p->size() > 1 ? p->at(1) : std::string("txt"));
  UTIL_THROW_IF2(extension != "txt" && extension != "gz" && extension != "bz2",
                 "Unknown compression type '" << extension
                 << "' for hypergraph output!");
  // "%d" is filled in later with the id of the sentence
  fmt += string("%d.") + extension;

  // input streams for simulated post-editing
  PARAM_VEC const* speSrc = staticData.GetParameter().GetParam("spe-src");
  if (speSrc) {
    PARAM_VEC const* speTrg = staticData.GetParameter().GetParam("spe-trg");
    PARAM_VEC const* speAln = staticData.GetParameter().GetParam("spe-aln");
    // all three files are needed; fail with a message instead of a NULL dereference
    UTIL_THROW_IF2(!speTrg || !speAln,
                   "Simulated post-editing needs spe-src, spe-trg and spe-aln.");
    spe_src = new ifstream(speSrc->at(0).c_str());
    spe_trg = new ifstream(speTrg->at(0).c_str());
    spe_aln = new ifstream(speAln->at(0).c_str());
  }
}
|
| 169 |
+
|
| 170 |
+
/** Release the streams that the constructor allocated with new. */
IOWrapper::~IOWrapper()
{
  // m_inputFile is NULL when reading from stdin; delete on NULL is a no-op
  delete m_inputFile;

  // simulated post-editing streams; they stay NULL unless "spe-src" was given
  delete spe_src;
  delete spe_trg;
  delete spe_aln;

  // if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
  // outputting n-best to file, rather than stdout. need to close file and delete obj
  // delete m_nBestStream;
  // }

  // delete m_detailedTranslationReportingStream;
  // delete m_alignmentInfoStream;
  // delete m_unknownsStream;
  // delete m_outputSearchGraphStream;
  // delete m_outputWordGraphStream;
  // delete m_latticeSamplesStream;
}
|
| 186 |
+
|
| 187 |
+
// InputType*
|
| 188 |
+
// IOWrapper::
|
| 189 |
+
// GetInput(InputType* inputType)
|
| 190 |
+
// {
|
| 191 |
+
// if(inputType->Read(*m_inputStream, *m_inputFactorOrder)) {
|
| 192 |
+
// return inputType;
|
| 193 |
+
// } else {
|
| 194 |
+
// delete inputType;
|
| 195 |
+
// return NULL;
|
| 196 |
+
// }
|
| 197 |
+
// }
|
| 198 |
+
|
| 199 |
+
/** Fetch the next buffered input item, using the concrete input class that
 *  matches m_inputType. An unknown input type is reported on the error trace
 *  and gives an empty pointer.
 */
boost::shared_ptr<InputType>
IOWrapper::
GetBufferedInput()
{
  boost::shared_ptr<InputType> input;
  switch(m_inputType) {
  case SentenceInput:
    input = BufferInput<Sentence>();
    break;
  case ConfusionNetworkInput:
    input = BufferInput<ConfusionNet>();
    break;
  case WordLatticeInput:
    input = BufferInput<WordLattice>();
    break;
  case TreeInputType:
    input = BufferInput<TreeInput>();
    break;
  case TabbedSentenceInput:
    input = BufferInput<TabbedSentence>();
    break;
  case ForestInputType:
    input = BufferInput<ForestInput>();
    break;
  default:
    TRACE_ERR("Unknown input type: " << m_inputType << "\n");
    break;
  }
  return input;
}
|
| 222 |
+
|
| 223 |
+
/** Read the next input item and maintain the buffer of past input.
 *
 *  \param cw if not NULL, receives the current context window
 *  \return the next input item, or an empty pointer when none is available
 *
 *  With WITH_THREADS defined the whole call runs under m_lock.
 */
boost::shared_ptr<InputType>
IOWrapper::
ReadInput(boost::shared_ptr<std::vector<std::string> >* cw)
{
#ifdef WITH_THREADS
  boost::lock_guard<boost::mutex> lock(m_lock);
#endif
  boost::shared_ptr<InputType> source = GetBufferedInput();
  if (source) {
    source->SetTranslationId(m_currentLine++);

    // when using a sliding context window, remove obsolete past input from buffer:
    const bool windowIsBounded = m_look_back != std::numeric_limits<size_t>::max();
    if (!m_past_input.empty() && windowIsBounded) {
      // walk backwards from the newest item until m_look_back is covered
      // (sizes as reported by GetSize()) or the oldest item is reached
      list<boost::shared_ptr<InputType> >::iterator keepFrom = m_past_input.end();
      size_t covered = 0;
      while (covered < m_look_back && --keepFrom != m_past_input.begin()) {
        covered += (*keepFrom)->GetSize();
      }
      // everything in front of keepFrom is no longer needed
      m_past_input.erase(m_past_input.begin(), keepFrom);
    }

    if (m_look_back) {
      m_past_input.push_back(source);
    }
  }
  if (cw) {
    *cw = GetCurrentContextWindow();
  }
  return source;
}
|
| 248 |
+
|
| 249 |
+
/** Build the context window: the string form of every item in the past-input
 *  buffer followed by every item in the future-input buffer.
 */
boost::shared_ptr<std::vector<std::string> >
IOWrapper::
GetCurrentContextWindow() const
{
  boost::shared_ptr<std::vector<string> > window(new std::vector<string>);
  std::vector<string>& lines = *window;
  BOOST_FOREACH(boost::shared_ptr<InputType> const& past, m_past_input) {
    lines.push_back(past->ToString());
  }
  BOOST_FOREACH(boost::shared_ptr<InputType> const& upcoming, m_future_input) {
    lines.push_back(upcoming->ToString());
  }
  return window;
}
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
std::string
|
| 264 |
+
IOWrapper::
|
| 265 |
+
GetHypergraphOutputFileName(size_t const id) const
|
| 266 |
+
{
|
| 267 |
+
return str(boost::format(m_hypergraph_output_filepattern) % id);
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
} // namespace
|
| 272 |
+
|
mosesdecoder/moses/InputFileStream.h
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_InputFileStream_h
|
| 23 |
+
#define moses_InputFileStream_h
|
| 24 |
+
|
| 25 |
+
#include <cstdlib>
|
| 26 |
+
#include <fstream>
|
| 27 |
+
#include <string>
|
| 28 |
+
|
| 29 |
+
namespace Moses
|
| 30 |
+
{
|
| 31 |
+
|
| 32 |
+
/** Used in place of std::istream, can read zipped files if it ends in .gz
|
| 33 |
+
*/
|
| 34 |
+
// Declaration only; the member functions are defined outside this header.
class InputFileStream : public std::istream
|
| 35 |
+
{
|
| 36 |
+
protected:
|
| 37 |
+
// NOTE(review): presumably the buffer this stream reads from -- who allocates and frees it is not visible here, confirm in the implementation file.
std::streambuf *m_streambuf;
|
| 38 |
+
public:
|
| 39 |
+
|
| 40 |
+
// filePath: path of the file to read. explicit, so a string is never silently converted into a stream.
explicit InputFileStream(const std::string &filePath);
|
| 41 |
+
~InputFileStream();
|
| 42 |
+
|
| 43 |
+
// NOTE(review): presumably closes the underlying file before destruction -- confirm in the implementation file.
void Close();
|
| 44 |
+
};
|
| 45 |
+
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
#endif
|
mosesdecoder/moses/LatticeMBR.cpp
ADDED
|
@@ -0,0 +1,680 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* LatticeMBR.cpp
|
| 3 |
+
* moses-cmd
|
| 4 |
+
*
|
| 5 |
+
* Created by Abhishek Arun on 26/01/2010.
|
| 6 |
+
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
| 7 |
+
*
|
| 8 |
+
*/
|
| 9 |
+
|
| 10 |
+
#include "LatticeMBR.h"
|
| 11 |
+
#include "moses/StaticData.h"
|
| 12 |
+
#include <algorithm>
|
| 13 |
+
#include <set>
|
| 14 |
+
|
| 15 |
+
using namespace std;
|
| 16 |
+
|
| 17 |
+
namespace Moses
|
| 18 |
+
{
|
| 19 |
+
|
| 20 |
+
// Highest n-gram order used in this file when extracting and scoring n-grams.
size_t bleu_order = 4;
|
| 21 |
+
// Log-score used by LatticeMBRSolution::CalcScore for an n-gram that has no
// entry in the n-gram score table it is given.
float UNKNGRAMLOGPROB = -20;
|
| 22 |
+
void GetOutputWords(const TrellisPath &path, vector <Word> &translation)
|
| 23 |
+
{
|
| 24 |
+
const std::vector<const Hypothesis *> &edges = path.GetEdges();
|
| 25 |
+
|
| 26 |
+
// print the surface factor of the translation
|
| 27 |
+
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
|
| 28 |
+
const Hypothesis &edge = *edges[currEdge];
|
| 29 |
+
const Phrase &phrase = edge.GetCurrTargetPhrase();
|
| 30 |
+
size_t size = phrase.GetSize();
|
| 31 |
+
for (size_t pos = 0 ; pos < size ; pos++) {
|
| 32 |
+
translation.push_back(phrase.GetWord(pos));
|
| 33 |
+
}
|
| 34 |
+
}
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
void extract_ngrams(const vector<Word >& sentence, map < Phrase, int > & allngrams)
|
| 39 |
+
{
|
| 40 |
+
for (int k = 0; k < (int)bleu_order; k++) {
|
| 41 |
+
for(int i =0; i < max((int)sentence.size()-k,0); i++) {
|
| 42 |
+
Phrase ngram( k+1);
|
| 43 |
+
for ( int j = i; j<= i+k; j++) {
|
| 44 |
+
ngram.AddWord(sentence[j]);
|
| 45 |
+
}
|
| 46 |
+
++allngrams[ngram];
|
| 47 |
+
}
|
| 48 |
+
}
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
void NgramScores::addScore(const Hypothesis* node, const Phrase& ngram, float score)
|
| 54 |
+
{
|
| 55 |
+
set<Phrase>::const_iterator ngramIter = m_ngrams.find(ngram);
|
| 56 |
+
if (ngramIter == m_ngrams.end()) {
|
| 57 |
+
ngramIter = m_ngrams.insert(ngram).first;
|
| 58 |
+
}
|
| 59 |
+
map<const Phrase*,float>& ngramScores = m_scores[node];
|
| 60 |
+
map<const Phrase*,float>::iterator scoreIter = ngramScores.find(&(*ngramIter));
|
| 61 |
+
if (scoreIter == ngramScores.end()) {
|
| 62 |
+
ngramScores[&(*ngramIter)] = score;
|
| 63 |
+
} else {
|
| 64 |
+
ngramScores[&(*ngramIter)] = log_sum(score,scoreIter->second);
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
// Returns an iterator to the first (n-gram, score) entry stored for `node`.
// The lookup uses operator[], so an empty table is created for a node that
// has none yet.
NgramScores::NodeScoreIterator NgramScores::nodeBegin(const Hypothesis* node)
{
  map<const Phrase*,float>& scoresForNode = m_scores[node];
  return scoresForNode.begin();
}
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
// Returns the past-the-end iterator of the (n-gram, score) entries stored for
// `node`. The lookup uses operator[], so an empty table is created for a node
// that has none yet.
NgramScores::NodeScoreIterator NgramScores::nodeEnd(const Hypothesis* node)
{
  map<const Phrase*,float>& scoresForNode = m_scores[node];
  return scoresForNode.end();
}
|
| 78 |
+
|
| 79 |
+
// Builds a solution from an n-best path: collects the path's output words and
// remembers the path's future score as the MAP score when `isMap` is set
// (0 otherwise). The overall score starts at 0 and is filled in by CalcScore.
LatticeMBRSolution::LatticeMBRSolution(const TrellisPath& path, bool isMap) :
  m_score(0.0f)
{
  // same edge/word traversal as the free function defined earlier in this file
  GetOutputWords(path, m_words);

  m_mapScore = isMap ? path.GetFutureScore() : 0;
}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
// Computes the lattice-MBR score of this solution and stores it in m_score.
//
//   finalNgramScores  log-scores per n-gram; n-grams missing from the table
//                     are scored with UNKNGRAMLOGPROB
//   thetas            thetas[0] weights the solution length, thetas[k] weights
//                     the accumulated score of n-grams of order k
//                     (must contain at least one element)
//   mapWeight         weight applied to the stored MAP score
//
// m_ngramScores ends up holding one (non-log) accumulated score per order.
// N-grams whose order has no corresponding theta are ignored; previously they
// wrote past the end of m_ngramScores.
void LatticeMBRSolution::CalcScore(map<Phrase, float>& finalNgramScores, const vector<float>& thetas, float mapWeight)
{
  // one accumulator per n-gram order, initialised to a very small log value
  m_ngramScores.assign(thetas.size()-1, -10000);

  map < Phrase, int > counts;
  extract_ngrams(m_words,counts);

  //Now score this translation
  m_score = thetas[0] * m_words.size();

  //Calculate the ngramScores, working in log space at first
  for (map < Phrase, int >::const_iterator ngrams = counts.begin(); ngrams != counts.end(); ++ngrams) {
    const size_t ngramSize = ngrams->first.GetSize();
    if (ngramSize == 0 || ngramSize > m_ngramScores.size()) {
      continue; // no accumulator (and no theta) for this order
    }

    float ngramPosterior = UNKNGRAMLOGPROB;
    map<Phrase,float>::const_iterator ngramPosteriorIt = finalNgramScores.find(ngrams->first);
    if (ngramPosteriorIt != finalNgramScores.end()) {
      ngramPosterior = ngramPosteriorIt->second;
    }

    // count * exp(score), accumulated in log space
    m_ngramScores[ngramSize-1] = log_sum(log((float)ngrams->second) + ngramPosterior,m_ngramScores[ngramSize-1]);
  }

  //convert from log to probability and create weighted sum
  for (size_t i = 0; i < m_ngramScores.size(); ++i) {
    m_ngramScores[i] = exp(m_ngramScores[i]);
    m_score += thetas[i+1] * m_ngramScores[i];
  }

  //The map score
  m_score += m_mapScore*mapWeight;
}
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
// Prunes the search lattice and builds the incoming-edge lists of the
// hypotheses that survive.
//
//   connectedHyp     in: hypotheses of the lattice; out: the surviving ones
//   outgoingHyps     successors per hypothesis; the successors of the empty
//                    hypothesis (id 0) are added here
//   incomingEdges    out: edges created for each surviving hypothesis
//   estimatedScores  estimatedScores[i] is the score of connectedHyp[i]
//   bestHypo         best hypothesis; its size sets the edge budget
//   edgeDensity      edge budget = edgeDensity * bestHypo->GetSize()
//   scale            factor applied to every edge score
//
// Hypotheses are visited from the highest estimated score downwards. Visiting
// stops once the edge budget is reached and the score drops below that of the
// previously visited hypothesis.
//
// The descending traversals use reverse iterators; the original compared
// against `--begin()`, which is undefined behaviour.
void pruneLatticeFB(Lattice & connectedHyp, map < const Hypothesis*, set <const Hypothesis* > > & outgoingHyps, map<const Hypothesis*, vector<Edge> >& incomingEdges,
                    const vector< float> & estimatedScores, const Hypothesis* bestHypo, size_t edgeDensity, float scale)
{
  typedef multimap<float, const Hypothesis*> ScoredHyps;

  //Need hyp 0 in connectedHyp - Find empty hypothesis
  VERBOSE(2,"Pruning lattice to edge density " << edgeDensity << endl);
  const Hypothesis* emptyHyp = connectedHyp.at(0);
  while (emptyHyp->GetId() != 0) {
    emptyHyp = emptyHyp->GetPrevHypo();
  }
  connectedHyp.push_back(emptyHyp); //Add it to list of hyps

  //Need hyp 0's outgoing Hyps
  for (size_t i = 0; i < connectedHyp.size(); ++i) {
    if (connectedHyp[i]->GetId() > 0 && connectedHyp[i]->GetPrevHypo()->GetId() == 0)
      outgoingHyps[emptyHyp].insert(connectedHyp[i]);
  }

  //sort hyps based on estimated scores - do so by copying to multimap
  ScoredHyps sortHypsByVal;
  for (size_t i =0; i < estimatedScores.size(); ++i) {
    sortHypsByVal.insert(make_pair(estimatedScores[i], connectedHyp[i]));
  }

  //store best score as score of hyp 0 (0 if there are no scores at all)
  const float bestScore = sortHypsByVal.empty() ? 0.0f : sortHypsByVal.rbegin()->first;
  sortHypsByVal.insert(make_pair(bestScore, emptyHyp));

  IFVERBOSE(3) {
    for (ScoredHyps::const_reverse_iterator it = sortHypsByVal.rbegin(); it != sortHypsByVal.rend(); ++it) {
      const Hypothesis* currHyp = it->second;
      cerr << "Hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl;
    }
  }

  set <const Hypothesis*> survivingHyps; //store hyps that make the cut in this

  VERBOSE(2, "BEST HYPO TARGET LENGTH : " << bestHypo->GetSize() << endl)
  size_t numEdgesTotal = edgeDensity * bestHypo->GetSize(); //as per Shankar, aim for (density * target length of MAP solution) arcs
  size_t numEdgesCreated = 0;
  VERBOSE(2, "Target edge count: " << numEdgesTotal << endl);

  float prevScore = -999999;

  //now iterate over multimap, best estimated score first
  for (ScoredHyps::const_reverse_iterator it = sortHypsByVal.rbegin(); it != sortHypsByVal.rend(); ++it) {
    float currEstimatedScore = it->first;
    const Hypothesis* currHyp = it->second;

    if (numEdgesCreated >= numEdgesTotal && prevScore > currEstimatedScore) //if this hyp has equal estimated score to previous, include its edges too
      break;

    prevScore = currEstimatedScore;
    VERBOSE(3, "Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)
    VERBOSE(3, "Considering hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl)

    survivingHyps.insert(currHyp); //CurrHyp made the cut

    // is its best predecessor already included ?
    if (survivingHyps.find(currHyp->GetPrevHypo()) != survivingHyps.end()) { //yes, then add an edge
      vector <Edge>& edges = incomingEdges[currHyp];
      Edge winningEdge(currHyp->GetPrevHypo(),currHyp,scale*(currHyp->GetScore() - currHyp->GetPrevHypo()->GetScore()),currHyp->GetCurrTargetPhrase());
      edges.push_back(winningEdge);
      ++numEdgesCreated;
    }

    //let's try the arcs too
    const ArcList *arcList = currHyp->GetArcList();
    if (arcList != NULL) {
      ArcList::const_iterator iterArcList;
      for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
        const Hypothesis *loserHypo = *iterArcList;
        const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
        if (survivingHyps.find(loserPrevHypo) != survivingHyps.end()) { //found it, add edge
          double arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
          Edge losingEdge(loserPrevHypo, currHyp, arcScore*scale, loserHypo->GetCurrTargetPhrase());
          vector <Edge>& edges = incomingEdges[currHyp];
          edges.push_back(losingEdge);
          ++numEdgesCreated;
        }
      }
    }

    //Now if a successor node has already been visited, add an edge connecting the two
    map < const Hypothesis*, set < const Hypothesis* > >::const_iterator outgoingIt = outgoingHyps.find(currHyp);

    if (outgoingIt != outgoingHyps.end()) {//currHyp does have successors
      const set<const Hypothesis*> & outHyps = outgoingIt->second; //the successors
      for (set<const Hypothesis*>::const_iterator outHypIts = outHyps.begin(); outHypIts != outHyps.end(); ++outHypIts) {
        const Hypothesis* succHyp = *outHypIts;

        if (survivingHyps.find(succHyp) == survivingHyps.end()) //Have we encountered the successor yet?
          continue; //No, move on to next

        //Curr Hyp can be : a) the best predecessor of succ b) or an arc attached to succ
        if (succHyp->GetPrevHypo() == currHyp) { //best predecessor
          vector <Edge>& succEdges = incomingEdges[succHyp];
          Edge succWinningEdge(currHyp, succHyp, scale*(succHyp->GetScore() - currHyp->GetScore()), succHyp->GetCurrTargetPhrase());
          succEdges.push_back(succWinningEdge);
          // succHyp is already in survivingHyps (checked above), no insert needed
          ++numEdgesCreated;
        }

        //now, let's find an arc
        const ArcList *arcList = succHyp->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          //QUESTION: What happens if there's more than one loserPrevHypo?
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
            if (loserPrevHypo == currHyp) { //found it
              vector <Edge>& succEdges = incomingEdges[succHyp];
              double arcScore = loserHypo->GetScore() - currHyp->GetScore();
              Edge losingEdge(currHyp, succHyp,scale* arcScore, loserHypo->GetCurrTargetPhrase());
              succEdges.push_back(losingEdge);
              ++numEdgesCreated;
            }
          }
        }
      }
    }
  }

  // hand the surviving hypotheses back to the caller
  connectedHyp.clear();
  for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
    connectedHyp.push_back(*it);
  }

  VERBOSE(2, "Done! Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)

  IFVERBOSE(3) {
    cerr << "Surviving hyps: " ;
    for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
      cerr << (*it)->GetId() << " ";
    }
    cerr << endl;
  }
}
|
| 277 |
+
|
| 278 |
+
// Runs a forward pass over the pruned lattice and fills `finalNgramScores`
// with one log-score per n-gram, normalised by the total score Z of the
// complete hypotheses.
//
//   connectedHyp      lattice nodes; sorted here by number of covered words
//   incomingEdges     incoming edges per node
//   finalNgramScores  out: n-gram -> log-score minus Z
//   posteriors        true: an n-gram is counted once per path and n-grams
//                     re-introduced by an edge are not propagated again;
//                     false: the per-path occurrence count is used
void calcNgramExpectations(Lattice & connectedHyp, map<const Hypothesis*, vector<Edge> >& incomingEdges,
                           map<Phrase, float>& finalNgramScores, bool posteriors)
{
  //nodes are processed by increasing source word coverage
  sort(connectedHyp.begin(),connectedHyp.end(),ascendingCoverageCmp);

  map<const Hypothesis*, float> forwardScore;
  forwardScore[connectedHyp[0]] = 0.0f; //first node starts at log(1)
  set< const Hypothesis *> finalHyps;   //hyps whose coverage is complete
  NgramScores ngramScores;              //per-node n-gram scores

  for (size_t hypIdx = 1; hypIdx < connectedHyp.size(); ++hypIdx) {
    const Hypothesis* currHyp = connectedHyp[hypIdx];
    if (currHyp->GetWordsBitmap().IsComplete()) {
      finalHyps.insert(currHyp);
    }

    VERBOSE(3, "Processing hyp: " << currHyp->GetId() << ", num words cov= " << currHyp->GetWordsBitmap().GetNumWordsCovered() << endl)

    vector <Edge> & edges = incomingEdges[currHyp];

    //forward score of this node: log-sum over all incoming edges
    for (vector<Edge>::const_iterator edgeIt = edges.begin(); edgeIt != edges.end(); ++edgeIt) {
      const Edge& edge = *edgeIt;
      map<const Hypothesis*, float>::iterator known = forwardScore.find(currHyp);
      if (known == forwardScore.end()) {
        forwardScore[currHyp] = forwardScore[edge.GetTailNode()] + edge.GetScore();
        VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] = fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
      } else {
        known->second = log_sum(known->second, forwardScore[edge.GetTailNode()] + edge.GetScore());
        VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] += fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
      }
    }

    //n-gram scores of this node
    for (size_t edgeIdx = 0; edgeIdx < edges.size(); ++edgeIdx) {
      Edge& edge = edges[edgeIdx];
      const NgramHistory & incomingPhrases = edge.GetNgrams(incomingEdges);

      //1) n-grams introduced by this edge
      for (NgramHistory::const_iterator histIt = incomingPhrases.begin(); histIt != incomingPhrases.end(); ++histIt) {
        const Phrase& ngram = histIt->first;
        const PathCounts& pathCounts = histIt->second;
        VERBOSE(4, "Calculating score for: " << histIt->first << endl)

        for (PathCounts::const_iterator pathCountIt = pathCounts.begin(); pathCountIt != pathCounts.end(); ++pathCountIt) {
          //forward score at the tail of the path's first edge, plus every edge score on the path
          const Path& path = pathCountIt->first;
          float score = forwardScore[path[0]->GetTailNode()];
          for (size_t step = 0; step < path.size(); ++step) {
            score += path[step]->GetScore();
          }
          //for expectations the number of occurrences on the path matters
          const size_t repeats = posteriors ? 1 : pathCountIt->second;
          for (size_t r = 0; r < repeats; ++r) {
            ngramScores.addScore(currHyp,ngram,score);
          }
        }
      }

      //2) n-grams carried over from the tail node
      for (NgramScores::NodeScoreIterator tailIt = ngramScores.nodeBegin(edge.GetTailNode());
           tailIt != ngramScores.nodeEnd(edge.GetTailNode()); ++tailIt) {
        const Phrase & currNgram = *(tailIt->first);
        float currNgramScore = tailIt->second;
        VERBOSE(4, "Calculating score for: " << currNgram << endl)

        // For posteriors, don't double count ngrams
        if (posteriors && incomingPhrases.find(currNgram) != incomingPhrases.end()) {
          continue;
        }
        float score = edge.GetScore() + currNgramScore;
        ngramScores.addScore(currHyp,currNgram,score);
      }
    }
  }

  float Z = 9999999; //total score of the lattice; this value means "not set yet"

  //collect the n-gram scores of the final hyps and accumulate Z
  for (set< const Hypothesis *>::iterator finalHyp = finalHyps.begin(); finalHyp != finalHyps.end(); ++finalHyp) {
    const Hypothesis* hyp = *finalHyp;

    for (NgramScores::NodeScoreIterator scoreIt = ngramScores.nodeBegin(hyp); scoreIt != ngramScores.nodeEnd(hyp); ++scoreIt) {
      const Phrase& ngram = *(scoreIt->first);
      map<Phrase, float>::iterator seen = finalNgramScores.find(ngram);
      if (seen == finalNgramScores.end()) {
        finalNgramScores[ngram] = scoreIt->second;
      } else {
        seen->second = log_sum(scoreIt->second, seen->second);
      }
    }

    if (Z == 9999999) {
      Z = forwardScore[hyp];
    } else {
      Z = log_sum(Z, forwardScore[hyp]);
    }
  }

  //normalise by Z (a subtraction, since the scores are logs)
  for (map<Phrase, float>::iterator finalScoresIt = finalNgramScores.begin(); finalScoresIt != finalNgramScores.end(); ++finalScoresIt) {
    finalScoresIt->second -= Z;
    IFVERBOSE(2) {
      VERBOSE(2,finalScoresIt->first << " [" << finalScoresIt->second << "]" << endl);
    }
  }
}
|
| 397 |
+
|
| 398 |
+
// Returns the n-gram history of this edge, computing it on first use.
// The history contains
//   - every n-gram (order <= bleu_order) that lies entirely inside this
//     edge's words, with the single-edge path {this};
//   - every n-gram obtained by extending an n-gram that ends an incoming edge
//     of the tail node with the first words of this edge, with the incoming
//     path extended by this edge.
// A non-empty m_ngrams is treated as an already computed result.
const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & incomingEdges)
{
  if (m_ngrams.size() > 0)
    return m_ngrams;

  //n-grams local to this edge
  const Phrase& ownWords = GetWords();
  const size_t ownSize = ownWords.GetSize();
  for (size_t first = 0; first < ownSize; ++first) {
    for (size_t last = first; last < first + bleu_order && last < ownSize; ++last) {
      Phrase localNgram(last-first+1);
      for (size_t pos = first; pos <= last; ++pos) {
        localNgram.AddWord(ownWords.GetWord(pos));
      }
      vector<const Edge*> selfPath;
      selfPath.push_back(this);
      storeNgramHistory(localNgram, selfPath);
    }
  }

  map<const Hypothesis*, vector<Edge> >::iterator tailEntry = incomingEdges.find(m_tailNode);
  if (tailEntry == incomingEdges.end())
    return m_ngrams; //tail node has no incoming edges

  //n-grams straddling a previous edge and this edge
  vector<Edge> & tailInEdges = tailEntry->second;
  for (vector<Edge>::iterator prevEdge = tailInEdges.begin(); prevEdge != tailInEdges.end(); ++prevEdge) {
    const NgramHistory & prevNgrams = prevEdge->GetNgrams(incomingEdges);
    for (NgramHistory::const_iterator prevHist = prevNgrams.begin(); prevHist != prevNgrams.end(); ++prevHist) {
      const Phrase& prevNgram = prevHist->first;
      const PathCounts & prevPaths = prevHist->second;
      size_t back = min(prevNgram.GetSize(), prevEdge->GetWordsSize());
      const Phrase& prevWords = prevEdge->GetWords();
      IFVERBOSE(3) {
        cerr << "Edge: "<< *prevEdge <<endl;
        cerr << "edgeWords: " << prevWords << endl;
        cerr << "edgeInNgram: " << prevNgram << endl;
      }

      Phrase edgeSuffix(ARRAY_SIZE_INCR);
      Phrase ngramSuffix(ARRAY_SIZE_INCR);
      GetPhraseSuffix(prevWords,back,edgeSuffix);
      GetPhraseSuffix(prevNgram,back,ngramSuffix);

      //only n-grams that end the previous edge can be extended
      if (!(ngramSuffix == edgeSuffix))
        continue;

      const size_t prevNgramSize = prevNgram.GetSize();
      Phrase grownNgram(prevNgram); //previous n-gram plus the first words of this edge
      for (size_t added = 0; added < GetWordsSize() && added + prevNgramSize < bleu_order ; ++added) {
        grownNgram.AddWord(GetWords().GetWord(added));
        VERBOSE(3, "Inserting New Phrase : " << grownNgram << endl)

        for (PathCounts::const_iterator pathIt = prevPaths.begin(); pathIt != prevPaths.end(); ++pathIt) {
          Path extendedPath = pathIt->first;
          extendedPath.push_back(this);
          storeNgramHistory(grownNgram, extendedPath, pathIt->second);
        }
      }
    }
  }
  return m_ngrams;
}
|
| 467 |
+
|
| 468 |
+
//Add the last lastN words of origPhrase to targetPhrase
|
| 469 |
+
void Edge::GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const
|
| 470 |
+
{
|
| 471 |
+
size_t origSize = origPhrase.GetSize();
|
| 472 |
+
size_t startIndex = origSize - lastN;
|
| 473 |
+
for (size_t index = startIndex; index < origPhrase.GetSize(); ++index) {
|
| 474 |
+
targetPhrase.AddWord(origPhrase.GetWord(index));
|
| 475 |
+
}
|
| 476 |
+
}
|
| 477 |
+
|
| 478 |
+
bool Edge::operator< (const Edge& compare ) const
|
| 479 |
+
{
|
| 480 |
+
if (m_headNode->GetId() < compare.m_headNode->GetId())
|
| 481 |
+
return true;
|
| 482 |
+
if (compare.m_headNode->GetId() < m_headNode->GetId())
|
| 483 |
+
return false;
|
| 484 |
+
if (m_tailNode->GetId() < compare.m_tailNode->GetId())
|
| 485 |
+
return true;
|
| 486 |
+
if (compare.m_tailNode->GetId() < m_tailNode->GetId())
|
| 487 |
+
return false;
|
| 488 |
+
return GetScore() < compare.GetScore();
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
// Writes a one-line description of `edge` (head id, tail id, score and target
// phrase) followed by endl, and returns the stream.
ostream& operator<< (ostream& out, const Edge& edge)
{
  out << "Head: " << edge.m_headNode->GetId();
  out << ", Tail: " << edge.m_tailNode->GetId();
  out << ", Score: " << edge.m_score;
  out << ", Phrase: " << edge.m_targetPhrase << endl;
  return out;
}
|
| 499 |
+
|
| 500 |
+
bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b)
|
| 501 |
+
{
|
| 502 |
+
return (a->GetWordsBitmap().GetNumWordsCovered()
|
| 503 |
+
<
|
| 504 |
+
b->GetWordsBitmap().GetNumWordsCovered());
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
void getLatticeMBRNBest(const Manager& manager, const TrellisPathList& nBestList,
|
| 508 |
+
vector<LatticeMBRSolution>& solutions, size_t n)
|
| 509 |
+
{
|
| 510 |
+
std::map < int, bool > connected;
|
| 511 |
+
std::vector< const Hypothesis *> connectedList;
|
| 512 |
+
map<Phrase, float> ngramPosteriors;
|
| 513 |
+
std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
|
| 514 |
+
map<const Hypothesis*, vector<Edge> > incomingEdges;
|
| 515 |
+
vector< float> estimatedScores;
|
| 516 |
+
manager.GetForwardBackwardSearchGraph(&connected, &connectedList,
|
| 517 |
+
&outgoingHyps, &estimatedScores);
|
| 518 |
+
LMBR_Options const& lmbr = manager.options()->lmbr;
|
| 519 |
+
MBR_Options const& mbr = manager.options()->mbr;
|
| 520 |
+
pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores,
|
| 521 |
+
manager.GetBestHypothesis(), lmbr.pruning_factor, mbr.scale);
|
| 522 |
+
calcNgramExpectations(connectedList, incomingEdges, ngramPosteriors,true);
|
| 523 |
+
|
| 524 |
+
vector<float> mbrThetas = lmbr.theta;
|
| 525 |
+
float p = lmbr.precision;
|
| 526 |
+
float r = lmbr.ratio;
|
| 527 |
+
float mapWeight = lmbr.map_weight;
|
| 528 |
+
if (mbrThetas.size() == 0) {
|
| 529 |
+
// thetas were not specified on the command line, so use p and r instead
|
| 530 |
+
mbrThetas.push_back(-1); //Theta 0
|
| 531 |
+
mbrThetas.push_back(1/(bleu_order*p));
|
| 532 |
+
for (size_t i = 2; i <= bleu_order; ++i) {
|
| 533 |
+
mbrThetas.push_back(mbrThetas[i-1] / r);
|
| 534 |
+
}
|
| 535 |
+
}
|
| 536 |
+
IFVERBOSE(2) {
|
| 537 |
+
VERBOSE(2,"Thetas: ");
|
| 538 |
+
for (size_t i = 0; i < mbrThetas.size(); ++i) {
|
| 539 |
+
VERBOSE(2,mbrThetas[i] << " ");
|
| 540 |
+
}
|
| 541 |
+
VERBOSE(2,endl);
|
| 542 |
+
}
|
| 543 |
+
TrellisPathList::const_iterator iter;
|
| 544 |
+
size_t ctr = 0;
|
| 545 |
+
LatticeMBRSolutionComparator comparator;
|
| 546 |
+
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter, ++ctr) {
|
| 547 |
+
const TrellisPath &path = **iter;
|
| 548 |
+
solutions.push_back(LatticeMBRSolution(path,iter==nBestList.begin()));
|
| 549 |
+
solutions.back().CalcScore(ngramPosteriors, mbrThetas, mapWeight);
|
| 550 |
+
sort(solutions.begin(), solutions.end(), comparator);
|
| 551 |
+
while (solutions.size() > n) {
|
| 552 |
+
solutions.pop_back();
|
| 553 |
+
}
|
| 554 |
+
}
|
| 555 |
+
VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl);
|
| 556 |
+
}
|
| 557 |
+
|
| 558 |
+
// Returns the words of the single best lattice-MBR solution for `nBestList`.
// Uses at(0), so std::out_of_range is thrown if no solution was produced.
vector<Word> doLatticeMBR(const Manager& manager, const TrellisPathList& nBestList)
{
  vector<LatticeMBRSolution> topSolution;
  getLatticeMBRNBest(manager, nBestList, topSolution, 1);
  return topSolution.at(0).GetWords();
}
|
| 565 |
+
|
| 566 |
+
// Consensus decoding: returns the path of `nBestList` with the highest
// smoothed BLEU-style score against the expected n-gram counts of the pruned
// lattice.
//
// The expected counts come from calcNgramExpectations (expectation mode); the
// reference length is the sum of the expected unigram counts. For each path
// `comps` holds, per order i (0-based), the clipped match count at [2*i] and
// the number of n-grams in the path at [2*i+1]; the last slot holds the
// reference length.
//
// `nBestList` must not be empty (asserted).
//
// N-grams longer than BLEU_ORDER are ignored: `comps` has room for BLEU_ORDER
// orders only, while extract_ngrams follows the global `bleu_order`.
const TrellisPath doConsensusDecoding(const Manager& manager, const TrellisPathList& nBestList)
{
  static const int BLEU_ORDER = 4;
  static const float SMOOTH = 1;

  //calculate the ngram expectations
  std::map < int, bool > connected;
  std::vector< const Hypothesis *> connectedList;
  map<Phrase, float> ngramExpectations;
  std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
  map<const Hypothesis*, vector<Edge> > incomingEdges;
  vector< float> estimatedScores;
  manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores);
  LMBR_Options const& lmbr = manager.options()->lmbr;
  MBR_Options const& mbr = manager.options()->mbr;
  pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores,
                 manager.GetBestHypothesis(), lmbr.pruning_factor, mbr.scale);
  calcNgramExpectations(connectedList, incomingEdges, ngramExpectations,false);

  //expected length is sum of expected unigram counts
  float ref_length = 0.0f;
  for (map<Phrase,float>::const_iterator ref_iter = ngramExpectations.begin();
       ref_iter != ngramExpectations.end(); ++ref_iter) {
    if (ref_iter->first.GetSize() == 1) {
      ref_length += exp(ref_iter->second);
    }
  }

  VERBOSE(2,"REF Length: " << ref_length << endl);

  //use the ngram expectations to rescore the nbest list.
  TrellisPathList::const_iterator iter;
  TrellisPathList::const_iterator best = nBestList.end();
  float bestScore = -100000;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    vector<Word> words;
    map<Phrase,int> ngrams;
    GetOutputWords(path,words);
    extract_ngrams(words,ngrams);

    vector<float> comps(2*BLEU_ORDER+1);
    float logbleu = 0.0;
    float brevity = 0.0;
    int hyp_length = words.size();
    for (int i = 0; i < BLEU_ORDER; ++i) {
      comps[2*i] = 0.0;
      comps[2*i+1] = max(hyp_length-i,0); //number of n-grams of order i+1 in the path
    }

    for (map<Phrase,int>::const_iterator hyp_iter = ngrams.begin();
         hyp_iter != ngrams.end(); ++hyp_iter) {
      const size_t order = hyp_iter->first.GetSize();
      if (order == 0 || order > (size_t)BLEU_ORDER) {
        continue; //no slot in comps for this order
      }
      map<Phrase,float>::const_iterator ref_iter = ngramExpectations.find(hyp_iter->first);
      if (ref_iter != ngramExpectations.end()) {
        //clip the path's count by the expected count
        comps[2*(order-1)] += min(exp(ref_iter->second), (float)(hyp_iter->second));
      }
    }
    comps[comps.size()-1] = ref_length;

    float score = 0.0f;
    if (comps[0] != 0) { //at least one unigram match, otherwise the score stays 0
      for (int i=0; i<BLEU_ORDER; i++) {
        if ( i > 0 ) {
          //orders above unigram are smoothed
          logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH);
        } else {
          logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]);
        }
      }
      logbleu /= BLEU_ORDER;
      brevity = 1.0-(float)comps[comps.size()-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
      if (brevity < 0.0) {
        logbleu += brevity;
      }
      score = exp(logbleu);
    }

    if (score > bestScore) {
      bestScore = score;
      best = iter;
      VERBOSE(2,"NEW BEST: " << score << endl);
    }
  }

  assert (best != nBestList.end());
  return **best;
}
|
| 677 |
+
|
| 678 |
+
}
|
| 679 |
+
|
| 680 |
+
|
mosesdecoder/moses/NonTerminal.cpp
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
#include "NonTerminal.h"
|
| 3 |
+
|
| 4 |
+
using namespace std;
|
| 5 |
+
|
| 6 |
+
namespace Moses
|
| 7 |
+
{
|
| 8 |
+
std::ostream& operator<<(std::ostream &out, const NonTerminalSet &obj)
|
| 9 |
+
{
|
| 10 |
+
NonTerminalSet::const_iterator iter;
|
| 11 |
+
for (iter = obj.begin(); iter != obj.end(); ++iter) {
|
| 12 |
+
const Word &word = *iter;
|
| 13 |
+
out << word << " ";
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
return out;
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
}
|
mosesdecoder/moses/PartialTranslOptColl.cpp
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "PartialTranslOptColl.h"
|
| 23 |
+
#include <algorithm>
|
| 24 |
+
#include <iostream>
|
| 25 |
+
|
| 26 |
+
using namespace std;
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
/** constructor, intializes counters and thresholds */
|
| 31 |
+
PartialTranslOptColl::PartialTranslOptColl(size_t const maxSize)
|
| 32 |
+
{
|
| 33 |
+
m_bestScore = -std::numeric_limits<float>::infinity();
|
| 34 |
+
m_worstScore = -std::numeric_limits<float>::infinity();
|
| 35 |
+
m_maxSize = maxSize; // StaticData::Instance().GetMaxNoPartTransOpt();
|
| 36 |
+
m_totalPruned = 0;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
/** add a partial translation option to the collection (without pruning) */
|
| 41 |
+
void PartialTranslOptColl::AddNoPrune(TranslationOption *partialTranslOpt)
|
| 42 |
+
{
|
| 43 |
+
if (partialTranslOpt->GetFutureScore() >= m_worstScore) {
|
| 44 |
+
m_list.push_back(partialTranslOpt);
|
| 45 |
+
if (partialTranslOpt->GetFutureScore() > m_bestScore)
|
| 46 |
+
m_bestScore = partialTranslOpt->GetFutureScore();
|
| 47 |
+
} else {
|
| 48 |
+
m_totalPruned++;
|
| 49 |
+
delete partialTranslOpt;
|
| 50 |
+
}
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
/** add a partial translation option to the collection, prune if necessary.
|
| 54 |
+
* This is done similar to the Prune() in TranslationOptionCollection */
|
| 55 |
+
|
| 56 |
+
void PartialTranslOptColl::Add(TranslationOption *partialTranslOpt)
|
| 57 |
+
{
|
| 58 |
+
// add
|
| 59 |
+
AddNoPrune(partialTranslOpt );
|
| 60 |
+
|
| 61 |
+
// done if not too large (lazy pruning, only if twice as large as max)
|
| 62 |
+
if ( m_list.size() > 2 * m_maxSize ) {
|
| 63 |
+
Prune();
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
/** helper, used by pruning */
|
| 69 |
+
bool ComparePartialTranslationOption(const TranslationOption *a, const TranslationOption *b)
|
| 70 |
+
{
|
| 71 |
+
return a->GetFutureScore() > b->GetFutureScore();
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/** pruning, remove partial translation options, if list too big */
|
| 75 |
+
void PartialTranslOptColl::Prune()
|
| 76 |
+
{
|
| 77 |
+
// done if not too big
|
| 78 |
+
if ( m_list.size() <= m_maxSize ) {
|
| 79 |
+
return;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
// TRACE_ERR( "pruning partial translation options from size " << m_list.size() << std::endl);
|
| 83 |
+
|
| 84 |
+
// find nth element
|
| 85 |
+
NTH_ELEMENT4(m_list.begin(),
|
| 86 |
+
m_list.begin() + m_maxSize,
|
| 87 |
+
m_list.end(),
|
| 88 |
+
ComparePartialTranslationOption);
|
| 89 |
+
|
| 90 |
+
m_worstScore = m_list[ m_maxSize-1 ]->GetFutureScore();
|
| 91 |
+
// delete the rest
|
| 92 |
+
for (size_t i = m_maxSize ; i < m_list.size() ; ++i) {
|
| 93 |
+
delete m_list[i];
|
| 94 |
+
m_totalPruned++;
|
| 95 |
+
}
|
| 96 |
+
m_list.resize(m_maxSize);
|
| 97 |
+
// TRACE_ERR( "pruned to size " << m_list.size() << ", total pruned: " << m_totalPruned << std::endl);
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
// friend
|
| 101 |
+
ostream& operator<<(ostream& out, const PartialTranslOptColl& possibleTranslation)
|
| 102 |
+
{
|
| 103 |
+
for (size_t i = 0; i < possibleTranslation.m_list.size(); ++i) {
|
| 104 |
+
const TranslationOption &transOpt = *possibleTranslation.m_list[i];
|
| 105 |
+
out << transOpt << endl;
|
| 106 |
+
}
|
| 107 |
+
return out;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
|
mosesdecoder/moses/RuleCubeQueue.cpp
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#include "RuleCubeQueue.h"
|
| 23 |
+
|
| 24 |
+
#include "RuleCubeItem.h"
|
| 25 |
+
#include "StaticData.h"
|
| 26 |
+
#include "ChartManager.h"
|
| 27 |
+
|
| 28 |
+
namespace Moses
|
| 29 |
+
{
|
| 30 |
+
|
| 31 |
+
/** Destructor: the queue owns the cubes still stored in it and frees them. */
RuleCubeQueue::~RuleCubeQueue()
{
  for (;;) {
    if (m_queue.empty()) {
      break;
    }
    // Detach the pointer from the queue before freeing it, so the queue never
    // holds (or compares) a dangling pointer.
    RuleCube *leftover = m_queue.top();
    m_queue.pop();
    delete leftover;
  }
}
|
| 39 |
+
|
| 40 |
+
/** Insert a rule cube into the priority queue.
 *
 * The queue takes ownership: the cube is deleted either by Pop() once it is
 * exhausted or by the destructor.
 */
void RuleCubeQueue::Add(RuleCube *ruleCube)
{
  m_queue.push(ruleCube);
}
|
| 44 |
+
|
| 45 |
+
/** Remove and return the hypothesis of the best item of the best cube.
 *
 * The top cube is taken off the queue and its best item popped. With lazy
 * scoring enabled the item's hypothesis is only created at this point. A cube
 * that still has items afterwards goes back onto the queue (re-ranked by its
 * new top score); an exhausted cube is deleted.
 *
 * The queue must not be empty (check IsEmpty() first): top() on an empty
 * priority queue is undefined behaviour.
 *
 * \return the hypothesis released by the popped item
 */
ChartHypothesis *RuleCubeQueue::Pop()
{
  // take the most promising rule cube off the queue
  RuleCube *bestCube = m_queue.top();
  m_queue.pop();

  // take the most promising item of that cube and obtain its hypothesis
  RuleCubeItem *bestItem = bestCube->Pop(m_manager);
  if (m_manager.options()->cube.lazy_scoring) {
    bestItem->CreateHypothesis(bestCube->GetTranslationOption(), m_manager);
  }
  ChartHypothesis *result = bestItem->ReleaseHypothesis();

  // an exhausted cube is freed, anything else is queued again
  if (bestCube->IsEmpty()) {
    delete bestCube;
  } else {
    m_queue.push(bestCube);
  }

  return result;
}
|
| 68 |
+
|
| 69 |
+
}
|
mosesdecoder/moses/RuleCubeQueue.h
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2010 Hieu Hoang
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#pragma once
|
| 23 |
+
|
| 24 |
+
#include "RuleCube.h"
|
| 25 |
+
|
| 26 |
+
#include <queue>
|
| 27 |
+
#include <vector>
|
| 28 |
+
|
| 29 |
+
namespace Moses
|
| 30 |
+
{
|
| 31 |
+
|
| 32 |
+
class ChartManager;
|
| 33 |
+
|
| 34 |
+
/** Define an ordering between RuleCube based on their best item scores. This
|
| 35 |
+
* is used to order items in the priority queue.
|
| 36 |
+
*/
|
| 37 |
+
class RuleCubeOrderer
|
| 38 |
+
{
|
| 39 |
+
public:
|
| 40 |
+
bool operator()(const RuleCube *p, const RuleCube *q) const {
|
| 41 |
+
return p->GetTopScore() < q->GetTopScore();
|
| 42 |
+
}
|
| 43 |
+
};
|
| 44 |
+
|
| 45 |
+
/** @todo how is this used */
|
| 46 |
+
class RuleCubeQueue
|
| 47 |
+
{
|
| 48 |
+
public:
|
| 49 |
+
RuleCubeQueue(ChartManager &manager) : m_manager(manager) {}
|
| 50 |
+
~RuleCubeQueue();
|
| 51 |
+
|
| 52 |
+
void Add(RuleCube *);
|
| 53 |
+
ChartHypothesis *Pop();
|
| 54 |
+
bool IsEmpty() const {
|
| 55 |
+
return m_queue.empty();
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
private:
|
| 59 |
+
typedef std::priority_queue<RuleCube*, std::vector<RuleCube*>,
|
| 60 |
+
RuleCubeOrderer > Queue;
|
| 61 |
+
|
| 62 |
+
Queue m_queue;
|
| 63 |
+
ChartManager &m_manager;
|
| 64 |
+
};
|
| 65 |
+
|
| 66 |
+
}
|
mosesdecoder/moses/SquareMatrix.h
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#ifndef moses_SquareMatrix_h
|
| 23 |
+
#define moses_SquareMatrix_h
|
| 24 |
+
|
| 25 |
+
#include <iostream>
|
| 26 |
+
#include "TypeDef.h"
|
| 27 |
+
#include "Util.h"
|
| 28 |
+
#include "Bitmap.h"
|
| 29 |
+
|
| 30 |
+
namespace Moses
|
| 31 |
+
{
|
| 32 |
+
|
| 33 |
+
//! A square array of floats to store future costs in the phrase-based decoder
|
| 34 |
+
class SquareMatrix
|
| 35 |
+
{
|
| 36 |
+
friend std::ostream& operator<<(std::ostream &out, const SquareMatrix &matrix);
|
| 37 |
+
protected:
|
| 38 |
+
const size_t m_size; /**< length of the square (sentence length) */
|
| 39 |
+
float *m_array; /**< two-dimensional array to store floats */
|
| 40 |
+
|
| 41 |
+
SquareMatrix(); // not implemented
|
| 42 |
+
SquareMatrix(const SquareMatrix ©); // not implemented
|
| 43 |
+
|
| 44 |
+
public:
|
| 45 |
+
SquareMatrix(size_t size)
|
| 46 |
+
:m_size(size) {
|
| 47 |
+
m_array = (float*) malloc(sizeof(float) * size * size);
|
| 48 |
+
}
|
| 49 |
+
~SquareMatrix() {
|
| 50 |
+
free(m_array);
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
// set upper triangle
|
| 54 |
+
void InitTriangle(float val);
|
| 55 |
+
|
| 56 |
+
/** Returns length of the square: typically the sentence length */
|
| 57 |
+
inline size_t GetSize() const {
|
| 58 |
+
return m_size;
|
| 59 |
+
}
|
| 60 |
+
/** Get a future cost score for a span */
|
| 61 |
+
inline float GetScore(size_t startPos, size_t endPos) const {
|
| 62 |
+
return m_array[startPos * m_size + endPos];
|
| 63 |
+
}
|
| 64 |
+
/** Set a future cost score for a span */
|
| 65 |
+
inline void SetScore(size_t startPos, size_t endPos, float value) {
|
| 66 |
+
m_array[startPos * m_size + endPos] = value;
|
| 67 |
+
}
|
| 68 |
+
float CalcEstimatedScore( Bitmap const& ) const;
|
| 69 |
+
float CalcEstimatedScore( Bitmap const&, size_t startPos, size_t endPos ) const;
|
| 70 |
+
|
| 71 |
+
TO_STRING();
|
| 72 |
+
};
|
| 73 |
+
|
| 74 |
+
inline std::ostream& operator<<(std::ostream &out, const SquareMatrix &matrix)
|
| 75 |
+
{
|
| 76 |
+
for (size_t endPos = 0 ; endPos < matrix.GetSize() ; endPos++) {
|
| 77 |
+
for (size_t startPos = 0 ; startPos < matrix.GetSize() ; startPos++)
|
| 78 |
+
out << matrix.GetScore(startPos, endPos) << " ";
|
| 79 |
+
out << std::endl;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
return out;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
}
|
| 86 |
+
#endif
|
mosesdecoder/moses/StackVec.h
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2012 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include <vector>
|
| 23 |
+
|
| 24 |
+
namespace Moses
|
| 25 |
+
{
|
| 26 |
+
|
| 27 |
+
class ChartCellLabel;
|
| 28 |
+
|
| 29 |
+
/** A vector of (non-owning, const) ChartCellLabel pointers given its own type
 * name. It adds no members or behaviour on top of std::vector.
 */
class StackVec : public std::vector<const ChartCellLabel*>
{
};
|
| 33 |
+
|
| 34 |
+
}
|
mosesdecoder/moses/SyntacticLanguageModelFiles.h
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//
|
| 2 |
+
|
| 3 |
+
#ifndef moses_SyntacticLanguageModelFiles_h
|
| 4 |
+
#define moses_SyntacticLanguageModelFiles_h
|
| 5 |
+
|
| 6 |
+
#include "nl-iomacros.h"
|
| 7 |
+
#include "nl-string.h"
|
| 8 |
+
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
|
| 12 |
+
/** Owner of the two models read from the syntactic language model files.
 *
 * \tparam MH type of the hidden model
 * \tparam MO type of the observed model
 *
 * The constructor allocates one MH and one MO and fills them from the given
 * files; the destructor deletes both. The getters hand out non-owning
 * pointers that stay valid for the lifetime of this object.
 *
 * NOTE(review): no copy constructor / assignment operator is declared although
 * the class owns raw pointers, so instances should not be copied.
 */
template <class MH, class MO>
class SyntacticLanguageModelFiles
{
public:
  //! load both models from the listed files
  SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths);
  //! free both models
  ~SyntacticLanguageModelFiles();

  //! non-owning access to the hidden model
  MH* getHiddenModel();
  //! non-owning access to the observed model
  MO* getObservedModel();

private:
  MH* hiddenModel;    //!< owned, allocated in the constructor
  MO* observedModel;  //!< owned, allocated in the constructor
};
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
template <class MH, class MO>
|
| 32 |
+
SyntacticLanguageModelFiles<MH,MO>::SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths)
|
| 33 |
+
{
|
| 34 |
+
|
| 35 |
+
this->hiddenModel = new MH();
|
| 36 |
+
this->observedModel = new MO();
|
| 37 |
+
|
| 38 |
+
//// I. LOAD MODELS...
|
| 39 |
+
std::cerr << "Reading syntactic language model files...\n";
|
| 40 |
+
// For each model file...
|
| 41 |
+
for ( int a=0, n=filePaths.size(); a<n; a++ ) { // read models
|
| 42 |
+
FILE* pf = fopen(filePaths[a].c_str(),"r"); // Read model file
|
| 43 |
+
if(!pf) {
|
| 44 |
+
std::cerr << "Error loading model file " << filePaths[a] << std::endl;
|
| 45 |
+
return;
|
| 46 |
+
}
|
| 47 |
+
std::cerr << "Loading model \'" << filePaths[a] << "\'...\n";
|
| 48 |
+
int c=' ';
|
| 49 |
+
int i=0;
|
| 50 |
+
int line=1;
|
| 51 |
+
String sBuff(1000); // Lookahead/ctrs/buffers
|
| 52 |
+
CONSUME_ALL ( pf, c, WHITESPACE(c), line); // Get to first record
|
| 53 |
+
while ( c!=-1 && c!='\0' && c!='\5' ) { // For each record
|
| 54 |
+
CONSUME_STR ( pf, c, (c!='\n' && c!='\0' && c!='\5'), sBuff, i, line ); // Consume line
|
| 55 |
+
StringInput si(sBuff.c_array());
|
| 56 |
+
if ( !( sBuff[0]=='#' // Accept comments/fields
|
| 57 |
+
|| si>>*(this->hiddenModel)>>"\0"!=NULL
|
| 58 |
+
|| si>>*(this->observedModel)>>"\0"!=NULL
|
| 59 |
+
))
|
| 60 |
+
std::cerr<<"\nERROR: can't parse \'"<<sBuff<<"\' in line "<<line<<"\n\n";
|
| 61 |
+
CONSUME_ALL ( pf, c, WHITESPACE(c), line); // Consume whitespace
|
| 62 |
+
if ( line%100000==0 ) std::cerr<<" "<<line<<" lines read...\n"; // Progress for big models
|
| 63 |
+
}
|
| 64 |
+
std::cerr << "Model \'" << filePaths[a] << "\' loaded.\n";
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
std::cerr << "...reading syntactic language model files completed\n";
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
template <class MH, class MO>
|
| 74 |
+
SyntacticLanguageModelFiles<MH,MO>::~SyntacticLanguageModelFiles()
|
| 75 |
+
{
|
| 76 |
+
|
| 77 |
+
VERBOSE(3,"Destructing syntactic language model files" << std::endl);
|
| 78 |
+
delete hiddenModel;
|
| 79 |
+
delete observedModel;
|
| 80 |
+
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
template <class MH, class MO>
|
| 85 |
+
MH* SyntacticLanguageModelFiles<MH,MO>::getHiddenModel()
|
| 86 |
+
{
|
| 87 |
+
|
| 88 |
+
return this->hiddenModel;
|
| 89 |
+
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
template <class MH, class MO>
|
| 93 |
+
MO* SyntacticLanguageModelFiles<MH,MO>::getObservedModel()
|
| 94 |
+
{
|
| 95 |
+
|
| 96 |
+
return this->observedModel;
|
| 97 |
+
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
#endif
|
mosesdecoder/moses/SyntacticLanguageModelState.h
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//
|
| 2 |
+
|
| 3 |
+
#ifndef moses_SyntacticLanguageModelState_h
|
| 4 |
+
#define moses_SyntacticLanguageModelState_h
|
| 5 |
+
|
| 6 |
+
#include "nl-iomacros.h"
|
| 7 |
+
#include "nl-cpt.h"
|
| 8 |
+
#include "nl-hmm.h"
|
| 9 |
+
|
| 10 |
+
#include "SyntacticLanguageModelFiles.h"
|
| 11 |
+
#include "moses/FF/FFState.h"
|
| 12 |
+
#include <string>
|
| 13 |
+
|
| 14 |
+
namespace Moses
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
template <class MY, class MX, class YS=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> >
|
| 18 |
+
class SyntacticLanguageModelState : public FFState
|
| 19 |
+
{
|
| 20 |
+
public:
|
| 21 |
+
|
| 22 |
+
// Initialize an empty LM state
|
| 23 |
+
SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize );
|
| 24 |
+
|
| 25 |
+
// Get the next LM state from an existing LM state and the next word
|
| 26 |
+
SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word );
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
~SyntacticLanguageModelState() {
|
| 30 |
+
VERBOSE(3,"Destructing SyntacticLanguageModelState" << std::endl);
|
| 31 |
+
delete randomVariableStore;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
virtual int Compare(const FFState& other) const;
|
| 35 |
+
|
| 36 |
+
// Get the LM score from this LM state
|
| 37 |
+
double getScore() const;
|
| 38 |
+
|
| 39 |
+
double getProb() const;
|
| 40 |
+
|
| 41 |
+
private:
|
| 42 |
+
|
| 43 |
+
void setScore(double score);
|
| 44 |
+
void printRV();
|
| 45 |
+
|
| 46 |
+
SafeArray1D<Id<int>,pair<YS,LogProb> >* randomVariableStore;
|
| 47 |
+
double prob;
|
| 48 |
+
double score;
|
| 49 |
+
int beamSize;
|
| 50 |
+
SyntacticLanguageModelFiles<MY,MX>* modelData;
|
| 51 |
+
bool sentenceStart;
|
| 52 |
+
};
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
////////////////////////////////////////////////////////////////////////////////
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
template <class MY, class MX, class YS, class B>
|
| 59 |
+
void SyntacticLanguageModelState<MY,MX,YS,B>::printRV()
|
| 60 |
+
{
|
| 61 |
+
|
| 62 |
+
cerr << "*********** BEGIN printRV() ******************" << endl;
|
| 63 |
+
int size=randomVariableStore->getSize();
|
| 64 |
+
cerr << "randomVariableStore->getSize() == " << size << endl;
|
| 65 |
+
|
| 66 |
+
for (int depth=0; depth<size; depth+=1) {
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
const pair<YS,LogProb> *data = &(randomVariableStore->get(depth));
|
| 70 |
+
std::cerr << "randomVariableStore[" << depth << "]\t" << data->first << "\tprob = " << data->second.toProb() << "\tlogProb = " << double(data->second.toInt())/100 << std::endl;
|
| 71 |
+
|
| 72 |
+
}
|
| 73 |
+
cerr << "*********** END printRV() ******************" << endl;
|
| 74 |
+
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
// Initialize an empty LM state from grammar files
|
| 78 |
+
//
|
| 79 |
+
// nArgs is the number of model files
|
| 80 |
+
// argv is the list of model file names
|
| 81 |
+
//
|
| 82 |
+
template <class MY, class MX, class YS, class B>
|
| 83 |
+
SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize )
|
| 84 |
+
{
|
| 85 |
+
|
| 86 |
+
this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
|
| 87 |
+
this->modelData = modelData;
|
| 88 |
+
this->beamSize = beamSize;
|
| 89 |
+
|
| 90 |
+
// Initialize an empty random variable value
|
| 91 |
+
YS xBEG;
|
| 92 |
+
StringInput(String(BEG_STATE).c_array())>>xBEG>>"\0";
|
| 93 |
+
cerr<<xBEG<<"\n";
|
| 94 |
+
|
| 95 |
+
// cout << "Examining RV store just before RV init" << endl;
|
| 96 |
+
//printRV();
|
| 97 |
+
|
| 98 |
+
// Initialize the random variable store
|
| 99 |
+
this->randomVariableStore->init(1,pair<YS,LogProb>(xBEG,0));
|
| 100 |
+
|
| 101 |
+
this->sentenceStart = true;
|
| 102 |
+
|
| 103 |
+
IFVERBOSE(3) {
|
| 104 |
+
VERBOSE(3,"Examining RV store just after RV init" << endl);
|
| 105 |
+
printRV();
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
// Get score of final frame in HHMM
|
| 109 |
+
LogProb l(1.0);
|
| 110 |
+
//score = l.toDouble();
|
| 111 |
+
setScore(l.toDouble());
|
| 112 |
+
// MY::F_ROOT_OBS = true;
|
| 113 |
+
// this->modelData->getHiddenModel()->setRootObs(true);
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
template <class MY, class MX, class YS, class B>
|
| 120 |
+
int SyntacticLanguageModelState<MY,MX,YS,B>::Compare(const FFState& other) const
|
| 121 |
+
{
|
| 122 |
+
/*
|
| 123 |
+
const SyntacticLanguageModelState<MY,MX,YS,B>& o =
|
| 124 |
+
static_cast<const SyntacticLanguageModelState<MY,MX,YS,B>&>(other);
|
| 125 |
+
|
| 126 |
+
if (o.score > score) return 1;
|
| 127 |
+
else if (o.score < score) return -1;
|
| 128 |
+
else return 0;
|
| 129 |
+
*/
|
| 130 |
+
return 0;
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
template <class MY, class MX, class YS, class B>
|
| 135 |
+
SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word )
|
| 136 |
+
{
|
| 137 |
+
|
| 138 |
+
// Initialize member variables
|
| 139 |
+
this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
|
| 140 |
+
this->modelData = prev->modelData;
|
| 141 |
+
this->beamSize = prev->beamSize;
|
| 142 |
+
this->randomVariableStore->init(this->beamSize);
|
| 143 |
+
this->sentenceStart=false;
|
| 144 |
+
|
| 145 |
+
YS ysEND;
|
| 146 |
+
StringInput(String(END_STATE).c_array())>>ysEND>>"\0";
|
| 147 |
+
|
| 148 |
+
// Get HHMM model files
|
| 149 |
+
MY& mH = *(modelData->getHiddenModel());
|
| 150 |
+
MX& mO = *(modelData->getObservedModel());
|
| 151 |
+
|
| 152 |
+
// Initialize HHMM
|
| 153 |
+
HMM<MY,MX,YS,B> hmm(mH,mO);
|
| 154 |
+
int MAX_WORDS = 2;
|
| 155 |
+
hmm.init(MAX_WORDS,this->beamSize,prev->randomVariableStore);
|
| 156 |
+
typename MX::RandVarType x(word.c_str());
|
| 157 |
+
// cout << "Examining HHMM just after hmm.init" << endl;
|
| 158 |
+
// hmm.debugPrint();
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
/* cerr << "*********** BEGIN writeCurr() ******************" << endl;
|
| 162 |
+
hmm.writeCurr(cout,0);
|
| 163 |
+
hmm.writeCurr(cout,1);
|
| 164 |
+
cerr << "*********** END writeCurr() ******************" << endl;
|
| 165 |
+
*/
|
| 166 |
+
/*
|
| 167 |
+
{
|
| 168 |
+
|
| 169 |
+
int wnum=1;
|
| 170 |
+
list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list
|
| 171 |
+
for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame
|
| 172 |
+
cout << "HYPOTH " << wnum
|
| 173 |
+
<< " " << i->getBackData()
|
| 174 |
+
<< " " << x
|
| 175 |
+
<< " " << i->getId()
|
| 176 |
+
<< " (" << i->getLogProb() << ")"
|
| 177 |
+
<< endl; // print RV val
|
| 178 |
+
}
|
| 179 |
+
}
|
| 180 |
+
*/
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
/*
|
| 184 |
+
cerr << "Writing hmm.writeCurr" << endl;
|
| 185 |
+
hmm.writeCurr(cerr,0);
|
| 186 |
+
hmm.writeCurr(cerr,1);
|
| 187 |
+
cerr << "...done writing hmm.writeCurr" << endl;
|
| 188 |
+
*/
|
| 189 |
+
hmm.getCurrSum();
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
// Initialize observed variable
|
| 194 |
+
// typename MX::RandVarType ov;
|
| 195 |
+
// ov.set(word.c_str(),mO);
|
| 196 |
+
// MY::WORD = ov.getW();
|
| 197 |
+
//bool endOfSentence = prev->sentenceStart;//true;
|
| 198 |
+
|
| 199 |
+
// std::cerr << "About to give HHMM a word of input:\t" << word << std::endl;
|
| 200 |
+
|
| 201 |
+
hmm.updateRanked(x, prev->sentenceStart);
|
| 202 |
+
|
| 203 |
+
// cout << "Examining HHMM just after hmm.updateRanked(" << x << "," << prev->sentenceStart << ")" << endl;
|
| 204 |
+
// hmm.debugPrint();
|
| 205 |
+
/*
|
| 206 |
+
cerr << "*********** BEGIN writeCurr() ******************" << endl;
|
| 207 |
+
hmm.writeCurr(cout,0);
|
| 208 |
+
hmm.writeCurr(cout,1);
|
| 209 |
+
cerr << "*********** END writeCurr() ******************" << endl;
|
| 210 |
+
*/
|
| 211 |
+
/*
|
| 212 |
+
{
|
| 213 |
+
|
| 214 |
+
int wnum=1;
|
| 215 |
+
list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list
|
| 216 |
+
for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame
|
| 217 |
+
cout << "HYPOTH " << wnum
|
| 218 |
+
<< " " << i->getBackData()
|
| 219 |
+
<< " " << x
|
| 220 |
+
<< " " << i->getId()
|
| 221 |
+
<< " (" << i->getLogProb() << ")"
|
| 222 |
+
<< endl; // print RV val
|
| 223 |
+
}
|
| 224 |
+
}
|
| 225 |
+
*/
|
| 226 |
+
// X ov(word.c_str());
|
| 227 |
+
//mH.setWord(ov);
|
| 228 |
+
// MY::WORD = ov;//ov.getW();
|
| 229 |
+
|
| 230 |
+
// Update HHMM based on observed variable
|
| 231 |
+
//hmm.updateRanked(ov);
|
| 232 |
+
//mH.setRootObs(true);
|
| 233 |
+
//MY::F_ROOT_OBS = false;
|
| 234 |
+
|
| 235 |
+
// Get the current score
|
| 236 |
+
double currSum = hmm.getCurrSum();
|
| 237 |
+
//VERBOSE(3,"Setting score using currSum for " << scientific << x << " = " << currSum << endl);
|
| 238 |
+
setScore(currSum);
|
| 239 |
+
// cout << "Examining RV store just before RV init via gatherElementsInBeam" << endl;
|
| 240 |
+
// printRV();
|
| 241 |
+
|
| 242 |
+
// Get new hidden random variable store from HHMM
|
| 243 |
+
hmm.gatherElementsInBeam(randomVariableStore);
|
| 244 |
+
// cout << "Examining RV store just after RV init via gatherElementsInBeam" << endl;
|
| 245 |
+
// printRV();
|
| 246 |
+
/*
|
| 247 |
+
cerr << "Writing hmm.writeCurr..." << endl;
|
| 248 |
+
hmm.writeCurr(cerr,0);
|
| 249 |
+
hmm.writeCurr(cerr,1);
|
| 250 |
+
cerr << "...done writing hmm.writeCurr" << endl;
|
| 251 |
+
*/
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
template <class MY, class MX, class YS, class B>
|
| 256 |
+
double SyntacticLanguageModelState<MY,MX,YS,B>::getProb() const
|
| 257 |
+
{
|
| 258 |
+
|
| 259 |
+
return prob;
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
template <class MY, class MX, class YS, class B>
|
| 263 |
+
double SyntacticLanguageModelState<MY,MX,YS,B>::getScore() const
|
| 264 |
+
{
|
| 265 |
+
|
| 266 |
+
return score;
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
template <class MY, class MX, class YS, class B>
|
| 271 |
+
void SyntacticLanguageModelState<MY,MX,YS,B>::setScore(double score)
|
| 272 |
+
{
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
this->prob = score;
|
| 278 |
+
|
| 279 |
+
// We want values to range from -100 to 0
|
| 280 |
+
//
|
| 281 |
+
// If the minimum positive value for a double is min=4.94065645841246544e-324
|
| 282 |
+
// then to scale, we want a logarithmic base such that log_b(min)=-100
|
| 283 |
+
//
|
| 284 |
+
// -100 = log(min) / log(b)
|
| 285 |
+
//
|
| 286 |
+
// log(b) = log(min) / -100
|
| 287 |
+
//
|
| 288 |
+
// b = exp( log(min) / -100 )
|
| 289 |
+
//
|
| 290 |
+
// b = 7.44440071921381
|
| 291 |
+
|
| 292 |
+
// Check for score==0 to avoid causing -infinity with log(score)
|
| 293 |
+
if (score==0) {
|
| 294 |
+
this->score = -100;
|
| 295 |
+
} else {
|
| 296 |
+
double x = log(score) / 7.44440071921381;
|
| 297 |
+
if ( x >= -100) {
|
| 298 |
+
this->score = x;
|
| 299 |
+
} else {
|
| 300 |
+
this->score = -100;
|
| 301 |
+
}
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
VERBOSE(3,"\tSyntacticLanguageModelState has score=" << this->score << endl);
|
| 305 |
+
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
#endif
|
mosesdecoder/moses/Syntax/F2S/DerivationWriter.h
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <ostream>
|
| 4 |
+
|
| 5 |
+
#include "moses/Syntax/KBestExtractor.h"
|
| 6 |
+
#include "moses/Word.h"
|
| 7 |
+
|
| 8 |
+
namespace Moses
|
| 9 |
+
{
|
| 10 |
+
namespace Syntax
|
| 11 |
+
{
|
| 12 |
+
struct SHyperedge;
|
| 13 |
+
|
| 14 |
+
namespace F2S
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
// Writes a string representation of a derivation to a std::ostream. This is
|
| 18 |
+
// used by the -translation-details / -T option.
|
| 19 |
+
// TODO Merge this with S2T::DerivationWriter.
|
| 20 |
+
class DerivationWriter
|
| 21 |
+
{
|
| 22 |
+
public:
|
| 23 |
+
// 1-best version.
|
| 24 |
+
static void Write(const SHyperedge&, std::size_t, std::ostream &);
|
| 25 |
+
|
| 26 |
+
// k-best version.
|
| 27 |
+
static void Write(const KBestExtractor::Derivation &, std::size_t,
|
| 28 |
+
std::ostream &);
|
| 29 |
+
private:
|
| 30 |
+
static void WriteLine(const SHyperedge &, std::size_t, std::ostream &);
|
| 31 |
+
static void WriteSymbol(const Word &, std::ostream &);
|
| 32 |
+
};
|
| 33 |
+
|
| 34 |
+
} // namespace F2S
|
| 35 |
+
} // namespace Syntax
|
| 36 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/Forest.h
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "vector"
|
| 4 |
+
|
| 5 |
+
#include "moses/Syntax/PVertex.h"
|
| 6 |
+
|
| 7 |
+
namespace Moses
|
| 8 |
+
{
|
| 9 |
+
namespace Syntax
|
| 10 |
+
{
|
| 11 |
+
namespace F2S
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
class Forest
|
| 15 |
+
{
|
| 16 |
+
public:
|
| 17 |
+
struct Vertex;
|
| 18 |
+
|
| 19 |
+
struct Hyperedge {
|
| 20 |
+
Vertex *head;
|
| 21 |
+
std::vector<Vertex *> tail;
|
| 22 |
+
float weight;
|
| 23 |
+
};
|
| 24 |
+
|
| 25 |
+
struct Vertex {
|
| 26 |
+
Vertex(const PVertex &v) : pvertex(v) {}
|
| 27 |
+
~Vertex(); // Deletes incoming hyperedges.
|
| 28 |
+
PVertex pvertex;
|
| 29 |
+
std::vector<Hyperedge *> incoming;
|
| 30 |
+
};
|
| 31 |
+
|
| 32 |
+
// Constructor.
|
| 33 |
+
Forest() {}
|
| 34 |
+
|
| 35 |
+
// Destructor (deletes vertices).
|
| 36 |
+
~Forest();
|
| 37 |
+
|
| 38 |
+
// Delete all vertices.
|
| 39 |
+
void Clear();
|
| 40 |
+
|
| 41 |
+
std::vector<Vertex *> vertices;
|
| 42 |
+
|
| 43 |
+
private:
|
| 44 |
+
// Copying is not allowed.
|
| 45 |
+
Forest(const Forest &);
|
| 46 |
+
Forest &operator=(const Forest &);
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
} // namespace F2S
|
| 50 |
+
} // namespace Syntax
|
| 51 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.cpp
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "GlueRuleSynthesizer.h"
|
| 2 |
+
|
| 3 |
+
#include <sstream>
|
| 4 |
+
|
| 5 |
+
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
| 6 |
+
#include "util/string_stream.hh"
|
| 7 |
+
#include "moses/parameters/AllOptions.h"
|
| 8 |
+
namespace Moses
|
| 9 |
+
{
|
| 10 |
+
namespace Syntax
|
| 11 |
+
{
|
| 12 |
+
namespace F2S
|
| 13 |
+
{
|
| 14 |
+
|
| 15 |
+
// Captures the default input/output non-terminals from the options, keeps a
// reference to the rule trie, and builds the placeholder source phrase that
// is later passed to TargetPhrase::EvaluateInIsolation().
GlueRuleSynthesizer::GlueRuleSynthesizer(Moses::AllOptions const& opts,
    HyperTree &trie)
  : m_input_default_nonterminal(opts.syntax.input_default_non_terminal)
  , m_output_default_nonterminal(opts.syntax.output_default_non_terminal)
  , m_hyperTree(trie)
{
  // CreateFromString reports a left-hand-side word through its last
  // argument; it is of no use here, so it is released straight away.
  Word *unusedLhs = NULL;
  m_dummySourcePhrase.CreateFromString(Input, opts.input.factor_order,
                                       "hello", &unusedLhs);
  delete unusedLhs;
}
|
| 25 |
+
|
| 26 |
+
// Builds a glue rule for hyperedge `e` and adds it to the rule trie: the
// source side is a hyperpath derived from the edge, the target side is a
// freshly allocated TargetPhrase handed to the trie's phrase collection.
void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e)
{
  HyperPath sourcePath;
  SynthesizeHyperPath(e, sourcePath);

  // The target phrase is created before the trie lookup, as in the
  // original ordering.
  TargetPhrase *glueTarget = SynthesizeTargetPhrase(e);
  GetOrCreateTargetPhraseCollection(m_hyperTree, sourcePath)->Add(glueTarget);
}
|
| 35 |
+
|
| 36 |
+
// Fills `path` with a two-level hyperpath for hyperedge `e`: level 0 holds
// the id of the head vertex's symbol, level 1 the ids of the tail vertices'
// symbols in tail order. Any previous content of `path` is discarded.
void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
    HyperPath &path)
{
  path.nodeSeqs.clear();
  path.nodeSeqs.resize(2);

  HyperPath::NodeSeq &headLevel = path.nodeSeqs[0];
  HyperPath::NodeSeq &tailLevel = path.nodeSeqs[1];

  headLevel.push_back(e.head->pvertex.symbol[0]->GetId());

  for (std::size_t i = 0; i < e.tail.size(); ++i) {
    const Forest::Vertex &child = *e.tail[i];
    tailLevel.push_back(child.pvertex.symbol[0]->GetId());
  }
}
|
| 48 |
+
|
| 49 |
+
// Allocates the target side of a glue rule for hyperedge `e`.
//
// One target word is emitted per tail vertex, in order: the default output
// non-terminal for non-terminal symbols, otherwise an unknown-word copy of
// the symbol. The alignment is monotone ("i-i" for every position). The
// phrase is scored with LOWEST_SCORE so that glue rules are only used when
// absolutely required.
//
// Returns a heap-allocated TargetPhrase; the caller takes ownership.
TargetPhrase*
GlueRuleSynthesizer::
SynthesizeTargetPhrase(const Forest::Hyperedge &e)
{
  const UnknownWordPenaltyProducer &uwpProducer =
    UnknownWordPenaltyProducer::Instance();

  TargetPhrase *glue = new TargetPhrase();

  util::StringStream alignment;
  const std::size_t tailSize = e.tail.size();
  for (std::size_t pos = 0; pos < tailSize; ++pos) {
    const Word &sourceSymbol = e.tail[pos]->pvertex.symbol;
    if (!sourceSymbol.IsNonTerminal()) {
      // TODO Check this
      Word &unknown = glue->AddWord();
      unknown.CreateUnknownWord(sourceSymbol);
    } else {
      glue->AddWord(m_output_default_nonterminal);
    }
    alignment << pos << "-" << pos << " ";
  }

  // Lowest possible score: see the function comment.
  float glueScore = LOWEST_SCORE;
  glue->GetScoreBreakdown().Assign(&uwpProducer, glueScore);
  glue->EvaluateInIsolation(m_dummySourcePhrase);

  Word *lhs = new Word(m_output_default_nonterminal);
  glue->SetTargetLHS(lhs);
  glue->SetAlignmentInfo(alignment.str());

  return glue;
}
|
| 82 |
+
|
| 83 |
+
} // F2S
|
| 84 |
+
} // Syntax
|
| 85 |
+
} // Moses
|
mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.h
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "moses/Phrase.h"
|
| 4 |
+
#include "moses/TargetPhrase.h"
|
| 5 |
+
|
| 6 |
+
#include "HyperTree.h"
|
| 7 |
+
#include "HyperTreeCreator.h"
|
| 8 |
+
#include "Forest.h"
|
| 9 |
+
|
| 10 |
+
namespace Moses
|
| 11 |
+
{
|
| 12 |
+
class AllOptions;
|
| 13 |
+
namespace Syntax
|
| 14 |
+
{
|
| 15 |
+
namespace F2S
|
| 16 |
+
{
|
| 17 |
+
|
| 18 |
+
class GlueRuleSynthesizer : public HyperTreeCreator
|
| 19 |
+
{
|
| 20 |
+
Word m_input_default_nonterminal;
|
| 21 |
+
Word m_output_default_nonterminal;
|
| 22 |
+
public:
|
| 23 |
+
GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &);
|
| 24 |
+
|
| 25 |
+
// Synthesize the minimal, monotone rule that can be applied to the given
|
| 26 |
+
// hyperedge and add it to the rule trie.
|
| 27 |
+
void SynthesizeRule(const Forest::Hyperedge &);
|
| 28 |
+
|
| 29 |
+
private:
|
| 30 |
+
void SynthesizeHyperPath(const Forest::Hyperedge &, HyperPath &);
|
| 31 |
+
|
| 32 |
+
TargetPhrase *SynthesizeTargetPhrase(const Forest::Hyperedge &);
|
| 33 |
+
|
| 34 |
+
HyperTree &m_hyperTree;
|
| 35 |
+
Phrase m_dummySourcePhrase;
|
| 36 |
+
};
|
| 37 |
+
|
| 38 |
+
} // F2S
|
| 39 |
+
} // Syntax
|
| 40 |
+
} // Moses
|
mosesdecoder/moses/Syntax/F2S/HyperPath.cpp
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "HyperPath.h"
|
| 2 |
+
|
| 3 |
+
#include <limits>
|
| 4 |
+
|
| 5 |
+
namespace Moses
|
| 6 |
+
{
|
| 7 |
+
namespace Syntax
|
| 8 |
+
{
|
| 9 |
+
namespace F2S
|
| 10 |
+
{
|
| 11 |
+
|
| 12 |
+
// Out-of-class definitions of HyperPath's reserved symbol values. Both sit
// just below the largest std::size_t value.

// max() - 1
const std::size_t HyperPath::kEpsilon =
  std::numeric_limits<std::size_t>::max() - 1;

// max() - 2
const std::size_t HyperPath::kComma =
  std::numeric_limits<std::size_t>::max() - 2;
|
| 17 |
+
|
| 18 |
+
} // namespace F2S
|
| 19 |
+
} // namespace Syntax
|
| 20 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/HyperPath.h
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <vector>
|
| 4 |
+
|
| 5 |
+
#include "moses/Factor.h"
|
| 6 |
+
|
| 7 |
+
namespace Moses
|
| 8 |
+
{
|
| 9 |
+
namespace Syntax
|
| 10 |
+
{
|
| 11 |
+
namespace F2S
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
// A HyperPath for representing the source-side tree fragment of a
|
| 15 |
+
// tree-to-string rule. See this paper:
|
| 16 |
+
//
|
| 17 |
+
// Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
|
| 18 |
+
// "Fast Translation Rule Matching for Syntax-based Statistical Machine
|
| 19 |
+
// Translation"
|
| 20 |
+
// In proceedings of EMNLP 2009
|
| 21 |
+
//
|
| 22 |
+
struct HyperPath {
  // One level of the path: a sequence of symbol ids.
  typedef std::vector<std::size_t> NodeSeq;

  // Reserved symbol values (defined out of class); they are not ordinary
  // symbol ids.
  static const std::size_t kEpsilon;
  static const std::size_t kComma;

  // The path itself: one NodeSeq per level.
  std::vector<NodeSeq> nodeSeqs;
};
|
| 31 |
+
|
| 32 |
+
} // namespace F2S
|
| 33 |
+
} // namespace Syntax
|
| 34 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/HyperPathLoader.h
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <stack>
|
| 4 |
+
#include <vector>
|
| 5 |
+
|
| 6 |
+
#include "util/string_piece.hh"
|
| 7 |
+
|
| 8 |
+
#include "moses/FactorCollection.h"
|
| 9 |
+
#include "moses/TypeDef.h"
|
| 10 |
+
|
| 11 |
+
#include "HyperPath.h"
|
| 12 |
+
#include "TreeFragmentTokenizer.h"
|
| 13 |
+
|
| 14 |
+
namespace Moses
|
| 15 |
+
{
|
| 16 |
+
namespace Syntax
|
| 17 |
+
{
|
| 18 |
+
namespace F2S
|
| 19 |
+
{
|
| 20 |
+
|
| 21 |
+
// Parses a string representation of a tree fragment, adding the terminals
|
| 22 |
+
// and non-terminals to FactorCollection::Instance() and building a
|
| 23 |
+
// HyperPath object.
|
| 24 |
+
//
|
| 25 |
+
// This class is designed to be used during rule table loading. Since every
|
| 26 |
+
// rule has a tree fragment on the source-side, Load() may be called millions
|
| 27 |
+
// of times. The algorithm therefore sacrifices readability for speed and
|
| 28 |
+
// shoehorns everything into two passes over the input token sequence.
|
| 29 |
+
//
|
| 30 |
+
class HyperPathLoader
|
| 31 |
+
{
|
| 32 |
+
public:
|
| 33 |
+
void Load(const StringPiece &, HyperPath &);
|
| 34 |
+
|
| 35 |
+
private:
|
| 36 |
+
struct NodeTuple {
|
| 37 |
+
int index; // Preorder index of the node.
|
| 38 |
+
int parent; // Preorder index of the node's parent.
|
| 39 |
+
int depth; // Depth of the node.
|
| 40 |
+
std::size_t symbol; // Either the factor ID of a tree terminal/non-terminal
|
| 41 |
+
// or for virtual nodes, HyperPath::kEpsilon.
|
| 42 |
+
};
|
| 43 |
+
|
| 44 |
+
// Determine the height of the current tree fragment (stored in m_tokenSeq).
|
| 45 |
+
int DetermineHeight() const;
|
| 46 |
+
|
| 47 |
+
// Generate the preorder sequence of NodeTuples for the current tree fragment,
|
| 48 |
+
// including virtual nodes.
|
| 49 |
+
void GenerateNodeTupleSeq(int height);
|
| 50 |
+
|
| 51 |
+
const Factor *AddTerminalFactor(const StringPiece &s) {
|
| 52 |
+
return FactorCollection::Instance().AddFactor(s, false);
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
const Factor *AddNonTerminalFactor(const StringPiece &s) {
|
| 56 |
+
return FactorCollection::Instance().AddFactor(s, true);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
std::vector<TreeFragmentToken> m_tokenSeq;
|
| 60 |
+
std::vector<NodeTuple> m_nodeTupleSeq;
|
| 61 |
+
std::stack<int> m_parentStack;
|
| 62 |
+
};
|
| 63 |
+
|
| 64 |
+
} // namespace F2S
|
| 65 |
+
} // namespace Syntax
|
| 66 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/HyperTree.cpp
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "HyperTree.h"
|
| 2 |
+
|
| 3 |
+
namespace Moses
|
| 4 |
+
{
|
| 5 |
+
namespace Syntax
|
| 6 |
+
{
|
| 7 |
+
namespace F2S
|
| 8 |
+
{
|
| 9 |
+
|
| 10 |
+
void HyperTree::Node::Prune(std::size_t tableLimit)
|
| 11 |
+
{
|
| 12 |
+
// Recusively prune child nodes.
|
| 13 |
+
for (Map::iterator p = m_map.begin(); p != m_map.end(); ++p) {
|
| 14 |
+
p->second.Prune(tableLimit);
|
| 15 |
+
}
|
| 16 |
+
// Prune TargetPhraseCollection at this node.
|
| 17 |
+
m_targetPhraseCollection->Prune(true, tableLimit);
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
void HyperTree::Node::Sort(std::size_t tableLimit)
|
| 21 |
+
{
|
| 22 |
+
// Recusively sort child nodes.
|
| 23 |
+
for (Map::iterator p = m_map.begin(); p != m_map.end(); ++p) {
|
| 24 |
+
p->second.Sort(tableLimit);
|
| 25 |
+
}
|
| 26 |
+
// Sort TargetPhraseCollection at this node.
|
| 27 |
+
m_targetPhraseCollection->Sort(true, tableLimit);
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
// Returns the child reached via `nodeSeq`; the map's operator[] inserts a
// default-constructed child when none exists yet. Never returns NULL.
HyperTree::Node *HyperTree::Node::GetOrCreateChild(
  const HyperPath::NodeSeq &nodeSeq)
{
  Node &child = m_map[nodeSeq];
  return &child;
}
|
| 35 |
+
|
| 36 |
+
// Read-only lookup of the child reached via `nodeSeq`.
// Returns NULL when this node has no such child.
const HyperTree::Node *HyperTree::Node::GetChild(
  const HyperPath::NodeSeq &nodeSeq) const
{
  const Map::const_iterator found = m_map.find(nodeSeq);
  if (found == m_map.end()) {
    return NULL;
  }
  return &found->second;
}
|
| 42 |
+
|
| 43 |
+
// Returns the phrase collection stored at the trie node addressed by
// `hyperPath`, creating the node (and any missing ancestors) if necessary.
TargetPhraseCollection::shared_ptr HyperTree::GetOrCreateTargetPhraseCollection(
  const HyperPath &hyperPath)
{
  return GetOrCreateNode(hyperPath).GetTargetPhraseCollection();
}
|
| 49 |
+
|
| 50 |
+
// Walks down from the root, following one NodeSeq of `hyperPath` per level
// and creating missing children on the way. An empty path yields the root.
HyperTree::Node &HyperTree::GetOrCreateNode(const HyperPath &hyperPath)
{
  typedef std::vector<HyperPath::NodeSeq>::const_iterator LevelIter;

  Node *cursor = &m_root;
  for (LevelIter level = hyperPath.nodeSeqs.begin();
       level != hyperPath.nodeSeqs.end(); ++level) {
    cursor = cursor->GetOrCreateChild(*level);
  }
  return *cursor;
}
|
| 60 |
+
|
| 61 |
+
void HyperTree::SortAndPrune(std::size_t tableLimit)
|
| 62 |
+
{
|
| 63 |
+
if (tableLimit) {
|
| 64 |
+
m_root.Sort(tableLimit);
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
} // namespace F2S
|
| 69 |
+
} // namespace Syntax
|
| 70 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/HyperTree.h
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <map>
|
| 4 |
+
#include <vector>
|
| 5 |
+
|
| 6 |
+
#include <boost/unordered_map.hpp>
|
| 7 |
+
|
| 8 |
+
#include "moses/Syntax/RuleTable.h"
|
| 9 |
+
#include "moses/TargetPhraseCollection.h"
|
| 10 |
+
|
| 11 |
+
#include "HyperPath.h"
|
| 12 |
+
|
| 13 |
+
namespace Moses
|
| 14 |
+
{
|
| 15 |
+
namespace Syntax
|
| 16 |
+
{
|
| 17 |
+
namespace F2S
|
| 18 |
+
{
|
| 19 |
+
|
| 20 |
+
// A HyperTree for representing a tree-to-string rule table. See this paper:
|
| 21 |
+
//
|
| 22 |
+
// Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
|
| 23 |
+
// "Fast Translation Rule Matching for Syntax-based Statistical Machine
|
| 24 |
+
// Translation"
|
| 25 |
+
// In proceedings of EMNLP 2009
|
| 26 |
+
//
|
| 27 |
+
class HyperTree : public RuleTable
|
| 28 |
+
{
|
| 29 |
+
public:
|
| 30 |
+
class Node
|
| 31 |
+
{
|
| 32 |
+
public:
|
| 33 |
+
typedef boost::unordered_map<HyperPath::NodeSeq, Node> Map;
|
| 34 |
+
|
| 35 |
+
bool IsLeaf() const {
|
| 36 |
+
return m_map.empty();
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
bool HasRules() const {
|
| 40 |
+
return !m_targetPhraseCollection->IsEmpty();
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
void Prune(std::size_t tableLimit);
|
| 44 |
+
void Sort(std::size_t tableLimit);
|
| 45 |
+
|
| 46 |
+
Node *GetOrCreateChild(const HyperPath::NodeSeq &);
|
| 47 |
+
|
| 48 |
+
const Node *GetChild(const HyperPath::NodeSeq &) const;
|
| 49 |
+
|
| 50 |
+
TargetPhraseCollection::shared_ptr
|
| 51 |
+
GetTargetPhraseCollection() const {
|
| 52 |
+
return m_targetPhraseCollection;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
TargetPhraseCollection::shared_ptr
|
| 56 |
+
GetTargetPhraseCollection() {
|
| 57 |
+
return m_targetPhraseCollection;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
const Map &GetMap() const {
|
| 61 |
+
return m_map;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
Node() : m_targetPhraseCollection(new TargetPhraseCollection) { }
|
| 65 |
+
|
| 66 |
+
private:
|
| 67 |
+
Map m_map;
|
| 68 |
+
TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
|
| 69 |
+
};
|
| 70 |
+
|
| 71 |
+
HyperTree(const RuleTableFF *ff) : RuleTable(ff) { }
|
| 72 |
+
|
| 73 |
+
const Node &GetRootNode() const {
|
| 74 |
+
return m_root;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
private:
|
| 78 |
+
friend class HyperTreeCreator;
|
| 79 |
+
|
| 80 |
+
TargetPhraseCollection::shared_ptr
|
| 81 |
+
GetOrCreateTargetPhraseCollection(const HyperPath &);
|
| 82 |
+
|
| 83 |
+
Node &GetOrCreateNode(const HyperPath &);
|
| 84 |
+
|
| 85 |
+
void SortAndPrune(std::size_t);
|
| 86 |
+
|
| 87 |
+
Node m_root;
|
| 88 |
+
};
|
| 89 |
+
|
| 90 |
+
} // namespace F2S
|
| 91 |
+
} // namespace Syntax
|
| 92 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.cpp
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "HyperTreeLoader.h"
|
| 2 |
+
|
| 3 |
+
#include <sys/stat.h>
|
| 4 |
+
|
| 5 |
+
#include <cmath>
|
| 6 |
+
#include <cstdlib>
|
| 7 |
+
#include <fstream>
|
| 8 |
+
#include <string>
|
| 9 |
+
#include <iterator>
|
| 10 |
+
#include <algorithm>
|
| 11 |
+
#include <iostream>
|
| 12 |
+
|
| 13 |
+
#include "moses/FactorCollection.h"
|
| 14 |
+
#include "moses/Word.h"
|
| 15 |
+
#include "moses/Util.h"
|
| 16 |
+
#include "moses/Timer.h"
|
| 17 |
+
#include "moses/InputFileStream.h"
|
| 18 |
+
#include "moses/StaticData.h"
|
| 19 |
+
#include "moses/Range.h"
|
| 20 |
+
#include "moses/ChartTranslationOptionList.h"
|
| 21 |
+
#include "moses/FactorCollection.h"
|
| 22 |
+
#include "moses/Syntax/RuleTableFF.h"
|
| 23 |
+
#include "moses/parameters/AllOptions.h"
|
| 24 |
+
#include "util/file_piece.hh"
|
| 25 |
+
#include "util/string_piece.hh"
|
| 26 |
+
#include "util/tokenize_piece.hh"
|
| 27 |
+
#include "util/double-conversion/double-conversion.h"
|
| 28 |
+
#include "util/exception.hh"
|
| 29 |
+
|
| 30 |
+
#include "HyperPath.h"
|
| 31 |
+
#include "HyperPathLoader.h"
|
| 32 |
+
#include "HyperTree.h"
|
| 33 |
+
|
| 34 |
+
namespace Moses
|
| 35 |
+
{
|
| 36 |
+
|
| 37 |
+
namespace Syntax
|
| 38 |
+
{
|
| 39 |
+
namespace F2S
|
| 40 |
+
{
|
| 41 |
+
|
| 42 |
+
// Reads a rule table from `inFile` into `trie`.
//
// Each line is split on "|||" and the fields are consumed positionally:
//   source tree fragment ||| target ||| scores ||| [alignment] ||| [counts]
//   ||| [sparse scores] ||| [properties]
// The counts field is skipped. Scores are passed through TransformScore()
// and FloorScore(), and their number must equal ff.GetNumScoreComponents();
// a NaN score or a count mismatch throws via UTIL_THROW*.
//
// `sourceTermSet` is cleared and then filled with the source-side terminal
// ids seen in the loaded rules. After reading, the trie is sorted/pruned if
// `ff` has a non-zero table limit. `opts` is not used in this function.
//
// Always returns true; failures are reported by exception.
bool HyperTreeLoader::Load(AllOptions const& opts,
                           const std::vector<FactorType> &input,
                           const std::vector<FactorType> &output,
                           const std::string &inFile,
                           const RuleTableFF &ff,
                           HyperTree &trie,
                           boost::unordered_set<std::size_t> &sourceTermSet)
{
  PrintUserTime(std::string("Start loading HyperTree"));

  sourceTermSet.clear();

  // Number of rules added so far; also used in error messages.
  std::size_t count = 0;

  // Progress is only reported at verbosity level 1 and above.
  std::ostream *progress = NULL;
  IFVERBOSE(1) progress = &std::cerr;
  util::FilePiece in(inFile.c_str(), progress);

  // Objects reused across iterations.
  std::vector<float> scores;
  StringPiece line;

  double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");

  HyperPathLoader hyperPathLoader;

  // Placeholder source phrase handed to EvaluateInIsolation() below.
  Phrase dummySourcePhrase;
  {
    Word *unusedLhs = NULL;
    dummySourcePhrase.CreateFromString(Input, input, "hello", &unusedLhs);
    delete unusedLhs;
  }

  for (;;) {
    // End of input is signalled by an exception from ReadLine().
    try {
      line = in.ReadLine();
    } catch (const util::EndOfFileException &) {
      break;
    }

    util::TokenIter<util::MultiCharacter> pipes(line, "|||");
    StringPiece sourceString(*pipes);
    StringPiece targetString(*++pipes);
    StringPiece scoreString(*++pipes);

    // The alignment field is optional.
    StringPiece alignString;
    if (++pipes) {
      alignString = StringPiece(*pipes);
    }

    ++pipes; // counts

    // Parse the dense scores.
    scores.clear();
    for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
      int processed;
      float score = converter.StringToFloat(s->data(), s->length(), &processed);
      UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count);
      scores.push_back(FloorScore(TransformScore(score)));
    }
    const std::size_t numScoreComponents = ff.GetNumScoreComponents();
    if (scores.size() != numScoreComponents) {
      UTIL_THROW2("Size of scoreVector != number (" << scores.size() << "!="
                  << numScoreComponents << ") of score components on line " << count);
    }

    // Source-side
    HyperPath sourceFragment;
    hyperPathLoader.Load(sourceString, sourceFragment);
    ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);

    // Target-side
    TargetPhrase *targetPhrase = new TargetPhrase(&ff);
    Word *targetLHS = NULL;
    targetPhrase->CreateFromString(Output, output, targetString, &targetLHS);
    targetPhrase->SetTargetLHS(targetLHS);
    targetPhrase->SetAlignmentInfo(alignString);

    // Optional sparse-score field.
    if (++pipes) {
      StringPiece sparseString(*pipes);
      targetPhrase->SetSparseScore(&ff, sparseString);
    }

    // Optional properties field.
    if (++pipes) {
      StringPiece propertiesString(*pipes);
      targetPhrase->SetProperties(propertiesString);
    }

    targetPhrase->GetScoreBreakdown().Assign(&ff, scores);
    targetPhrase->EvaluateInIsolation(dummySourcePhrase,
                                      ff.GetFeaturesToApply());

    // Add rule to trie.
    GetOrCreateTargetPhraseCollection(trie, sourceFragment)->Add(targetPhrase);

    count++;
  }

  // sort and prune each target phrase collection
  if (ff.GetTableLimit()) {
    SortAndPrune(trie, ff.GetTableLimit());
  }

  return true;
}
|
| 149 |
+
|
| 150 |
+
void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
|
| 151 |
+
const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
|
| 152 |
+
{
|
| 153 |
+
for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
|
| 154 |
+
p != hp.nodeSeqs.end(); ++p) {
|
| 155 |
+
for (std::vector<std::size_t>::const_iterator q = p->begin();
|
| 156 |
+
q != p->end(); ++q) {
|
| 157 |
+
const std::size_t factorId = *q;
|
| 158 |
+
if (factorId >= moses_MaxNumNonterminals &&
|
| 159 |
+
factorId != HyperPath::kComma &&
|
| 160 |
+
factorId != HyperPath::kEpsilon) {
|
| 161 |
+
sourceTerminalSet.insert(factorId);
|
| 162 |
+
}
|
| 163 |
+
}
|
| 164 |
+
}
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
} // namespace F2S
|
| 168 |
+
} // namespace Syntax
|
| 169 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.h
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <istream>
|
| 4 |
+
#include <vector>
|
| 5 |
+
|
| 6 |
+
#include <boost/unordered_set.hpp>
|
| 7 |
+
|
| 8 |
+
#include "moses/TypeDef.h"
|
| 9 |
+
#include "moses/Syntax/RuleTableFF.h"
|
| 10 |
+
|
| 11 |
+
#include "HyperPath.h"
|
| 12 |
+
#include "HyperTree.h"
|
| 13 |
+
#include "HyperTreeCreator.h"
|
| 14 |
+
|
| 15 |
+
namespace Moses
|
| 16 |
+
{
|
| 17 |
+
class AllOptions;
|
| 18 |
+
namespace Syntax
|
| 19 |
+
{
|
| 20 |
+
namespace F2S
|
| 21 |
+
{
|
| 22 |
+
|
| 23 |
+
class HyperTreeLoader : public HyperTreeCreator
|
| 24 |
+
{
|
| 25 |
+
public:
|
| 26 |
+
bool Load(AllOptions const& opts,
|
| 27 |
+
const std::vector<FactorType> &input,
|
| 28 |
+
const std::vector<FactorType> &output,
|
| 29 |
+
const std::string &inFile,
|
| 30 |
+
const RuleTableFF &,
|
| 31 |
+
HyperTree &,
|
| 32 |
+
boost::unordered_set<std::size_t> &);
|
| 33 |
+
|
| 34 |
+
private:
|
| 35 |
+
void ExtractSourceTerminalSetFromHyperPath(
|
| 36 |
+
const HyperPath &, boost::unordered_set<std::size_t> &);
|
| 37 |
+
};
|
| 38 |
+
|
| 39 |
+
} // namespace F2S
|
| 40 |
+
} // namespace Syntax
|
| 41 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "moses/Syntax/PHyperedge.h"
|
| 4 |
+
#include "moses/Syntax/PVertex.h"
|
| 5 |
+
#include "moses/Syntax/SHyperedgeBundle.h"
|
| 6 |
+
|
| 7 |
+
#include "PVertexToStackMap.h"
|
| 8 |
+
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
namespace Syntax
|
| 12 |
+
{
|
| 13 |
+
namespace F2S
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
// Given a PHyperedge object and SStackSet produces a SHyperedgeBundle object.
|
| 17 |
+
inline void PHyperedgeToSHyperedgeBundle(const PHyperedge &hyperedge,
|
| 18 |
+
const PVertexToStackMap &stackMap,
|
| 19 |
+
SHyperedgeBundle &bundle)
|
| 20 |
+
{
|
| 21 |
+
bundle.inputWeight = hyperedge.label.inputWeight;
|
| 22 |
+
bundle.translations = hyperedge.label.translations;
|
| 23 |
+
bundle.stacks.clear();
|
| 24 |
+
for (std::vector<PVertex*>::const_iterator p = hyperedge.tail.begin();
|
| 25 |
+
p != hyperedge.tail.end(); ++p) {
|
| 26 |
+
PVertexToStackMap::const_iterator q = stackMap.find(*p);
|
| 27 |
+
const SVertexStack &stack = q->second;
|
| 28 |
+
bundle.stacks.push_back(&stack);
|
| 29 |
+
}
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
} // F2S
|
| 33 |
+
} // Syntax
|
| 34 |
+
} // Moses
|
mosesdecoder/moses/Syntax/F2S/PVertexToStackMap.h
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <boost/unordered_map.hpp>
|
| 4 |
+
|
| 5 |
+
#include "moses/Syntax/PVertex.h"
|
| 6 |
+
#include "moses/Syntax/SVertexStack.h"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
namespace Syntax
|
| 12 |
+
{
|
| 13 |
+
namespace F2S
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
// Hash map keyed on PVertex pointers; each entry stores an SVertexStack by
// value.
typedef boost::unordered_map<const PVertex *, SVertexStack> PVertexToStackMap;
|
| 17 |
+
|
| 18 |
+
} // namespace F2S
|
| 19 |
+
} // namespace Syntax
|
| 20 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/RuleMatcherCallback.h
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "moses/Syntax/BoundedPriorityContainer.h"
|
| 4 |
+
#include "moses/Syntax/PHyperedge.h"
|
| 5 |
+
#include "moses/Syntax/PVertex.h"
|
| 6 |
+
#include "moses/Syntax/SHyperedgeBundle.h"
|
| 7 |
+
#include "moses/Syntax/SHyperedgeBundleScorer.h"
|
| 8 |
+
|
| 9 |
+
#include "PHyperedgeToSHyperedgeBundle.h"
|
| 10 |
+
#include "PVertexToStackMap.h"
|
| 11 |
+
|
| 12 |
+
namespace Moses
|
| 13 |
+
{
|
| 14 |
+
namespace Syntax
|
| 15 |
+
{
|
| 16 |
+
namespace F2S
|
| 17 |
+
{
|
| 18 |
+
|
| 19 |
+
class RuleMatcherCallback
|
| 20 |
+
{
|
| 21 |
+
private:
|
| 22 |
+
typedef BoundedPriorityContainer<SHyperedgeBundle> Container;
|
| 23 |
+
|
| 24 |
+
public:
|
| 25 |
+
RuleMatcherCallback(const PVertexToStackMap &stackMap, std::size_t ruleLimit)
|
| 26 |
+
: m_stackMap(stackMap)
|
| 27 |
+
, m_container(ruleLimit) {}
|
| 28 |
+
|
| 29 |
+
void operator()(const PHyperedge &hyperedge) {
|
| 30 |
+
PHyperedgeToSHyperedgeBundle(hyperedge, m_stackMap, m_tmpBundle);
|
| 31 |
+
float score = SHyperedgeBundleScorer::Score(m_tmpBundle);
|
| 32 |
+
m_container.SwapIn(m_tmpBundle, score);
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
void ClearContainer() {
|
| 36 |
+
m_container.LazyClear();
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
const Container &GetContainer() {
|
| 40 |
+
return m_container;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
private:
|
| 44 |
+
const PVertexToStackMap &m_stackMap;
|
| 45 |
+
SHyperedgeBundle m_tmpBundle;
|
| 46 |
+
BoundedPriorityContainer<SHyperedgeBundle> m_container;
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
} // F2S
|
| 50 |
+
} // Syntax
|
| 51 |
+
} // Moses
|
mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
namespace Moses
|
| 4 |
+
{
|
| 5 |
+
namespace Syntax
|
| 6 |
+
{
|
| 7 |
+
namespace F2S
|
| 8 |
+
{
|
| 9 |
+
|
| 10 |
+
template<typename Callback>
|
| 11 |
+
RuleMatcherHyperTree<Callback>::RuleMatcherHyperTree(const HyperTree &ruleTrie)
|
| 12 |
+
: m_ruleTrie(ruleTrie)
|
| 13 |
+
{
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
template<typename Callback>
|
| 17 |
+
void RuleMatcherHyperTree<Callback>::EnumerateHyperedges(
|
| 18 |
+
const Forest::Vertex &v, Callback &callback)
|
| 19 |
+
{
|
| 20 |
+
const HyperTree::Node &root = m_ruleTrie.GetRootNode();
|
| 21 |
+
HyperPath::NodeSeq nodeSeq(1, v.pvertex.symbol[0]->GetId());
|
| 22 |
+
const HyperTree::Node *child = root.GetChild(nodeSeq);
|
| 23 |
+
if (!child) {
|
| 24 |
+
return;
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
m_hyperedge.head = const_cast<PVertex*>(&v.pvertex);
|
| 28 |
+
|
| 29 |
+
// Initialize the queue.
|
| 30 |
+
MatchItem item;
|
| 31 |
+
item.annotatedFNS.fns = FNS(1, &v);
|
| 32 |
+
item.trieNode = child;
|
| 33 |
+
m_queue.push(item);
|
| 34 |
+
|
| 35 |
+
while (!m_queue.empty()) {
|
| 36 |
+
MatchItem item = m_queue.front();
|
| 37 |
+
m_queue.pop();
|
| 38 |
+
if (item.trieNode->HasRules()) {
|
| 39 |
+
const FNS &fns = item.annotatedFNS.fns;
|
| 40 |
+
// Set the output hyperedge's tail.
|
| 41 |
+
m_hyperedge.tail.clear();
|
| 42 |
+
for (FNS::const_iterator p = fns.begin(); p != fns.end(); ++p) {
|
| 43 |
+
const Forest::Vertex *v = *p;
|
| 44 |
+
m_hyperedge.tail.push_back(const_cast<PVertex *>(&(v->pvertex)));
|
| 45 |
+
}
|
| 46 |
+
// Set the output hyperedge label's input weight.
|
| 47 |
+
m_hyperedge.label.inputWeight = 0.0f;
|
| 48 |
+
for (std::vector<const Forest::Hyperedge *>::const_iterator
|
| 49 |
+
p = item.annotatedFNS.fragment.begin();
|
| 50 |
+
p != item.annotatedFNS.fragment.end(); ++p) {
|
| 51 |
+
m_hyperedge.label.inputWeight += (*p)->weight;
|
| 52 |
+
}
|
| 53 |
+
// Set the output hyperedge label's translation set pointer.
|
| 54 |
+
m_hyperedge.label.translations
|
| 55 |
+
= item.trieNode->GetTargetPhraseCollection();
|
| 56 |
+
// Pass the output hyperedge to the callback.
|
| 57 |
+
callback(m_hyperedge);
|
| 58 |
+
}
|
| 59 |
+
PropagateNextLexel(item);
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
template<typename Callback>
|
| 64 |
+
void RuleMatcherHyperTree<Callback>::PropagateNextLexel(const MatchItem &item)
|
| 65 |
+
{
|
| 66 |
+
std::vector<AnnotatedFNS> tfns;
|
| 67 |
+
std::vector<AnnotatedFNS> rfns;
|
| 68 |
+
std::vector<AnnotatedFNS> rfns2;
|
| 69 |
+
|
| 70 |
+
const HyperTree::Node &trieNode = *(item.trieNode);
|
| 71 |
+
const HyperTree::Node::Map &map = trieNode.GetMap();
|
| 72 |
+
|
| 73 |
+
for (HyperTree::Node::Map::const_iterator p = map.begin();
|
| 74 |
+
p != map.end(); ++p) {
|
| 75 |
+
const HyperPath::NodeSeq &edgeLabel = p->first;
|
| 76 |
+
const HyperTree::Node &child = p->second;
|
| 77 |
+
|
| 78 |
+
const int numSubSeqs = CountCommas(edgeLabel) + 1;
|
| 79 |
+
|
| 80 |
+
std::size_t pos = 0;
|
| 81 |
+
for (int i = 0; i < numSubSeqs; ++i) {
|
| 82 |
+
const FNS &fns = item.annotatedFNS.fns;
|
| 83 |
+
tfns.clear();
|
| 84 |
+
if (edgeLabel[pos] == HyperPath::kEpsilon) {
|
| 85 |
+
AnnotatedFNS x;
|
| 86 |
+
x.fns = FNS(1, fns[i]);
|
| 87 |
+
tfns.push_back(x);
|
| 88 |
+
pos += 2;
|
| 89 |
+
} else {
|
| 90 |
+
const int subSeqLength = SubSeqLength(edgeLabel, pos);
|
| 91 |
+
const std::vector<Forest::Hyperedge*> &incoming = fns[i]->incoming;
|
| 92 |
+
for (std::vector<Forest::Hyperedge *>::const_iterator q =
|
| 93 |
+
incoming.begin(); q != incoming.end(); ++q) {
|
| 94 |
+
const Forest::Hyperedge &edge = **q;
|
| 95 |
+
if (MatchChildren(edge.tail, edgeLabel, pos, subSeqLength)) {
|
| 96 |
+
tfns.resize(tfns.size()+1);
|
| 97 |
+
tfns.back().fns.assign(edge.tail.begin(), edge.tail.end());
|
| 98 |
+
tfns.back().fragment.push_back(&edge);
|
| 99 |
+
}
|
| 100 |
+
}
|
| 101 |
+
pos += subSeqLength + 1;
|
| 102 |
+
}
|
| 103 |
+
if (tfns.empty()) {
|
| 104 |
+
rfns.clear();
|
| 105 |
+
break;
|
| 106 |
+
} else if (i == 0) {
|
| 107 |
+
rfns.swap(tfns);
|
| 108 |
+
} else {
|
| 109 |
+
CartesianProduct(rfns, tfns, rfns2);
|
| 110 |
+
rfns.swap(rfns2);
|
| 111 |
+
}
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
for (typename std::vector<AnnotatedFNS>::const_iterator q = rfns.begin();
|
| 115 |
+
q != rfns.end(); ++q) {
|
| 116 |
+
MatchItem newItem;
|
| 117 |
+
newItem.annotatedFNS.fns = q->fns;
|
| 118 |
+
newItem.annotatedFNS.fragment = item.annotatedFNS.fragment;
|
| 119 |
+
newItem.annotatedFNS.fragment.insert(newItem.annotatedFNS.fragment.end(),
|
| 120 |
+
q->fragment.begin(),
|
| 121 |
+
q->fragment.end());
|
| 122 |
+
newItem.trieNode = &child;
|
| 123 |
+
m_queue.push(newItem);
|
| 124 |
+
}
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
template<typename Callback>
|
| 129 |
+
void RuleMatcherHyperTree<Callback>::CartesianProduct(
|
| 130 |
+
const std::vector<AnnotatedFNS> &x,
|
| 131 |
+
const std::vector<AnnotatedFNS> &y,
|
| 132 |
+
std::vector<AnnotatedFNS> &z)
|
| 133 |
+
{
|
| 134 |
+
z.clear();
|
| 135 |
+
z.reserve(x.size() * y.size());
|
| 136 |
+
for (typename std::vector<AnnotatedFNS>::const_iterator p = x.begin();
|
| 137 |
+
p != x.end(); ++p) {
|
| 138 |
+
const AnnotatedFNS &a = *p;
|
| 139 |
+
for (typename std::vector<AnnotatedFNS>::const_iterator q = y.begin();
|
| 140 |
+
q != y.end(); ++q) {
|
| 141 |
+
const AnnotatedFNS &b = *q;
|
| 142 |
+
// Create a new AnnotatedFNS.
|
| 143 |
+
z.resize(z.size()+1);
|
| 144 |
+
AnnotatedFNS &c = z.back();
|
| 145 |
+
// Combine frontier node sequences from a and b.
|
| 146 |
+
c.fns.reserve(a.fns.size() + b.fns.size());
|
| 147 |
+
c.fns.assign(a.fns.begin(), a.fns.end());
|
| 148 |
+
c.fns.insert(c.fns.end(), b.fns.begin(), b.fns.end());
|
| 149 |
+
// Combine tree fragments from a and b.
|
| 150 |
+
c.fragment.reserve(a.fragment.size() + b.fragment.size());
|
| 151 |
+
c.fragment.assign(a.fragment.begin(), a.fragment.end());
|
| 152 |
+
c.fragment.insert(c.fragment.end(), b.fragment.begin(), b.fragment.end());
|
| 153 |
+
}
|
| 154 |
+
}
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
template<typename Callback>
|
| 158 |
+
bool RuleMatcherHyperTree<Callback>::MatchChildren(
|
| 159 |
+
const std::vector<Forest::Vertex *> &children,
|
| 160 |
+
const HyperPath::NodeSeq &edgeLabel,
|
| 161 |
+
std::size_t pos,
|
| 162 |
+
std::size_t subSeqSize)
|
| 163 |
+
{
|
| 164 |
+
if (children.size() != subSeqSize) {
|
| 165 |
+
return false;
|
| 166 |
+
}
|
| 167 |
+
for (size_t i = 0; i < subSeqSize; ++i) {
|
| 168 |
+
if (edgeLabel[pos+i] != children[i]->pvertex.symbol[0]->GetId()) {
|
| 169 |
+
return false;
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
return true;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
template<typename Callback>
|
| 176 |
+
int RuleMatcherHyperTree<Callback>::CountCommas(const HyperPath::NodeSeq &seq)
|
| 177 |
+
{
|
| 178 |
+
int count = 0;
|
| 179 |
+
for (std::vector<std::size_t>::const_iterator p = seq.begin();
|
| 180 |
+
p != seq.end(); ++p) {
|
| 181 |
+
if (*p == HyperPath::kComma) {
|
| 182 |
+
++count;
|
| 183 |
+
}
|
| 184 |
+
}
|
| 185 |
+
return count;
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
template<typename Callback>
|
| 189 |
+
int RuleMatcherHyperTree<Callback>::SubSeqLength(const HyperPath::NodeSeq &seq,
|
| 190 |
+
int pos)
|
| 191 |
+
{
|
| 192 |
+
int length = 0;
|
| 193 |
+
HyperPath::NodeSeq::size_type curpos = pos;
|
| 194 |
+
while (curpos != seq.size() && seq[curpos] != HyperPath::kComma) {
|
| 195 |
+
++curpos;
|
| 196 |
+
++length;
|
| 197 |
+
}
|
| 198 |
+
return length;
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
} // namespace F2S
|
| 202 |
+
} // namespace Syntax
|
| 203 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree.h
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "moses/Syntax/PHyperedge.h"
|
| 4 |
+
|
| 5 |
+
#include "Forest.h"
|
| 6 |
+
#include "HyperTree.h"
|
| 7 |
+
#include "RuleMatcher.h"
|
| 8 |
+
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
namespace Syntax
|
| 12 |
+
{
|
| 13 |
+
namespace F2S
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
// Rule matcher based on the algorithm from this paper:
|
| 17 |
+
//
|
| 18 |
+
// Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
|
| 19 |
+
// "Fast Translation Rule Matching for Syntax-based Statistical Machine
|
| 20 |
+
// Translation"
|
| 21 |
+
// In proceedings of EMNLP 2009
|
| 22 |
+
//
|
| 23 |
+
template<typename Callback>
|
| 24 |
+
class RuleMatcherHyperTree : public RuleMatcher<Callback>
|
| 25 |
+
{
|
| 26 |
+
public:
|
| 27 |
+
RuleMatcherHyperTree(const HyperTree &);
|
| 28 |
+
|
| 29 |
+
~RuleMatcherHyperTree() {}
|
| 30 |
+
|
| 31 |
+
void EnumerateHyperedges(const Forest::Vertex &, Callback &);
|
| 32 |
+
|
| 33 |
+
private:
|
| 34 |
+
// Frontier node sequence.
|
| 35 |
+
typedef std::vector<const Forest::Vertex *> FNS;
|
| 36 |
+
|
| 37 |
+
// An AnnotatedFNS is a FNS annotated with the set of forest hyperedges that
|
| 38 |
+
// constitute the tree fragment from which it was derived.
|
| 39 |
+
struct AnnotatedFNS {
|
| 40 |
+
FNS fns;
|
| 41 |
+
std::vector<const Forest::Hyperedge *> fragment;
|
| 42 |
+
};
|
| 43 |
+
|
| 44 |
+
// A MatchItem is like the FP structure in Zhang et al. (2009), but it also
|
| 45 |
+
// records the set of forest hyperedges that constitute the matched tree
|
| 46 |
+
// fragment.
|
| 47 |
+
struct MatchItem {
|
| 48 |
+
AnnotatedFNS annotatedFNS;
|
| 49 |
+
const HyperTree::Node *trieNode;
|
| 50 |
+
};
|
| 51 |
+
|
| 52 |
+
// Implements the Cartsian product operation from line 16 of Algorithm 4
|
| 53 |
+
// (Zhang et al., 2009), which in this implementation also involves
|
| 54 |
+
// combining the fragment information associated with the FNS objects.
|
| 55 |
+
void CartesianProduct(const std::vector<AnnotatedFNS> &,
|
| 56 |
+
const std::vector<AnnotatedFNS> &,
|
| 57 |
+
std::vector<AnnotatedFNS> &);
|
| 58 |
+
|
| 59 |
+
int CountCommas(const HyperPath::NodeSeq &);
|
| 60 |
+
|
| 61 |
+
bool MatchChildren(const std::vector<Forest::Vertex *> &,
|
| 62 |
+
const HyperPath::NodeSeq &, std::size_t, std::size_t);
|
| 63 |
+
|
| 64 |
+
void PropagateNextLexel(const MatchItem &);
|
| 65 |
+
|
| 66 |
+
int SubSeqLength(const HyperPath::NodeSeq &, int);
|
| 67 |
+
|
| 68 |
+
const HyperTree &m_ruleTrie;
|
| 69 |
+
PHyperedge m_hyperedge;
|
| 70 |
+
std::queue<MatchItem> m_queue; // Called "SFP" in Zhang et al. (2009)
|
| 71 |
+
};
|
| 72 |
+
|
| 73 |
+
} // namespace F2S
|
| 74 |
+
} // namespace Syntax
|
| 75 |
+
} // namespace Moses
|
| 76 |
+
|
| 77 |
+
// Implementation
|
| 78 |
+
#include "RuleMatcherHyperTree-inl.h"
|
mosesdecoder/moses/Syntax/F2S/TopologicalSorter.cpp
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "TopologicalSorter.h"
|
| 2 |
+
|
| 3 |
+
namespace Moses
|
| 4 |
+
{
|
| 5 |
+
namespace Syntax
|
| 6 |
+
{
|
| 7 |
+
namespace F2S
|
| 8 |
+
{
|
| 9 |
+
|
| 10 |
+
void TopologicalSorter::Sort(const Forest &forest,
|
| 11 |
+
std::vector<const Forest::Vertex *> &permutation)
|
| 12 |
+
{
|
| 13 |
+
permutation.clear();
|
| 14 |
+
BuildPredSets(forest);
|
| 15 |
+
m_visited.clear();
|
| 16 |
+
for (std::vector<Forest::Vertex *>::const_iterator
|
| 17 |
+
p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
|
| 18 |
+
if (m_visited.find(*p) == m_visited.end()) {
|
| 19 |
+
Visit(**p, permutation);
|
| 20 |
+
}
|
| 21 |
+
}
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
void TopologicalSorter::BuildPredSets(const Forest &forest)
|
| 25 |
+
{
|
| 26 |
+
m_predSets.clear();
|
| 27 |
+
for (std::vector<Forest::Vertex *>::const_iterator
|
| 28 |
+
p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
|
| 29 |
+
const Forest::Vertex *head = *p;
|
| 30 |
+
for (std::vector<Forest::Hyperedge *>::const_iterator
|
| 31 |
+
q = head->incoming.begin(); q != head->incoming.end(); ++q) {
|
| 32 |
+
for (std::vector<Forest::Vertex *>::const_iterator
|
| 33 |
+
r = (*q)->tail.begin(); r != (*q)->tail.end(); ++r) {
|
| 34 |
+
m_predSets[head].insert(*r);
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
// Depth-first visit: marks v as visited, recursively visits every unvisited
// predecessor of v, then appends v to permutation.
void TopologicalSorter::Visit(const Forest::Vertex &v,
                              std::vector<const Forest::Vertex *> &permutation)
{
  typedef boost::unordered_map<const Forest::Vertex *, VertexSet> PredSetMap;

  m_visited.insert(&v);

  // Use find() rather than operator[]: a vertex without predecessors has no
  // entry, and operator[] would insert an empty set for each such vertex
  // while the traversal is running.
  PredSetMap::const_iterator entry = m_predSets.find(&v);
  if (entry != m_predSets.end()) {
    const VertexSet &predSet = entry->second;
    for (VertexSet::const_iterator p = predSet.begin();
         p != predSet.end(); ++p) {
      if (m_visited.find(*p) == m_visited.end()) {
        Visit(**p, permutation);
      }
    }
  }

  permutation.push_back(&v);
}
|
| 52 |
+
|
| 53 |
+
} // namespace F2S
|
| 54 |
+
} // namespace Syntax
|
| 55 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/TopologicalSorter.h
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <vector>
|
| 4 |
+
|
| 5 |
+
#include <boost/unordered_map.hpp>
|
| 6 |
+
#include <boost/unordered_set.hpp>
|
| 7 |
+
|
| 8 |
+
#include "Forest.h"
|
| 9 |
+
|
| 10 |
+
namespace Moses
|
| 11 |
+
{
|
| 12 |
+
namespace Syntax
|
| 13 |
+
{
|
| 14 |
+
namespace F2S
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
class TopologicalSorter
|
| 18 |
+
{
|
| 19 |
+
public:
|
| 20 |
+
void Sort(const Forest &, std::vector<const Forest::Vertex *> &);
|
| 21 |
+
|
| 22 |
+
private:
|
| 23 |
+
typedef boost::unordered_set<const Forest::Vertex *> VertexSet;
|
| 24 |
+
|
| 25 |
+
void BuildPredSets(const Forest &);
|
| 26 |
+
void Visit(const Forest::Vertex &, std::vector<const Forest::Vertex *> &);
|
| 27 |
+
|
| 28 |
+
boost::unordered_set<const Forest::Vertex *> m_visited;
|
| 29 |
+
boost::unordered_map<const Forest::Vertex *, VertexSet> m_predSets;
|
| 30 |
+
};
|
| 31 |
+
|
| 32 |
+
} // namespace F2S
|
| 33 |
+
} // namespace Syntax
|
| 34 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/F2S/TreeFragmentTokenizer.cpp
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "TreeFragmentTokenizer.h"
|
| 2 |
+
|
| 3 |
+
#include <cctype>
|
| 4 |
+
|
| 5 |
+
namespace Moses
|
| 6 |
+
{
|
| 7 |
+
namespace Syntax
|
| 8 |
+
{
|
| 9 |
+
namespace F2S
|
| 10 |
+
{
|
| 11 |
+
|
| 12 |
+
// Stores the token's type, its text and its position.
TreeFragmentToken::TreeFragmentToken(TreeFragmentTokenType t,
                                     StringPiece v, std::size_t p)
  : type(t), value(v), pos(p) {}
|
| 19 |
+
|
| 20 |
+
// Constructs a tokenizer whose current token is EOS.  operator== treats any
// two tokenizers whose current token is EOS as equal, so a default-constructed
// object compares equal to an exhausted one.
//
// The iterator and position members are value-initialized so that copying a
// default-constructed tokenizer never reads indeterminate values.
TreeFragmentTokenizer::TreeFragmentTokenizer()
  : value_(TreeFragmentToken_EOS, "", -1)
  , iter_()
  , end_()
  , pos_(0)
{
}
|
| 24 |
+
|
| 25 |
+
// Constructs a tokenizer over s and immediately advances to the first token,
// so the current token is valid (or EOS) as soon as construction finishes.
TreeFragmentTokenizer::TreeFragmentTokenizer(const StringPiece &s)
  : str_(s),
    value_(TreeFragmentToken_EOS, "", -1),
    iter_(s.begin()),
    end_(s.end()),
    pos_(0)
{
  operator++();
}
|
| 34 |
+
|
| 35 |
+
// Advances to the next token.  Spaces and tabs are skipped; '[' and ']' are
// single-character tokens; any other run of characters up to the next space,
// tab, bracket or end of input is a WORD token.  At end of input the current
// token becomes EOS.
TreeFragmentTokenizer &TreeFragmentTokenizer::operator++()
{
  // Skip leading blanks.
  for (; iter_ != end_ && (*iter_ == ' ' || *iter_ == '\t'); ++iter_) {
    ++pos_;
  }

  if (iter_ == end_) {
    value_ = TreeFragmentToken(TreeFragmentToken_EOS, "", pos_);
    return *this;
  }

  if (*iter_ == '[') {
    value_ = TreeFragmentToken(TreeFragmentToken_LSB, "[", pos_);
    ++iter_;
    ++pos_;
    return *this;
  }

  if (*iter_ == ']') {
    value_ = TreeFragmentToken(TreeFragmentToken_RSB, "]", pos_);
    ++iter_;
    ++pos_;
    return *this;
  }

  // Word: consume the first character unconditionally, then continue until a
  // delimiter or the end of input.
  const std::size_t wordStart = pos_;
  do {
    ++iter_;
    ++pos_;
  } while (iter_ != end_ && *iter_ != ' ' && *iter_ != '\t' &&
           *iter_ != '[' && *iter_ != ']');
  value_ = TreeFragmentToken(TreeFragmentToken_WORD,
                             str_.substr(wordStart, pos_-wordStart),
                             wordStart);
  return *this;
}
|
| 73 |
+
|
| 74 |
+
// Postfix increment: advances this tokenizer and returns a copy of its state
// from before the advance.
TreeFragmentTokenizer TreeFragmentTokenizer::operator++(int)
{
  TreeFragmentTokenizer previous = *this;
  operator++();
  return previous;
}
|
| 80 |
+
|
| 81 |
+
bool operator==(const TreeFragmentTokenizer &lhs,
|
| 82 |
+
const TreeFragmentTokenizer &rhs)
|
| 83 |
+
{
|
| 84 |
+
if (lhs.value_.type == TreeFragmentToken_EOS ||
|
| 85 |
+
rhs.value_.type == TreeFragmentToken_EOS) {
|
| 86 |
+
return lhs.value_.type == TreeFragmentToken_EOS &&
|
| 87 |
+
rhs.value_.type == TreeFragmentToken_EOS;
|
| 88 |
+
}
|
| 89 |
+
return lhs.iter_ == rhs.iter_;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
bool operator!=(const TreeFragmentTokenizer &lhs,
|
| 93 |
+
const TreeFragmentTokenizer &rhs)
|
| 94 |
+
{
|
| 95 |
+
return !(lhs == rhs);
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
} // namespace F2S
|
| 99 |
+
} // namespace Syntax
|
| 100 |
+
} // namespace Moses
|
mosesdecoder/moses/Syntax/Manager.h
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <boost/unordered_set.hpp>
|
| 4 |
+
#include "moses/InputType.h"
|
| 5 |
+
#include "moses/BaseManager.h"
|
| 6 |
+
|
| 7 |
+
#include "KBestExtractor.h"
|
| 8 |
+
|
| 9 |
+
namespace Moses
|
| 10 |
+
{
|
| 11 |
+
namespace Syntax
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
// Common base class for Moses::Syntax managers.
|
| 15 |
+
class Manager : public BaseManager
|
| 16 |
+
{
|
| 17 |
+
public:
|
| 18 |
+
Manager(ttasksptr const& ttask);
|
| 19 |
+
|
| 20 |
+
// Virtual functions from Moses::BaseManager that are implemented the same
|
| 21 |
+
// way for all Syntax managers.
|
| 22 |
+
void OutputBest(OutputCollector *collector) const;
|
| 23 |
+
void OutputNBest(OutputCollector *collector) const;
|
| 24 |
+
void OutputUnknowns(OutputCollector *collector) const;
|
| 25 |
+
|
| 26 |
+
// Virtual functions from Moses::BaseManager that are no-ops for all Syntax
|
| 27 |
+
// managers.
|
| 28 |
+
void OutputAlignment(OutputCollector *collector) const {}
|
| 29 |
+
void OutputDetailedTreeFragmentsTranslationReport(
|
| 30 |
+
OutputCollector *collector) const {}
|
| 31 |
+
void OutputLatticeSamples(OutputCollector *collector) const {}
|
| 32 |
+
void OutputSearchGraph(OutputCollector *collector) const {}
|
| 33 |
+
// void OutputSearchGraphHypergraph() const {}
|
| 34 |
+
|
| 35 |
+
void
|
| 36 |
+
OutputSearchGraphAsHypergraph
|
| 37 |
+
( std::string const& fname, size_t const precision ) const
|
| 38 |
+
{ }
|
| 39 |
+
|
| 40 |
+
void OutputSearchGraphSLF() const {}
|
| 41 |
+
void OutputWordGraph(OutputCollector *collector) const {}
|
| 42 |
+
void OutputDetailedTranslationReport(OutputCollector *collector) const {}
|
| 43 |
+
|
| 44 |
+
void CalcDecoderStatistics() const {}
|
| 45 |
+
|
| 46 |
+
// Syntax-specific virtual functions that derived classes must implement.
|
| 47 |
+
virtual void ExtractKBest(
|
| 48 |
+
std::size_t k,
|
| 49 |
+
std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
|
| 50 |
+
bool onlyDistinct=false) const = 0;
|
| 51 |
+
virtual const SHyperedge *GetBestSHyperedge() const = 0;
|
| 52 |
+
|
| 53 |
+
protected:
|
| 54 |
+
boost::unordered_set<Word> m_oovs;
|
| 55 |
+
|
| 56 |
+
private:
|
| 57 |
+
// Syntax-specific helper functions used to implement OutputNBest.
|
| 58 |
+
void OutputNBestList(OutputCollector *collector,
|
| 59 |
+
const KBestExtractor::KBestVec &nBestList,
|
| 60 |
+
long translationId) const;
|
| 61 |
+
|
| 62 |
+
std::size_t OutputAlignmentNBest(Alignments &retAlign,
|
| 63 |
+
const KBestExtractor::Derivation &d,
|
| 64 |
+
std::size_t startTarget) const;
|
| 65 |
+
|
| 66 |
+
std::size_t CalcSourceSize(const KBestExtractor::Derivation &d) const;
|
| 67 |
+
};
|
| 68 |
+
|
| 69 |
+
} // Syntax
|
| 70 |
+
} // Moses
|
mosesdecoder/moses/Syntax/PVertex.h
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "moses/Word.h"
|
| 4 |
+
#include "moses/Range.h"
|
| 5 |
+
|
| 6 |
+
namespace Moses
|
| 7 |
+
{
|
| 8 |
+
namespace Syntax
|
| 9 |
+
{
|
| 10 |
+
|
| 11 |
+
struct PVertex {
|
| 12 |
+
public:
|
| 13 |
+
PVertex(const Range &wr, const Word &w) : span(wr), symbol(w) {}
|
| 14 |
+
|
| 15 |
+
Range span;
|
| 16 |
+
Word symbol;
|
| 17 |
+
};
|
| 18 |
+
|
| 19 |
+
inline bool operator==(const PVertex &v, const PVertex &w)
|
| 20 |
+
{
|
| 21 |
+
return v.span == w.span && v.symbol == w.symbol;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
} // Syntax
|
| 25 |
+
} // Moses
|
mosesdecoder/moses/Syntax/RuleTable.h
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
namespace Moses
|
| 4 |
+
{
|
| 5 |
+
namespace Syntax
|
| 6 |
+
{
|
| 7 |
+
|
| 8 |
+
class RuleTableFF;
|
| 9 |
+
|
| 10 |
+
// Base class for any data structure representing a synchronous
|
| 11 |
+
// grammar, like a trie (for S2T) or a DFA (for T2S).
|
| 12 |
+
class RuleTable
|
| 13 |
+
{
|
| 14 |
+
public:
|
| 15 |
+
RuleTable(const RuleTableFF *ff) : m_ff(ff) {}
|
| 16 |
+
|
| 17 |
+
virtual ~RuleTable() {}
|
| 18 |
+
|
| 19 |
+
protected:
|
| 20 |
+
const RuleTableFF *m_ff;
|
| 21 |
+
};
|
| 22 |
+
|
| 23 |
+
} // Syntax
|
| 24 |
+
} // Moses
|