Upload 440 files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -0
- mosesdecoder/moses2/AlignmentInfo.cpp +176 -0
- mosesdecoder/moses2/AlignmentInfo.h +148 -0
- mosesdecoder/moses2/AlignmentInfoCollection.cpp +62 -0
- mosesdecoder/moses2/AlignmentInfoCollection.h +81 -0
- mosesdecoder/moses2/ArcLists.cpp +127 -0
- mosesdecoder/moses2/ArcLists.h +43 -0
- mosesdecoder/moses2/Array.h +83 -0
- mosesdecoder/moses2/DLLEntryApi.cpp +74 -0
- mosesdecoder/moses2/EstimatedScores.cpp +117 -0
- mosesdecoder/moses2/EstimatedScores.h +59 -0
- mosesdecoder/moses2/FF/Distortion.cpp +182 -0
- mosesdecoder/moses2/FF/Distortion.h +59 -0
- mosesdecoder/moses2/FF/ExampleStatefulFF.cpp +96 -0
- mosesdecoder/moses2/FF/ExampleStatefulFF.h +46 -0
- mosesdecoder/moses2/FF/ExampleStatelessFF.cpp +40 -0
- mosesdecoder/moses2/FF/ExampleStatelessFF.h +34 -0
- mosesdecoder/moses2/FF/FFState.cpp +1 -0
- mosesdecoder/moses2/FF/FFState.h +50 -0
- mosesdecoder/moses2/FF/FeatureFunction.cpp +82 -0
- mosesdecoder/moses2/FF/FeatureFunction.h +118 -0
- mosesdecoder/moses2/FF/FeatureFunctions.cpp +291 -0
- mosesdecoder/moses2/FF/FeatureFunctions.h +113 -0
- mosesdecoder/moses2/FF/FeatureRegistry.cpp +128 -0
- mosesdecoder/moses2/FF/FeatureRegistry.h +52 -0
- mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp +79 -0
- mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.h +40 -0
- mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp +71 -0
- mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.h +37 -0
- mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.cpp +87 -0
- mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.h +41 -0
- mosesdecoder/moses2/FF/LexicalReordering/LRModel.cpp +196 -0
- mosesdecoder/moses2/FF/LexicalReordering/LRModel.h +99 -0
- mosesdecoder/moses2/FF/LexicalReordering/LRState.cpp +93 -0
- mosesdecoder/moses2/FF/LexicalReordering/LRState.h +48 -0
- mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.cpp +226 -0
- mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.h +115 -0
- mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp +84 -0
- mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h +44 -0
- mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.cpp +99 -0
- mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.h +41 -0
- mosesdecoder/moses2/FF/OSM/KenOSM.cpp +33 -0
- mosesdecoder/moses2/FF/OSM/KenOSM.h +53 -0
- mosesdecoder/moses2/FF/OSM/OpSequenceModel.cpp +248 -0
- mosesdecoder/moses2/FF/OSM/OpSequenceModel.h +57 -0
- mosesdecoder/moses2/FF/OSM/osmHyp.cpp +601 -0
- mosesdecoder/moses2/FF/OSM/osmHyp.h +112 -0
- mosesdecoder/moses2/FF/PhrasePenalty.cpp +40 -0
- mosesdecoder/moses2/FF/PhrasePenalty.h +34 -0
- mosesdecoder/moses2/FF/PointerState.cpp +6 -0
.gitattributes
CHANGED
|
@@ -98,3 +98,6 @@ mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryPhraseTable
|
|
| 98 |
mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/libmoses.a filter=lfs diff=lfs merge=lfs -text
|
| 99 |
mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/moses_test filter=lfs diff=lfs merge=lfs -text
|
| 100 |
mosesdecoder/moses/LM/bin/BackwardTest.test/gcc-9/release/link-static/threading-multi/BackwardTest filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/libmoses.a filter=lfs diff=lfs merge=lfs -text
|
| 99 |
mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/moses_test filter=lfs diff=lfs merge=lfs -text
|
| 100 |
mosesdecoder/moses/LM/bin/BackwardTest.test/gcc-9/release/link-static/threading-multi/BackwardTest filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
mosesdecoder/moses2/bin/gcc-9/release/link-static/threading-multi/libmoses2_lib.a filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
mosesdecoder/moses2/bin/gcc-9/release/link-static/threading-multi/libmoses2decoder.a filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
mosesdecoder/moses2/bin/gcc-9/release/link-static/threading-multi/moses2 filter=lfs diff=lfs merge=lfs -text
|
mosesdecoder/moses2/AlignmentInfo.cpp
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
#include <algorithm>
|
| 20 |
+
#include <set>
|
| 21 |
+
#include <sstream>
|
| 22 |
+
#include "AlignmentInfo.h"
|
| 23 |
+
#include "legacy/Util2.h"
|
| 24 |
+
#include "util/exception.hh"
|
| 25 |
+
|
| 26 |
+
namespace Moses2
|
| 27 |
+
{
|
| 28 |
+
|
| 29 |
+
/** Build from a ready-made set of (source, target) alignment pairs.
 *  The pairs are copied into m_collection, after which the non-terminal
 *  index maps are derived from them.
 */
AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
  : m_collection(pairs.begin(), pairs.end())
{
  BuildNonTermIndexMaps();
}
|
| 34 |
+
|
| 35 |
+
/** Build from a flat byte sequence in which consecutive bytes form
 *  (source, target) pairs: aln[0],aln[1] is the first pair, aln[2],aln[3]
 *  the second, and so on. The non-terminal index maps are derived afterwards.
 *
 *  The sequence is expected to have even length (asserted). In builds where
 *  asserts are compiled out, a dangling final byte is ignored rather than
 *  being paired with an out-of-bounds read.
 */
AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
{
  assert(aln.size()%2==0);
  // Bound on i+1: operator[] is unchecked, so the second element of every
  // pair must be known to exist before it is read.
  for (size_t i = 0; i + 1 < aln.size(); i += 2) {
    m_collection.insert(std::make_pair(size_t(aln[i]), size_t(aln[i+1])));
  }
  BuildNonTermIndexMaps();
}
|
| 42 |
+
|
| 43 |
+
/** Build from a textual alignment such as "0-0 1-2": space-separated points,
 *  each of the form "<source>-<target>".
 *
 *  Throws (via UTIL_THROW_IF2) when a point does not split into exactly two
 *  numbers. Unlike the other constructors this one does not call
 *  BuildNonTermIndexMaps(), so both index maps stay empty.
 */
AlignmentInfo::AlignmentInfo(const std::string &str)
{
  const std::vector<std::string> tokens = Tokenize(str, " ");
  for (size_t i = 0; i < tokens.size(); ++i) {
    const std::string &token = tokens[i];
    std::vector<size_t> point = Tokenize<size_t>(token, "-");
    UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << token);
    Add(point[0], point[1]);
  }
}
|
| 53 |
+
|
| 54 |
+
void AlignmentInfo::BuildNonTermIndexMaps()
|
| 55 |
+
{
|
| 56 |
+
if (m_collection.empty()) {
|
| 57 |
+
return;
|
| 58 |
+
}
|
| 59 |
+
const_iterator p = begin();
|
| 60 |
+
size_t maxIndex = p->second;
|
| 61 |
+
for (++p; p != end(); ++p) {
|
| 62 |
+
if (p->second > maxIndex) {
|
| 63 |
+
maxIndex = p->second;
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
|
| 67 |
+
m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
|
| 68 |
+
size_t i = 0;
|
| 69 |
+
for (p = begin(); p != end(); ++p) {
|
| 70 |
+
if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
|
| 71 |
+
// 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
|
| 72 |
+
m_nonTermIndexMap.clear();
|
| 73 |
+
m_nonTermIndexMap2.clear();
|
| 74 |
+
return;
|
| 75 |
+
}
|
| 76 |
+
m_nonTermIndexMap[p->second] = i++;
|
| 77 |
+
m_nonTermIndexMap2[p->second] = p->first;
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
|
| 82 |
+
{
|
| 83 |
+
std::set<size_t> ret;
|
| 84 |
+
CollType::const_iterator iter;
|
| 85 |
+
for (iter = begin(); iter != end(); ++iter) {
|
| 86 |
+
// const std::pair<size_t,size_t> &align = *iter;
|
| 87 |
+
if (iter->first == sourcePos) {
|
| 88 |
+
ret.insert(iter->second);
|
| 89 |
+
}
|
| 90 |
+
}
|
| 91 |
+
return ret;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
|
| 95 |
+
{
|
| 96 |
+
std::set<size_t> ret;
|
| 97 |
+
CollType::const_iterator iter;
|
| 98 |
+
for (iter = begin(); iter != end(); ++iter) {
|
| 99 |
+
// const std::pair<size_t,size_t> &align = *iter;
|
| 100 |
+
if (iter->second == targetPos) {
|
| 101 |
+
ret.insert(iter->first);
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
return ret;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
/** Strict "less than" on alignment points for target-order sorting:
 *  compares the target position (second) first and falls back to the
 *  source position (first) on ties.
 */
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if (a->second != b->second) {
    return a->second < b->second;
  }
  return a->first < b->first;
}
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
std::vector< const std::pair<size_t,size_t>* >
|
| 119 |
+
AlignmentInfo::
|
| 120 |
+
GetSortedAlignments(WordAlignmentSort SortOrder) const
|
| 121 |
+
{
|
| 122 |
+
std::vector< const std::pair<size_t,size_t>* > ret;
|
| 123 |
+
|
| 124 |
+
CollType::const_iterator iter;
|
| 125 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 126 |
+
const std::pair<size_t,size_t> &alignPair = *iter;
|
| 127 |
+
ret.push_back(&alignPair);
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
switch (SortOrder) {
|
| 131 |
+
case NoSort:
|
| 132 |
+
break;
|
| 133 |
+
|
| 134 |
+
case TargetOrder:
|
| 135 |
+
std::sort(ret.begin(), ret.end(), compare_target);
|
| 136 |
+
break;
|
| 137 |
+
|
| 138 |
+
default:
|
| 139 |
+
UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
|
| 140 |
+
<< SortOrder);
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
return ret;
|
| 144 |
+
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
|
| 148 |
+
{
|
| 149 |
+
std::set<size_t> sourcePoses;
|
| 150 |
+
|
| 151 |
+
CollType::const_iterator iter;
|
| 152 |
+
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
| 153 |
+
size_t sourcePos = iter->first;
|
| 154 |
+
sourcePoses.insert(sourcePos);
|
| 155 |
+
}
|
| 156 |
+
std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
|
| 157 |
+
return ret;
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
std::string AlignmentInfo::Debug(const System &system) const
|
| 161 |
+
{
|
| 162 |
+
std::stringstream out;
|
| 163 |
+
out << *this;
|
| 164 |
+
return out.str();
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj)
|
| 168 |
+
{
|
| 169 |
+
AlignmentInfo::const_iterator iter;
|
| 170 |
+
for (iter = obj.begin(); iter != obj.end(); ++iter) {
|
| 171 |
+
out << iter->first << "-" << iter->second << " ";
|
| 172 |
+
}
|
| 173 |
+
return out;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
}
|
mosesdecoder/moses2/AlignmentInfo.h
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include <iostream>
|
| 23 |
+
#include <ostream>
|
| 24 |
+
#include <set>
|
| 25 |
+
#include <vector>
|
| 26 |
+
#include <cstdlib>
|
| 27 |
+
|
| 28 |
+
#include <boost/functional/hash.hpp>
|
| 29 |
+
#include "TypeDef.h"
|
| 30 |
+
|
| 31 |
+
namespace Moses2
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
class AlignmentInfoCollection;
|
| 35 |
+
class System;
|
| 36 |
+
|
| 37 |
+
/** Collection of non-terminal alignment pairs, ordered by source index.
|
| 38 |
+
* Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
|
| 39 |
+
*/
|
| 40 |
+
class AlignmentInfo
|
| 41 |
+
{
|
| 42 |
+
friend struct AlignmentInfoOrderer;
|
| 43 |
+
friend struct AlignmentInfoHasher;
|
| 44 |
+
friend class AlignmentInfoCollection;
|
| 45 |
+
friend class VW;
|
| 46 |
+
|
| 47 |
+
friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);
|
| 48 |
+
|
| 49 |
+
public:
|
| 50 |
+
typedef std::set<std::pair<size_t,size_t> > CollType;
|
| 51 |
+
typedef std::vector<size_t> NonTermIndexMap;
|
| 52 |
+
typedef CollType::const_iterator const_iterator;
|
| 53 |
+
|
| 54 |
+
const_iterator begin() const {
|
| 55 |
+
return m_collection.begin();
|
| 56 |
+
}
|
| 57 |
+
const_iterator end() const {
|
| 58 |
+
return m_collection.end();
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
void Add(size_t sourcePos, size_t targetPos) {
|
| 62 |
+
m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
|
| 63 |
+
}
|
| 64 |
+
/** Provides a map from target-side to source-side non-terminal indices.
|
| 65 |
+
* The target-side index should be the rule symbol index (COUNTING terminals).
|
| 66 |
+
* The index returned is the rule non-terminal index (IGNORING terminals).
|
| 67 |
+
*/
|
| 68 |
+
const NonTermIndexMap &GetNonTermIndexMap() const {
|
| 69 |
+
return m_nonTermIndexMap;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
|
| 73 |
+
* the index counting both terminals and non-terminals) */
|
| 74 |
+
const NonTermIndexMap &GetNonTermIndexMap2() const {
|
| 75 |
+
return m_nonTermIndexMap2;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
const CollType &GetAlignments() const {
|
| 79 |
+
return m_collection;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
|
| 83 |
+
std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
|
| 84 |
+
|
| 85 |
+
size_t GetSize() const {
|
| 86 |
+
return m_collection.size();
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
std::vector< const std::pair<size_t,size_t>* >
|
| 90 |
+
GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;
|
| 91 |
+
|
| 92 |
+
std::vector<size_t> GetSourceIndex2PosMap() const;
|
| 93 |
+
|
| 94 |
+
bool operator==(const AlignmentInfo& rhs) const {
|
| 95 |
+
return m_collection == rhs.m_collection &&
|
| 96 |
+
m_nonTermIndexMap == rhs.m_nonTermIndexMap;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
std::string Debug(const System &system) const;
|
| 100 |
+
|
| 101 |
+
private:
|
| 102 |
+
//! AlignmentInfo objects should only be created by an AlignmentInfoCollection
|
| 103 |
+
explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
|
| 104 |
+
explicit AlignmentInfo(const std::vector<unsigned char> &aln);
|
| 105 |
+
|
| 106 |
+
// used only by VW to load word alignment between sentences
|
| 107 |
+
explicit AlignmentInfo(const std::string &str);
|
| 108 |
+
|
| 109 |
+
void BuildNonTermIndexMaps();
|
| 110 |
+
|
| 111 |
+
CollType m_collection;
|
| 112 |
+
NonTermIndexMap m_nonTermIndexMap;
|
| 113 |
+
NonTermIndexMap m_nonTermIndexMap2;
|
| 114 |
+
};
|
| 115 |
+
|
| 116 |
+
/** Define an arbitrary strict weak ordering between AlignmentInfo objects
|
| 117 |
+
* for use by AlignmentInfoCollection.
|
| 118 |
+
*/
|
| 119 |
+
struct AlignmentInfoOrderer {
|
| 120 |
+
bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
|
| 121 |
+
if (a.m_collection == b.m_collection) {
|
| 122 |
+
return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
|
| 123 |
+
} else {
|
| 124 |
+
return a.m_collection < b.m_collection;
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
};
|
| 128 |
+
|
| 129 |
+
/**
|
| 130 |
+
* Hashing functoid
|
| 131 |
+
**/
|
| 132 |
+
struct AlignmentInfoHasher {
|
| 133 |
+
size_t operator()(const AlignmentInfo& a) const {
|
| 134 |
+
size_t seed = 0;
|
| 135 |
+
boost::hash_combine(seed,a.m_collection);
|
| 136 |
+
boost::hash_combine(seed,a.m_nonTermIndexMap);
|
| 137 |
+
return seed;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
};
|
| 141 |
+
|
| 142 |
+
inline size_t hash_value(const AlignmentInfo& a)
|
| 143 |
+
{
|
| 144 |
+
static AlignmentInfoHasher hasher;
|
| 145 |
+
return hasher(a);
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
}
|
mosesdecoder/moses2/AlignmentInfoCollection.cpp
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include "AlignmentInfoCollection.h"
|
| 21 |
+
|
| 22 |
+
using namespace std;
|
| 23 |
+
|
| 24 |
+
namespace Moses2
|
| 25 |
+
{
|
| 26 |
+
|
| 27 |
+
// The one shared collection object handed out by Instance().
AlignmentInfoCollection AlignmentInfoCollection::s_instance;

/** Register the empty alignment up front and remember where it lives, so
 *  that GetEmptyAlignmentInfo() can return it later.
 */
AlignmentInfoCollection::AlignmentInfoCollection()
{
  const AlignmentInfo::CollType noPairs;
  m_emptyAlignmentInfo = Add(noPairs);
}
|
| 34 |
+
|
| 35 |
+
//! Nothing to release by hand: the stored AlignmentInfo objects are owned
//! by m_collection.
AlignmentInfoCollection::~AlignmentInfoCollection()
{
}
|
| 37 |
+
|
| 38 |
+
//! Returns a reference to the AlignmentInfo that m_emptyAlignmentInfo points to.
const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
{
  const AlignmentInfo &empty = *m_emptyAlignmentInfo;
  return empty;
}
|
| 42 |
+
|
| 43 |
+
/** Return a pointer to the stored AlignmentInfo equivalent to ainfo,
 *  inserting a copy first if the collection has none yet.
 *
 *  With WITH_THREADS the lookup runs under a shared lock and the insertion
 *  under a unique lock on m_accessLock.
 */
const AlignmentInfo *
AlignmentInfoCollection::
Add(const AlignmentInfo &ainfo)
{
#ifdef WITH_THREADS
  {
    boost::shared_lock<boost::shared_mutex> readGuard(m_accessLock);
    const AlignmentInfoSet::const_iterator found = m_collection.find(ainfo);
    if (found != m_collection.end()) {
      return &*found;
    }
  }
  // The shared lock is released before the unique lock is taken. If another
  // thread inserted an equivalent object in between, insert() below simply
  // yields the element that is already there.
  boost::unique_lock<boost::shared_mutex> writeGuard(m_accessLock);
#endif
  const AlignmentInfoSet::iterator stored = m_collection.insert(ainfo).first;
  return &*stored;
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
}
|
mosesdecoder/moses2/AlignmentInfoCollection.h
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include "AlignmentInfo.h"
|
| 23 |
+
|
| 24 |
+
#include <set>
|
| 25 |
+
|
| 26 |
+
#ifdef WITH_THREADS
|
| 27 |
+
#include <boost/thread/shared_mutex.hpp>
|
| 28 |
+
#include <boost/thread/locks.hpp>
|
| 29 |
+
#endif
|
| 30 |
+
|
| 31 |
+
namespace Moses2
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
/** Singleton collection of all AlignmentInfo objects.
|
| 35 |
+
* Used as a cache of all alignment info to save space.
|
| 36 |
+
*/
|
| 37 |
+
class AlignmentInfoCollection
|
| 38 |
+
{
|
| 39 |
+
public:
|
| 40 |
+
static AlignmentInfoCollection &Instance() {
|
| 41 |
+
return s_instance;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
/** Returns a pointer to an AlignmentInfo object with the same source-target
|
| 45 |
+
* alignment pairs as given in the argument. If the collection already
|
| 46 |
+
* contains such an object then returns a pointer to it; otherwise a new
|
| 47 |
+
* one is inserted.
|
| 48 |
+
*/
|
| 49 |
+
private:
|
| 50 |
+
const AlignmentInfo* Add(AlignmentInfo const& ainfo);
|
| 51 |
+
|
| 52 |
+
public:
|
| 53 |
+
template<typename ALNREP>
|
| 54 |
+
AlignmentInfo const *
|
| 55 |
+
Add(ALNREP const & aln) {
|
| 56 |
+
return this->Add(AlignmentInfo(aln));
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
//! Returns a pointer to an empty AlignmentInfo object.
|
| 60 |
+
const AlignmentInfo &GetEmptyAlignmentInfo() const;
|
| 61 |
+
|
| 62 |
+
private:
|
| 63 |
+
typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
//! Only a single static variable should be created.
|
| 67 |
+
AlignmentInfoCollection();
|
| 68 |
+
~AlignmentInfoCollection();
|
| 69 |
+
|
| 70 |
+
static AlignmentInfoCollection s_instance;
|
| 71 |
+
|
| 72 |
+
#ifdef WITH_THREADS
|
| 73 |
+
//reader-writer lock
|
| 74 |
+
mutable boost::shared_mutex m_accessLock;
|
| 75 |
+
#endif
|
| 76 |
+
|
| 77 |
+
AlignmentInfoSet m_collection;
|
| 78 |
+
const AlignmentInfo *m_emptyAlignmentInfo;
|
| 79 |
+
};
|
| 80 |
+
|
| 81 |
+
}
|
mosesdecoder/moses2/ArcLists.cpp
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ArcLists.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 26 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <iostream>
|
| 8 |
+
#include <sstream>
|
| 9 |
+
#include <algorithm>
|
| 10 |
+
#include <boost/foreach.hpp>
|
| 11 |
+
#include "ArcLists.h"
|
| 12 |
+
#include "HypothesisBase.h"
|
| 13 |
+
#include "util/exception.hh"
|
| 14 |
+
|
| 15 |
+
using namespace std;
|
| 16 |
+
|
| 17 |
+
namespace Moses2
|
| 18 |
+
{
|
| 19 |
+
|
| 20 |
+
//! Start with an empty collection; no further setup is performed.
ArcLists::ArcLists()
{
}
|
| 25 |
+
|
| 26 |
+
//! Delete every ArcList still registered in m_coll.
ArcLists::~ArcLists()
{
  for (Coll::const_iterator it = m_coll.begin(); it != m_coll.end(); ++it) {
    delete it->second;
  }
}
|
| 33 |
+
|
| 34 |
+
void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
|
| 35 |
+
const HypothesisBase *otherHypo)
|
| 36 |
+
{
|
| 37 |
+
//cerr << added << " " << currHypo << " " << otherHypo << endl;
|
| 38 |
+
ArcList *arcList;
|
| 39 |
+
if (added) {
|
| 40 |
+
// we're winners!
|
| 41 |
+
if (otherHypo) {
|
| 42 |
+
// there was a existing losing hypo
|
| 43 |
+
arcList = &GetAndDetachArcList(otherHypo);
|
| 44 |
+
} else {
|
| 45 |
+
// there was no existing hypo
|
| 46 |
+
arcList = new ArcList;
|
| 47 |
+
}
|
| 48 |
+
m_coll[currHypo] = arcList;
|
| 49 |
+
} else {
|
| 50 |
+
// we're losers!
|
| 51 |
+
// there should be a winner, we're not doing beam pruning
|
| 52 |
+
UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
|
| 53 |
+
arcList = &GetArcList(otherHypo);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
// in any case, add the curr hypo
|
| 57 |
+
arcList->push_back(currHypo);
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
//! Mutable access to the list registered under hypo; throws if there is none.
ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
{
  Coll::iterator iter = m_coll.find(hypo);
  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
  return *iter->second;
}
|
| 67 |
+
|
| 68 |
+
/** Read-only access to the list registered under hypo.
 *  When no list is found, the requested key and every key currently held
 *  are written to stderr before the exception is thrown.
 */
const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
{
  Coll::const_iterator iter = m_coll.find(hypo);

  if (iter == m_coll.end()) {
    // Diagnostic dump to help track down the missing key.
    std::cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
    for (Coll::const_iterator known = m_coll.begin(); known != m_coll.end(); ++known) {
      std::cerr << known->first << " ";
    }
  }

  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
  return *iter->second;
}
|
| 84 |
+
|
| 85 |
+
/** Remove the entry for hypo from m_coll and return its list.
 *  The list itself is not deleted. Throws if hypo has no list.
 */
ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
{
  Coll::iterator iter = m_coll.find(hypo);
  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");

  // Take the pointer before erasing: the iterator is invalid afterwards.
  ArcList *detached = iter->second;
  m_coll.erase(iter);
  return *detached;
}
|
| 95 |
+
|
| 96 |
+
void ArcLists::Sort()
|
| 97 |
+
{
|
| 98 |
+
BOOST_FOREACH(Coll::value_type &collPair, m_coll) {
|
| 99 |
+
ArcList &list = *collPair.second;
|
| 100 |
+
std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
|
| 101 |
+
}
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
void ArcLists::Delete(const HypothesisBase *hypo)
|
| 105 |
+
{
|
| 106 |
+
//cerr << "hypo=" << hypo->Debug() << endl;
|
| 107 |
+
//cerr << "m_coll=" << m_coll.size() << endl;
|
| 108 |
+
Coll::iterator iter = m_coll.find(hypo);
|
| 109 |
+
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
|
| 110 |
+
ArcList *arcList = iter->second;
|
| 111 |
+
|
| 112 |
+
m_coll.erase(iter);
|
| 113 |
+
delete arcList;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
std::string ArcLists::Debug(const System &system) const
|
| 117 |
+
{
|
| 118 |
+
stringstream strm;
|
| 119 |
+
BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
|
| 120 |
+
const ArcList *arcList = collPair.second;
|
| 121 |
+
strm << arcList << "(" << arcList->size() << ") ";
|
| 122 |
+
}
|
| 123 |
+
return strm.str();
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
}
|
| 127 |
+
|
mosesdecoder/moses2/ArcLists.h
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ArcLists.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 26 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
#include <vector>
|
| 9 |
+
#include <boost/unordered_map.hpp>
|
| 10 |
+
|
| 11 |
+
namespace Moses2
|
| 12 |
+
{
|
| 13 |
+
class System;
|
| 14 |
+
|
| 15 |
+
class HypothesisBase;
|
| 16 |
+
|
| 17 |
+
typedef std::vector<const HypothesisBase*> ArcList;
|
| 18 |
+
|
| 19 |
+
class ArcLists
|
| 20 |
+
{
|
| 21 |
+
public:
|
| 22 |
+
ArcLists();
|
| 23 |
+
virtual ~ArcLists();
|
| 24 |
+
|
| 25 |
+
void AddArc(bool added, const HypothesisBase *currHypo,
|
| 26 |
+
const HypothesisBase *otherHypo);
|
| 27 |
+
void Sort();
|
| 28 |
+
void Delete(const HypothesisBase *hypo);
|
| 29 |
+
|
| 30 |
+
const ArcList &GetArcList(const HypothesisBase *hypo) const;
|
| 31 |
+
|
| 32 |
+
std::string Debug(const System &system) const;
|
| 33 |
+
protected:
|
| 34 |
+
typedef boost::unordered_map<const HypothesisBase*, ArcList*> Coll;
|
| 35 |
+
Coll m_coll;
|
| 36 |
+
|
| 37 |
+
ArcList &GetArcList(const HypothesisBase *hypo);
|
| 38 |
+
ArcList &GetAndDetachArcList(const HypothesisBase *hypo);
|
| 39 |
+
|
| 40 |
+
};
|
| 41 |
+
|
| 42 |
+
}
|
| 43 |
+
|
mosesdecoder/moses2/Array.h
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <cassert>
|
| 3 |
+
#include <boost/functional/hash.hpp>
|
| 4 |
+
#include "MemPool.h"
|
| 5 |
+
|
| 6 |
+
namespace Moses2
|
| 7 |
+
{
|
| 8 |
+
|
| 9 |
+
template<typename T>
|
| 10 |
+
class Array
|
| 11 |
+
{
|
| 12 |
+
public:
|
| 13 |
+
typedef T* iterator;
|
| 14 |
+
typedef const T* const_iterator;
|
| 15 |
+
//! iterators
|
| 16 |
+
const_iterator begin() const {
|
| 17 |
+
return m_arr;
|
| 18 |
+
}
|
| 19 |
+
const_iterator end() const {
|
| 20 |
+
return m_arr + m_size;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
iterator begin() {
|
| 24 |
+
return m_arr;
|
| 25 |
+
}
|
| 26 |
+
iterator end() {
|
| 27 |
+
return m_arr + m_size;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
Array(MemPool &pool, size_t size = 0, const T &val = T()) {
|
| 31 |
+
m_size = size;
|
| 32 |
+
m_maxSize = size;
|
| 33 |
+
m_arr = pool.Allocate<T>(size);
|
| 34 |
+
for (size_t i = 0; i < size; ++i) {
|
| 35 |
+
m_arr[i] = val;
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
size_t size() const {
|
| 40 |
+
return m_size;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
const T& operator[](size_t ind) const {
|
| 44 |
+
return m_arr[ind];
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
T& operator[](size_t ind) {
|
| 48 |
+
return m_arr[ind];
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
T *GetArray() {
|
| 52 |
+
return m_arr;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
size_t hash() const {
|
| 56 |
+
size_t seed = 0;
|
| 57 |
+
for (size_t i = 0; i < m_size; ++i) {
|
| 58 |
+
boost::hash_combine(seed, m_arr[i]);
|
| 59 |
+
}
|
| 60 |
+
return seed;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
int Compare(const Array &compare) const {
|
| 64 |
+
|
| 65 |
+
int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size);
|
| 66 |
+
return cmp;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
bool operator==(const Array &compare) const {
|
| 70 |
+
int cmp = Compare(compare);
|
| 71 |
+
return cmp == 0;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
void resize(size_t newSize) {
|
| 75 |
+
assert(m_size <= m_maxSize);
|
| 76 |
+
m_size = newSize;
|
| 77 |
+
}
|
| 78 |
+
protected:
|
| 79 |
+
size_t m_size, m_maxSize;
|
| 80 |
+
T *m_arr;
|
| 81 |
+
};
|
| 82 |
+
|
| 83 |
+
}
|
mosesdecoder/moses2/DLLEntryApi.cpp
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "Moses2Wrapper.h"
|
| 2 |
+
#include <iostream>
|
| 3 |
+
#include <string.h>
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
// Generic helper definitions for shared library support
|
| 7 |
+
#if defined _WIN32
|
| 8 |
+
#define IMPORT __declspec(dllimport)
|
| 9 |
+
#define EXPORT __declspec(dllexport)
|
| 10 |
+
#else // !(defined _WIN32 || defined __CYGWIN__) -- i.e., not Windows
|
| 11 |
+
#define __stdcall
|
| 12 |
+
#if __GNUC__ >= 4
|
| 13 |
+
#define IMPORT __attribute__ ((visibility ("default")))
|
| 14 |
+
#define EXPORT __attribute__ ((visibility ("default")))
|
| 15 |
+
#else // __GNUC__ < 4, which does not support the __attribute__ tag
|
| 16 |
+
#define IMPORT
|
| 17 |
+
#define EXPORT
|
| 18 |
+
#endif // __GNUC__ >= 4
|
| 19 |
+
#endif
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
using namespace std;
|
| 23 |
+
using namespace Moses2;
|
| 24 |
+
|
| 25 |
+
/**
 * Create a Moses2Wrapper from the configuration at filePath and store it in
 * *pObject.
 *
 * \param filePath path handed to the Moses2Wrapper constructor; must not be NULL
 * \param pObject  out-parameter; must not be NULL and must point to a NULL
 *                 pointer, otherwise nothing is created
 * \return MS_API_OK on success, MS_API_E_FAILURE on any invalid argument
 */
extern "C" EXPORT MosesApiErrorCode __stdcall GetMosesSystem(const char* filePath, Moses2::Moses2Wrapper * *pObject) {
  // reject null arguments before touching them
  if (filePath == NULL || pObject == NULL) {
    return MS_API_E_FAILURE;
  }
  // an already populated handle is treated as an error, not overwritten
  if (*pObject != NULL) {
    return MS_API_E_FAILURE;
  }

  *pObject = new Moses2::Moses2Wrapper(filePath);
  return MS_API_OK;
}
|
| 35 |
+
|
| 36 |
+
/**
 * Translate input with the given system and return a copy of the result.
 *
 * \param pObject system created by GetMosesSystem; must not be NULL
 * \param id      identifier forwarded to Moses2Wrapper::Translate
 * \param input   text to translate; must not be NULL
 * \param output  receives a string produced by Moses2Wrapper::CopyString
 *                (release it with FreeMemory); must not be NULL
 * \return MS_API_OK on success, MS_API_E_FAILURE on any NULL argument
 */
extern "C" EXPORT MosesApiErrorCode __stdcall Translate(Moses2::Moses2Wrapper * pObject, long id, const char* input, char** output) {
  // input and output are dereferenced below, so they are checked as well
  if (pObject == NULL || input == NULL || output == NULL) {
    return MS_API_E_FAILURE;
  }

  std::string tr = pObject->Translate(input, id);
  *output = Moses2Wrapper::CopyString(tr.c_str());
  return MS_API_OK;
}
|
| 47 |
+
|
| 48 |
+
/**
 * Release a string previously handed out through Translate.
 *
 * \param output buffer to release via Moses2Wrapper::Free
 * \return MS_API_OK when the buffer was released, MS_API_E_FAILURE when
 *         output is null
 */
extern "C" EXPORT MosesApiErrorCode __stdcall FreeMemory(char* output) {
  if (output == nullptr) {
    return MS_API_E_FAILURE;
  }

  Moses2Wrapper::Free(output);
  return MS_API_OK;
}
|
| 57 |
+
|
| 58 |
+
/**
 * Destroy the system referenced by *pObject and reset the handle to NULL.
 *
 * \param pObject address of the handle filled by GetMosesSystem
 * \return MS_API_OK when a system was destroyed, MS_API_E_FAILURE when
 *         pObject or *pObject is NULL
 */
extern "C" EXPORT MosesApiErrorCode __stdcall ReleaseSystem(Moses2::Moses2Wrapper **pObject) {
  // pObject itself must be valid before *pObject can be inspected
  if (pObject == NULL || *pObject == NULL) {
    return MS_API_E_FAILURE;
  }

  delete *pObject;
  *pObject = NULL;
  return MS_API_OK;
}
|
| 69 |
+
|
| 70 |
+
/**
 * Print the engine version string to standard output.
 *
 * \return always MS_API_OK
 */
extern "C" EXPORT MosesApiErrorCode __stdcall EngineVersion() {
  //std::cout << "windows build on v1142/ msvc 14.27.29110"<< std::endl;
  static const char version[] = "0.0.1";
  std::cout << version << std::endl;
  return MS_API_OK;
}
|
mosesdecoder/moses2/EstimatedScores.cpp
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
// vim:tabstop=2
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (C) 2006 University of Edinburgh
|
| 7 |
+
|
| 8 |
+
This library is free software; you can redistribute it and/or
|
| 9 |
+
modify it under the terms of the GNU Lesser General Public
|
| 10 |
+
License as published by the Free Software Foundation; either
|
| 11 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 12 |
+
|
| 13 |
+
This library is distributed in the hope that it will be useful,
|
| 14 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 15 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 16 |
+
Lesser General Public License for more details.
|
| 17 |
+
|
| 18 |
+
You should have received a copy of the GNU Lesser General Public
|
| 19 |
+
License along with this library; if not, write to the Free Software
|
| 20 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 21 |
+
***********************************************************************/
|
| 22 |
+
|
| 23 |
+
#include <string>
|
| 24 |
+
#include <iostream>
|
| 25 |
+
#include "EstimatedScores.h"
|
| 26 |
+
|
| 27 |
+
using namespace std;
|
| 28 |
+
|
| 29 |
+
namespace Moses2
|
| 30 |
+
{
|
| 31 |
+
/**
|
| 32 |
+
* Calculate future score estimate for a given coverage bitmap
|
| 33 |
+
*
|
| 34 |
+
 * \param bitmap coverage bitmap
|
| 35 |
+
*/
|
| 36 |
+
|
| 37 |
+
float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap) const
{
  // marker meaning "currently not inside an uncovered run"
  const size_t noGap = numeric_limits<size_t>::max();
  const size_t numPositions = bitmap.GetSize();

  float total = 0.0f;
  size_t gapBegin = noGap;

  for (size_t pos = 0; pos < numPositions; ++pos) {
    const bool covered = bitmap.GetValue(pos);
    if (gapBegin == noGap) {
      // an uncovered position opens a new gap
      if (!covered) {
        gapBegin = pos;
      }
    } else if (covered) {
      // a covered position closes the gap [gapBegin, pos - 1]
      total += GetValue(gapBegin, pos - 1);
      gapBegin = noGap;
    }
  }

  // a gap still open here runs to the last position
  if (gapBegin != noGap) {
    total += GetValue(gapBegin, numPositions - 1);
  }

  return total;
}
|
| 60 |
+
|
| 61 |
+
/**
|
| 62 |
+
 * Calculate future score estimate for a given coverage bitmap
|
| 63 |
+
* and an additional span that is also covered. This function is used
|
| 64 |
+
 * to compute future score estimates for hypotheses that we may want to
|
| 65 |
+
* build, but first want to check.
|
| 66 |
+
*
|
| 67 |
+
* Note: this function is implemented a bit more complex than
|
| 68 |
+
* the basic one (w/o additional phrase) for speed reasons,
|
| 69 |
+
* which is probably overkill.
|
| 70 |
+
*
|
| 71 |
+
 * \param bitmap coverage bitmap
 * \param startPos start of the span that is added to the coverage
 * \param endPos end of the span that is added to the coverage
|
| 74 |
+
*/
|
| 75 |
+
|
| 76 |
+
float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap, size_t startPos,
    size_t endPos) const
{
  // marker meaning "currently not inside an uncovered run"
  const size_t notInGap = numeric_limits<size_t>::max();
  float estimatedScore = 0.0f;

  size_t gapBegin = bitmap.GetFirstGapPos();
  if (gapBegin == NOT_FOUND) {
    return estimatedScore; // everything filled
  }

  // Scan from just after the first gap, unless the added span starts exactly
  // at that gap: then there is no open gap and scanning resumes after the span.
  size_t firstPos;
  if (startPos == gapBegin) {
    gapBegin = notInGap;
    firstPos = endPos + 1;
  } else {
    firstPos = gapBegin + 1;
  }

  // last position that counts as covered, taking the added span into account
  size_t lastCovered = bitmap.GetLastPos();
  if (endPos > lastCovered || lastCovered == NOT_FOUND) {
    lastCovered = endPos;
  }

  for (size_t pos = firstPos; pos <= lastCovered; ++pos) {
    const bool insideSpan = (startPos <= pos && pos <= endPos);
    const bool covered = bitmap.GetValue(pos) || insideSpan;

    if (gapBegin == notInGap) {
      // start of a new gap?
      if (!covered) {
        gapBegin = pos;
      }
    } else if (covered) {
      // end of a gap
      estimatedScore += GetValue(gapBegin, pos - 1);
      gapBegin = notInGap;
    }
  }

  // everything after the last covered position forms one trailing gap
  const size_t lastPos = bitmap.GetSize() - 1;
  if (lastCovered != lastPos) {
    estimatedScore += GetValue(lastCovered + 1, lastPos);
  }

  return estimatedScore;
}
|
| 115 |
+
|
| 116 |
+
}
|
| 117 |
+
|
mosesdecoder/moses2/EstimatedScores.h
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#pragma once
|
| 23 |
+
|
| 24 |
+
#include <iostream>
|
| 25 |
+
#include "legacy/Util2.h"
|
| 26 |
+
#include "legacy/Bitmap.h"
|
| 27 |
+
#include "legacy/Matrix.h"
|
| 28 |
+
|
| 29 |
+
namespace Moses2
|
| 30 |
+
{
|
| 31 |
+
class MemPool;
|
| 32 |
+
class System;
|
| 33 |
+
|
| 34 |
+
//! A square array of floats to store future costs in the phrase-based decoder
|
| 35 |
+
class EstimatedScores: public Matrix<float>
{
public:
  //! builds a size x size matrix whose storage comes from pool
  EstimatedScores(MemPool &pool, size_t size)
    : Matrix<float>(pool, size, size)
  {}

  ~EstimatedScores(); // not implemented

  //! estimate for all positions left uncovered by the bitmap
  float CalcEstimatedScore(Bitmap const&) const;
  //! same, treating [startPos, endPos] as covered in addition to the bitmap
  float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;

  /**
   * Write the matrix to out, one line per end position; each line lists the
   * values for every start position separated by spaces.
   * system is not used.
   */
  std::ostream &Debug(std::ostream &out, const System &system) const {
    const size_t dim = GetSize();
    for (size_t row = 0; row < dim; ++row) {
      for (size_t col = 0; col < dim; ++col) {
        out << GetValue(col, row) << " ";
      }
      out << std::endl;
    }
    return out;
  }

};
|
| 57 |
+
|
| 58 |
+
}
|
| 59 |
+
|
mosesdecoder/moses2/FF/Distortion.cpp
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Distortion.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 28 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <sstream>
|
| 8 |
+
#include "Distortion.h"
|
| 9 |
+
#include "../PhraseBased/Hypothesis.h"
|
| 10 |
+
#include "../PhraseBased/Manager.h"
|
| 11 |
+
#include "../legacy/Range.h"
|
| 12 |
+
#include "../legacy/Bitmap.h"
|
| 13 |
+
|
| 14 |
+
using namespace std;
|
| 15 |
+
|
| 16 |
+
namespace Moses2
|
| 17 |
+
{
|
| 18 |
+
|
| 19 |
+
struct DistortionState_traditional: public FFState {
|
| 20 |
+
Range range;
|
| 21 |
+
int first_gap;
|
| 22 |
+
|
| 23 |
+
DistortionState_traditional() :
|
| 24 |
+
range() {
|
| 25 |
+
// uninitialised
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
void Set(const Range& wr, int fg) {
|
| 29 |
+
range = wr;
|
| 30 |
+
first_gap = fg;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
size_t hash() const {
|
| 34 |
+
return range.GetEndPos();
|
| 35 |
+
}
|
| 36 |
+
virtual bool operator==(const FFState& other) const {
|
| 37 |
+
const DistortionState_traditional& o =
|
| 38 |
+
static_cast<const DistortionState_traditional&>(other);
|
| 39 |
+
return range.GetEndPos() == o.range.GetEndPos();
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
virtual std::string ToString() const {
|
| 43 |
+
stringstream sb;
|
| 44 |
+
sb << first_gap << " " << range;
|
| 45 |
+
return sb.str();
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
};
|
| 49 |
+
|
| 50 |
+
///////////////////////////////////////////////////////////////////////
|
| 51 |
+
//! Forwards startInd and line to the base class, then reads the parameters.
Distortion::Distortion(size_t startInd, const std::string &line)
  : StatefulFeatureFunction(startInd, line)
{
  ReadParameters();
}

//! Nothing to release.
Distortion::~Distortion()
{
  // TODO Auto-generated destructor stub
}
|
| 61 |
+
|
| 62 |
+
//! Default-constructs a DistortionState_traditional in memory taken from pool.
FFState* Distortion::BlankState(MemPool &pool, const System &sys) const
{
  DistortionState_traditional *mem = pool.Allocate<DistortionState_traditional>();
  return new (mem) DistortionState_traditional();
}
|
| 66 |
+
|
| 67 |
+
/**
 * Initialise the state of the empty hypothesis: both ends of the previous
 * range and the first gap are set to NOT_FOUND.
 * mgr, input and hypo are not used.
 */
void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
                                      const InputType &input, const Hypothesis &hypo) const
{
  // fake previous translated phrase start and end
  const size_t prevStart = NOT_FOUND;
  const size_t prevEnd = NOT_FOUND;
  /*
   if (input.m_frontSpanCoveredLength > 0) {
   // can happen with --continue-partial-translation
   start = 0;
   end = input.m_frontSpanCoveredLength -1;
   }
   */

  DistortionState_traditional &initial =
    static_cast<DistortionState_traditional&>(state);
  initial.Set(Range(prevStart, prevEnd), NOT_FOUND);
}
|
| 87 |
+
|
| 88 |
+
//! Phrase-based overload: intentionally empty, leaves scores and estimatedScore untouched.
void Distortion::EvaluateInIsolation(MemPool &pool, const System &system,
                                     const Phrase<Moses2::Word> &source,
                                     const TargetPhraseImpl &targetPhrase,
                                     Scores &scores, SCORE &estimatedScore) const
{
}

//! SCFG overload: intentionally empty, leaves scores and estimatedScore untouched.
void Distortion::EvaluateInIsolation(MemPool &pool, const System &system,
                                     const Phrase<SCFG::Word> &source,
                                     const TargetPhrase<SCFG::Word> &targetPhrase,
                                     Scores &scores, SCORE &estimatedScore) const
{
}
|
| 99 |
+
|
| 100 |
+
void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
|
| 101 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 102 |
+
FFState &state) const
|
| 103 |
+
{
|
| 104 |
+
const DistortionState_traditional &prev =
|
| 105 |
+
static_cast<const DistortionState_traditional&>(prevState);
|
| 106 |
+
SCORE distortionScore = CalculateDistortionScore(prev.range,
|
| 107 |
+
hypo.GetInputPath().range, prev.first_gap);
|
| 108 |
+
//cerr << "distortionScore=" << distortionScore << endl;
|
| 109 |
+
|
| 110 |
+
scores.PlusEquals(mgr.system, *this, distortionScore);
|
| 111 |
+
|
| 112 |
+
DistortionState_traditional &stateCast =
|
| 113 |
+
static_cast<DistortionState_traditional&>(state);
|
| 114 |
+
stateCast.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos());
|
| 115 |
+
|
| 116 |
+
//cerr << "hypo=" << hypo.Debug(mgr.system) << endl;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
/**
 * Distortion score for moving from range prev to range curr.
 *
 * The early-distortion-cost variant is switched off by the local flag below,
 * so the result is always the negated distance from
 * ComputeDistortionDistance(); FirstGap is only read by the disabled variant.
 */
SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
    const int FirstGap) const
{
  bool useEarlyDistortionCost = false;
  if (!useEarlyDistortionCost) {
    return -(SCORE) ComputeDistortionDistance(prev, curr);
  }

  /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
   Definitions:
   S   : current source range
   S'  : last translated source phrase range
   S'' : longest fully-translated initial segment
   */

  const int prefixEndPos = ((int) FirstGap == -1) ? -1 : (int) FirstGap - 1;

  // case1: S is adjacent to S'' => return 0
  if ((int) curr.GetStartPos() == prefixEndPos + 1) {
    //IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl;
    return 0;
  }

  // case2: S is to the left of S' => return 2(length(S))
  if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
    //IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl;
    return (float) -2 * (int) curr.GetNumWordsCovered();
  }

  // case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S))
  if ((int) prev.GetEndPos() <= prefixEndPos) {
    //IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl;
    const int wordsBetween = (int) curr.GetStartPos() - prefixEndPos - 1;
    return (float) -2 * (wordsBetween + (int) curr.GetNumWordsCovered());
  }

  // case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
  //IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
  return (float) -2
         * ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered());
}
|
| 162 |
+
|
| 163 |
+
int Distortion::ComputeDistortionDistance(const Range& prev,
|
| 164 |
+
const Range& current) const
|
| 165 |
+
{
|
| 166 |
+
int dist = 0;
|
| 167 |
+
if (prev.GetNumWordsCovered() == 0) {
|
| 168 |
+
dist = current.GetStartPos();
|
| 169 |
+
} else {
|
| 170 |
+
dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
|
| 171 |
+
}
|
| 172 |
+
return abs(dist);
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
//! SCFG overload: not supported, always throws via UTIL_THROW2.
void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
                                     const SCFG::Hypothesis &hypo,
                                     int featureID,
                                     Scores &scores,
                                     FFState &state) const
{
  UTIL_THROW2("Not implemented");
}
|
| 181 |
+
|
| 182 |
+
}
|
mosesdecoder/moses2/FF/Distortion.h
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Distortion.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 28 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#ifndef DISTORTION_H_
|
| 9 |
+
#define DISTORTION_H_
|
| 10 |
+
|
| 11 |
+
#include "StatefulFeatureFunction.h"
|
| 12 |
+
#include "../legacy/Range.h"
|
| 13 |
+
#include "../TypeDef.h"
|
| 14 |
+
|
| 15 |
+
namespace Moses2
|
| 16 |
+
{
|
| 17 |
+
|
| 18 |
+
class Distortion: public StatefulFeatureFunction
|
| 19 |
+
{
|
| 20 |
+
public:
|
| 21 |
+
Distortion(size_t startInd, const std::string &line);
|
| 22 |
+
virtual ~Distortion();
|
| 23 |
+
|
| 24 |
+
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
|
| 25 |
+
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
|
| 26 |
+
const InputType &input, const Hypothesis &hypo) const;
|
| 27 |
+
|
| 28 |
+
virtual void
|
| 29 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
| 30 |
+
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
| 31 |
+
SCORE &estimatedScore) const;
|
| 32 |
+
|
| 33 |
+
virtual void
|
| 34 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
| 35 |
+
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
| 36 |
+
SCORE &estimatedScore) const;
|
| 37 |
+
|
| 38 |
+
virtual void EvaluateWhenApplied(const std::deque<Hypothesis*> &hypos) const {
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
|
| 42 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 43 |
+
FFState &state) const;
|
| 44 |
+
|
| 45 |
+
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
|
| 46 |
+
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
|
| 47 |
+
FFState &state) const;
|
| 48 |
+
|
| 49 |
+
protected:
|
| 50 |
+
SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
|
| 51 |
+
const int FirstGap) const;
|
| 52 |
+
|
| 53 |
+
int ComputeDistortionDistance(const Range& prev, const Range& current) const;
|
| 54 |
+
|
| 55 |
+
};
|
| 56 |
+
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
#endif /* DISTORTION_H_ */
|
mosesdecoder/moses2/FF/ExampleStatefulFF.cpp
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ExampleStatefulFF.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <sstream>
|
| 8 |
+
#include "ExampleStatefulFF.h"
|
| 9 |
+
#include "../PhraseBased/Manager.h"
|
| 10 |
+
#include "../PhraseBased/Hypothesis.h"
|
| 11 |
+
|
| 12 |
+
using namespace std;
|
| 13 |
+
|
| 14 |
+
namespace Moses2
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
class ExampleState: public FFState
|
| 18 |
+
{
|
| 19 |
+
public:
|
| 20 |
+
int targetLen;
|
| 21 |
+
|
| 22 |
+
ExampleState() {
|
| 23 |
+
// uninitialised
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
virtual size_t hash() const {
|
| 27 |
+
return (size_t) targetLen;
|
| 28 |
+
}
|
| 29 |
+
virtual bool operator==(const FFState& o) const {
|
| 30 |
+
const ExampleState& other = static_cast<const ExampleState&>(o);
|
| 31 |
+
return targetLen == other.targetLen;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
virtual std::string ToString() const {
|
| 35 |
+
stringstream sb;
|
| 36 |
+
sb << targetLen;
|
| 37 |
+
return sb.str();
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
};
|
| 41 |
+
|
| 42 |
+
////////////////////////////////////////////////////////////////////////////////////////
|
| 43 |
+
//! Forwards startInd and line to the base class, then reads the parameters.
ExampleStatefulFF::ExampleStatefulFF(size_t startInd, const std::string &line)
  : StatefulFeatureFunction(startInd, line)
{
  ReadParameters();
}

//! Nothing to release.
ExampleStatefulFF::~ExampleStatefulFF()
{
  // TODO Auto-generated destructor stub
}
|
| 53 |
+
|
| 54 |
+
//! Default-constructs an ExampleState in memory taken from pool.
FFState* ExampleStatefulFF::BlankState(MemPool &pool, const System &sys) const
{
  ExampleState *mem = pool.Allocate<ExampleState>();
  return new (mem) ExampleState();
}
|
| 58 |
+
|
| 59 |
+
//! State of the empty hypothesis: target length zero. state must be an ExampleState.
void ExampleStatefulFF::EmptyHypothesisState(FFState &state,
    const ManagerBase &mgr, const InputType &input,
    const Hypothesis &hypo) const
{
  static_cast<ExampleState&>(state).targetLen = 0;
}
|
| 66 |
+
|
| 67 |
+
//! Phrase-based overload: intentionally empty, leaves scores and estimatedScore untouched.
void ExampleStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system,
    const Phrase<Moses2::Word> &source,
    const TargetPhraseImpl &targetPhrase,
    Scores &scores, SCORE &estimatedScore) const
{
}

//! SCFG overload: intentionally empty, leaves scores and estimatedScore untouched.
void ExampleStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system,
    const Phrase<SCFG::Word> &source,
    const TargetPhrase<SCFG::Word> &targetPhrase,
    Scores &scores, SCORE &estimatedScore) const
{
}
|
| 79 |
+
|
| 80 |
+
void ExampleStatefulFF::EvaluateWhenApplied(const ManagerBase &mgr,
|
| 81 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 82 |
+
FFState &state) const
|
| 83 |
+
{
|
| 84 |
+
ExampleState &stateCast = static_cast<ExampleState&>(state);
|
| 85 |
+
stateCast.targetLen = hypo.GetTargetPhrase().GetSize();
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
//! SCFG overload: not supported, always throws via UTIL_THROW2.
void ExampleStatefulFF::EvaluateWhenApplied(const SCFG::Manager &mgr,
    const SCFG::Hypothesis &hypo,
    int featureID,
    Scores &scores,
    FFState &state) const
{
  UTIL_THROW2("Not implemented");
}
|
| 94 |
+
|
| 95 |
+
}
|
| 96 |
+
|
mosesdecoder/moses2/FF/ExampleStatefulFF.h
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ExampleStatefulFF.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include "StatefulFeatureFunction.h"
|
| 11 |
+
|
| 12 |
+
namespace Moses2
|
| 13 |
+
{
|
| 14 |
+
|
| 15 |
+
class ExampleStatefulFF: public StatefulFeatureFunction
|
| 16 |
+
{
|
| 17 |
+
public:
|
| 18 |
+
ExampleStatefulFF(size_t startInd, const std::string &line);
|
| 19 |
+
virtual ~ExampleStatefulFF();
|
| 20 |
+
|
| 21 |
+
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
|
| 22 |
+
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
|
| 23 |
+
const InputType &input, const Hypothesis &hypo) const;
|
| 24 |
+
|
| 25 |
+
virtual void
|
| 26 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
| 27 |
+
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
| 28 |
+
SCORE &estimatedScore) const;
|
| 29 |
+
|
| 30 |
+
virtual void
|
| 31 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
| 32 |
+
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
| 33 |
+
SCORE &estimatedScore) const;
|
| 34 |
+
|
| 35 |
+
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
|
| 36 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 37 |
+
FFState &state) const;
|
| 38 |
+
|
| 39 |
+
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
|
| 40 |
+
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
|
| 41 |
+
FFState &state) const;
|
| 42 |
+
|
| 43 |
+
};
|
| 44 |
+
|
| 45 |
+
}
|
| 46 |
+
|
mosesdecoder/moses2/FF/ExampleStatelessFF.cpp
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
 * ExampleStatelessFF.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include "../Scores.h"
|
| 8 |
+
|
| 9 |
+
#include "ExampleStatelessFF.h"
|
| 10 |
+
|
| 11 |
+
namespace Moses2
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
//! Forwards startInd and line to the base class, then reads the parameters.
ExampleStatelessFF::ExampleStatelessFF(size_t startInd, const std::string &line)
  : StatelessFeatureFunction(startInd, line)
{
  ReadParameters();
}

//! Nothing to release.
ExampleStatelessFF::~ExampleStatelessFF()
{
  // TODO Auto-generated destructor stub
}
|
| 25 |
+
|
| 26 |
+
/**
 * Phrase-based overload (Moses2::Word phrases).
 *
 * The example feature contributes nothing: neither scores nor estimatedScore
 * is modified.
 */
void ExampleStatelessFF::EvaluateInIsolation(
  MemPool &pool,
  const System &system,
  const Phrase<Moses2::Word> &source,
  const TargetPhraseImpl &targetPhrase,
  Scores &scores,
  SCORE &estimatedScore) const
{
  // intentionally empty
}
|
| 32 |
+
|
| 33 |
+
/**
 * SCFG overload (SCFG::Word phrases).
 *
 * The example feature contributes nothing: neither scores nor estimatedScore
 * is modified.
 */
void ExampleStatelessFF::EvaluateInIsolation(
  MemPool &pool,
  const System &system,
  const Phrase<SCFG::Word> &source,
  const TargetPhrase<SCFG::Word> &targetPhrase,
  Scores &scores,
  SCORE &estimatedScore) const
{
  // intentionally empty
}
|
| 38 |
+
|
| 39 |
+
}
|
| 40 |
+
|
mosesdecoder/moses2/FF/ExampleStatelessFF.h
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ExampleStatelessFF.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include "StatelessFeatureFunction.h"
|
| 11 |
+
|
| 12 |
+
namespace Moses2
|
| 13 |
+
{
|
| 14 |
+
|
| 15 |
+
class ExampleStatelessFF: public StatelessFeatureFunction
|
| 16 |
+
{
|
| 17 |
+
public:
|
| 18 |
+
ExampleStatelessFF(size_t startInd, const std::string &line);
|
| 19 |
+
virtual ~ExampleStatelessFF();
|
| 20 |
+
|
| 21 |
+
virtual void
|
| 22 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
| 23 |
+
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
| 24 |
+
SCORE &estimatedScore) const;
|
| 25 |
+
|
| 26 |
+
virtual void
|
| 27 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
| 28 |
+
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
| 29 |
+
SCORE &estimatedScore) const;
|
| 30 |
+
|
| 31 |
+
};
|
| 32 |
+
|
| 33 |
+
}
|
| 34 |
+
|
mosesdecoder/moses2/FF/FFState.cpp
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
#include "FFState.h"
|
mosesdecoder/moses2/FF/FFState.h
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <stddef.h>
#include <ostream>
#include <string>
#include <vector>
#include "util/exception.hh"
|
| 6 |
+
|
| 7 |
+
namespace Moses2
|
| 8 |
+
{
|
| 9 |
+
|
| 10 |
+
/**
 * Abstract interface for a feature-function state object.
 *
 * Implementations must be hashable (hash()), comparable for equality
 * (operator==) and printable (ToString()). operator!= is derived from
 * operator== and normally does not need to be overridden.
 */
class FFState
{
public:
  virtual ~FFState() {}

  //! Hash value of this state.
  virtual size_t hash() const = 0;

  //! True when this state is equal to other.
  virtual bool operator==(const FFState &other) const = 0;

  //! Negation of operator==.
  virtual bool operator!=(const FFState &other) const {
    return !(*this == other);
  }

  //! Textual form of the state; used by operator<< below.
  virtual std::string ToString() const = 0;
};

////////////////////////////////////////////////////////////////////////////////////////
/** Streams obj.ToString() to out and returns out. */
inline std::ostream &operator<<(std::ostream &out, const FFState &obj)
{
  out << obj.ToString();
  return out;
}

////////////////////////////////////////////////////////////////////////////////////////
/**
 * Trivial state: every DummyState hashes to 0 and compares equal to any
 * other FFState.
 *
 * ToString() is implemented here because it is pure virtual in FFState;
 * without an override DummyState is itself abstract and cannot be
 * instantiated.
 */
class DummyState : public FFState
{
public:
  DummyState() {}

  virtual size_t hash() const {
    return 0;
  }

  virtual bool operator==(const FFState &other) const {
    return true;
  }

  virtual std::string ToString() const {
    return "DummyState";
  }
};
|
| 48 |
+
|
| 49 |
+
}
|
| 50 |
+
|
mosesdecoder/moses2/FF/FeatureFunction.cpp
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* FeatureFunction.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <string>
|
| 8 |
+
#include <vector>
|
| 9 |
+
#include "FeatureFunction.h"
|
| 10 |
+
#include "../System.h"
|
| 11 |
+
#include "../legacy/Util2.h"
|
| 12 |
+
#include "util/exception.hh"
|
| 13 |
+
|
| 14 |
+
using namespace std;
|
| 15 |
+
|
| 16 |
+
namespace Moses2
|
| 17 |
+
{
|
| 18 |
+
|
| 19 |
+
/**
 * Initialise the defaults (one score, no phrase-table index, tuneable) and
 * then parse the configuration line.
 *
 * ParseLine() may overwrite m_numScores and m_name and collects every other
 * key=value pair in m_args.
 */
FeatureFunction::FeatureFunction(size_t startInd, const std::string &line) :
  m_startInd(startInd),
  m_numScores(1),
  m_PhraseTableInd(NOT_FOUND),
  m_tuneable(true)
{
  ParseLine(line);
  //cerr << GetName() << " " << m_startInd << "-" << (m_startInd + m_numScores - 1) << endl;
}
|
| 28 |
+
|
| 29 |
+
/** Destructor. The body is intentionally empty. */
FeatureFunction::~FeatureFunction()
{
  // TODO Auto-generated destructor stub
}
|
| 33 |
+
|
| 34 |
+
void FeatureFunction::ParseLine(const std::string &line)
|
| 35 |
+
{
|
| 36 |
+
vector<string> toks = Tokenize(line);
|
| 37 |
+
UTIL_THROW_IF2(toks.empty(), "Empty line");
|
| 38 |
+
|
| 39 |
+
string nameStub = toks[0];
|
| 40 |
+
|
| 41 |
+
set<string> keys;
|
| 42 |
+
|
| 43 |
+
for (size_t i = 1; i < toks.size(); ++i) {
|
| 44 |
+
vector<string> args = TokenizeFirstOnly(toks[i], "=");
|
| 45 |
+
UTIL_THROW_IF2(args.size() != 2,
|
| 46 |
+
"Incorrect format for feature function arg: " << toks[i]);
|
| 47 |
+
|
| 48 |
+
pair<set<string>::iterator, bool> ret = keys.insert(args[0]);
|
| 49 |
+
UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
|
| 50 |
+
|
| 51 |
+
if (args[0] == "num-features") {
|
| 52 |
+
m_numScores = Scan<size_t>(args[1]);
|
| 53 |
+
} else if (args[0] == "name") {
|
| 54 |
+
m_name = args[1];
|
| 55 |
+
} else {
|
| 56 |
+
m_args.push_back(args);
|
| 57 |
+
}
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
void FeatureFunction::ReadParameters()
|
| 62 |
+
{
|
| 63 |
+
while (!m_args.empty()) {
|
| 64 |
+
const vector<string> &args = m_args[0];
|
| 65 |
+
SetParameter(args[0], args[1]);
|
| 66 |
+
|
| 67 |
+
m_args.erase(m_args.begin());
|
| 68 |
+
}
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
void FeatureFunction::SetParameter(const std::string& key,
|
| 72 |
+
const std::string& value)
|
| 73 |
+
{
|
| 74 |
+
if (key == "tuneable") {
|
| 75 |
+
m_tuneable = Scan<bool>(value);
|
| 76 |
+
} else {
|
| 77 |
+
UTIL_THROW2(GetName() << ": Unknown argument " << key << "=" << value);
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
}
|
| 82 |
+
|
mosesdecoder/moses2/FF/FeatureFunction.h
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* FeatureFunction.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include <cstddef>
|
| 11 |
+
#include <string>
|
| 12 |
+
#include <vector>
|
| 13 |
+
#include "../TypeDef.h"
|
| 14 |
+
#include "../Phrase.h"
|
| 15 |
+
|
| 16 |
+
namespace Moses2
|
| 17 |
+
{
|
| 18 |
+
template<typename WORD>
|
| 19 |
+
class TargetPhrase;
|
| 20 |
+
|
| 21 |
+
class System;
|
| 22 |
+
class PhraseImpl;
|
| 23 |
+
class TargetPhrases;
|
| 24 |
+
class TargetPhraseImpl;
|
| 25 |
+
class Scores;
|
| 26 |
+
class ManagerBase;
|
| 27 |
+
class MemPool;
|
| 28 |
+
class InputType;
|
| 29 |
+
|
| 30 |
+
namespace SCFG
|
| 31 |
+
{
|
| 32 |
+
class TargetPhrase;
|
| 33 |
+
class TargetPhrases;
|
| 34 |
+
class Word;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
class FeatureFunction
|
| 38 |
+
{
|
| 39 |
+
public:
|
| 40 |
+
|
| 41 |
+
FeatureFunction(size_t startInd, const std::string &line);
|
| 42 |
+
virtual ~FeatureFunction();
|
| 43 |
+
virtual void Load(System &system) {
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
size_t GetStartInd() const {
|
| 47 |
+
return m_startInd;
|
| 48 |
+
}
|
| 49 |
+
size_t GetNumScores() const {
|
| 50 |
+
return m_numScores;
|
| 51 |
+
}
|
| 52 |
+
const std::string &GetName() const {
|
| 53 |
+
return m_name;
|
| 54 |
+
}
|
| 55 |
+
void SetName(const std::string &val) {
|
| 56 |
+
m_name = val;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
virtual size_t HasPhraseTableInd() const {
|
| 60 |
+
return false;
|
| 61 |
+
}
|
| 62 |
+
void SetPhraseTableInd(size_t ind) {
|
| 63 |
+
m_PhraseTableInd = ind;
|
| 64 |
+
}
|
| 65 |
+
size_t GetPhraseTableInd() const {
|
| 66 |
+
return m_PhraseTableInd;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
//! if false, then this feature is not displayed in the n-best list.
|
| 70 |
+
// use with care
|
| 71 |
+
virtual bool IsTuneable() const {
|
| 72 |
+
return m_tuneable;
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
virtual void SetParameter(const std::string& key, const std::string& value);
|
| 76 |
+
|
| 77 |
+
// may have more factors than actually need, but not guaranteed.
|
| 78 |
+
virtual void
|
| 79 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
| 80 |
+
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
| 81 |
+
SCORE &estimatedScore) const = 0;
|
| 82 |
+
|
| 83 |
+
// For SCFG decoding, the source can contain non-terminals, NOT the raw
|
| 84 |
+
// source from the input sentence
|
| 85 |
+
virtual void
|
| 86 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
| 87 |
+
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
| 88 |
+
SCORE &estimatedScore) const = 0;
|
| 89 |
+
|
| 90 |
+
// used by lexicalised reordering model to add scores to tp data structures
|
| 91 |
+
virtual void EvaluateAfterTablePruning(MemPool &pool,
|
| 92 |
+
const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const {
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
virtual void EvaluateAfterTablePruning(MemPool &pool,
|
| 96 |
+
const SCFG::TargetPhrases &tps, const Phrase<SCFG::Word> &sourcePhrase) const {
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
virtual void InitializeForInput(const ManagerBase &mgr, const InputType &input) { };
|
| 100 |
+
|
| 101 |
+
// clean up temporary memory, called after processing each sentence
|
| 102 |
+
virtual void CleanUpAfterSentenceProcessing(const System &system, const InputType &input) const {
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
protected:
|
| 106 |
+
size_t m_startInd;
|
| 107 |
+
size_t m_numScores;
|
| 108 |
+
size_t m_PhraseTableInd;
|
| 109 |
+
std::string m_name;
|
| 110 |
+
std::vector<std::vector<std::string> > m_args;
|
| 111 |
+
bool m_tuneable;
|
| 112 |
+
|
| 113 |
+
virtual void ReadParameters();
|
| 114 |
+
void ParseLine(const std::string &line);
|
| 115 |
+
};
|
| 116 |
+
|
| 117 |
+
}
|
| 118 |
+
|
mosesdecoder/moses2/FF/FeatureFunctions.cpp
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* FeatureFunctions.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include <boost/foreach.hpp>
|
| 9 |
+
#include "FeatureRegistry.h"
|
| 10 |
+
#include "FeatureFunctions.h"
|
| 11 |
+
#include "StatefulFeatureFunction.h"
|
| 12 |
+
#include "../System.h"
|
| 13 |
+
#include "../Scores.h"
|
| 14 |
+
#include "../MemPool.h"
|
| 15 |
+
|
| 16 |
+
#include "../TranslationModel/PhraseTable.h"
|
| 17 |
+
#include "../TranslationModel/UnknownWordPenalty.h"
|
| 18 |
+
#include "../SCFG/TargetPhraseImpl.h"
|
| 19 |
+
#include "../SCFG/Word.h"
|
| 20 |
+
#include "../PhraseBased/TargetPhraseImpl.h"
|
| 21 |
+
#include "util/exception.hh"
|
| 22 |
+
|
| 23 |
+
using namespace std;
|
| 24 |
+
|
| 25 |
+
namespace Moses2
|
| 26 |
+
{
|
| 27 |
+
/**
 * Create an empty collection bound to system.
 *
 * m_unkWP is explicitly initialised to NULL: it is only assigned in Create()
 * when an UnknownWordPenalty feature is configured, yet it is returned by
 * GetUnknownWordPenalty() and compared against in
 * GetPhraseTableExcludeUnknownWordPenalty(). Initialisers are listed in
 * member declaration order.
 */
FeatureFunctions::FeatureFunctions(System &system) :
  m_unkWP(NULL), m_system(system), m_ffStartInd(0)
{
}
|
| 31 |
+
|
| 32 |
+
/** Hands m_featureFunctions to RemoveAllInColl(). */
FeatureFunctions::~FeatureFunctions()
{
  RemoveAllInColl(m_featureFunctions);
}
|
| 36 |
+
|
| 37 |
+
void FeatureFunctions::Load()
|
| 38 |
+
{
|
| 39 |
+
// load, everything but pts
|
| 40 |
+
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
|
| 41 |
+
FeatureFunction *nonConstFF = const_cast<FeatureFunction*>(ff);
|
| 42 |
+
PhraseTable *pt = dynamic_cast<PhraseTable*>(nonConstFF);
|
| 43 |
+
|
| 44 |
+
if (pt) {
|
| 45 |
+
// do nothing. load pt last
|
| 46 |
+
} else {
|
| 47 |
+
cerr << "Loading " << nonConstFF->GetName() << endl;
|
| 48 |
+
nonConstFF->Load(m_system);
|
| 49 |
+
cerr << "Finished loading " << nonConstFF->GetName() << endl;
|
| 50 |
+
}
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
// load pt
|
| 54 |
+
BOOST_FOREACH(const PhraseTable *pt, phraseTables) {
|
| 55 |
+
PhraseTable *nonConstPT = const_cast<PhraseTable*>(pt);
|
| 56 |
+
cerr << "Loading " << nonConstPT->GetName() << endl;
|
| 57 |
+
nonConstPT->Load(m_system);
|
| 58 |
+
cerr << "Finished loading " << nonConstPT->GetName() << endl;
|
| 59 |
+
}
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
void FeatureFunctions::Create()
|
| 63 |
+
{
|
| 64 |
+
const Parameter ¶ms = m_system.params;
|
| 65 |
+
|
| 66 |
+
const PARAM_VEC *ffParams = params.GetParam("feature");
|
| 67 |
+
UTIL_THROW_IF2(ffParams == NULL, "Must have [feature] section");
|
| 68 |
+
|
| 69 |
+
BOOST_FOREACH(const std::string &line, *ffParams) {
|
| 70 |
+
FeatureFunction *ff = Create(line);
|
| 71 |
+
|
| 72 |
+
m_featureFunctions.push_back(ff);
|
| 73 |
+
|
| 74 |
+
StatefulFeatureFunction *sfff = dynamic_cast<StatefulFeatureFunction*>(ff);
|
| 75 |
+
if (sfff) {
|
| 76 |
+
sfff->SetStatefulInd(m_statefulFeatureFunctions.size());
|
| 77 |
+
m_statefulFeatureFunctions.push_back(sfff);
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
if (ff->HasPhraseTableInd()) {
|
| 81 |
+
ff->SetPhraseTableInd(m_withPhraseTableInd.size());
|
| 82 |
+
m_withPhraseTableInd.push_back(ff);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
PhraseTable *pt = dynamic_cast<PhraseTable*>(ff);
|
| 86 |
+
if (pt) {
|
| 87 |
+
pt->SetPtInd(phraseTables.size());
|
| 88 |
+
phraseTables.push_back(pt);
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
UnknownWordPenalty *unkWP = dynamic_cast<UnknownWordPenalty *>(pt);
|
| 92 |
+
if (unkWP) {
|
| 93 |
+
m_unkWP = unkWP;
|
| 94 |
+
|
| 95 |
+
// legacy support
|
| 96 |
+
if (m_system.options.unk.drop) {
|
| 97 |
+
unkWP->SetParameter("drop", "true");
|
| 98 |
+
}
|
| 99 |
+
if (m_system.options.unk.mark) {
|
| 100 |
+
unkWP->SetParameter("prefix", m_system.options.unk.prefix);
|
| 101 |
+
unkWP->SetParameter("suffix", m_system.options.unk.suffix);
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
OverrideFeatures();
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
/**
 * Build one feature function from its configuration line.
 *
 * The first token of the line selects the factory in the FeatureRegistry.
 * A feature without an explicit name receives a generated default name.
 * m_ffStartInd is advanced by the number of scores of the new feature so the
 * next feature starts right after it.
 *
 * Throws (UTIL_THROW_IF2) when the line contains no token or when the
 * registry returns NULL.
 */
FeatureFunction *FeatureFunctions::Create(const std::string &line)
{
  const vector<string> toks = Tokenize(line);
  // guard toks[0] below; an empty line would otherwise index an empty vector
  UTIL_THROW_IF2(toks.empty(), "Empty feature function line");

  FeatureFunction *ff = FeatureRegistry::Instance().Construct(m_ffStartInd, toks[0], line);
  UTIL_THROW_IF2(ff == NULL, "Feature function not created");

  // name
  if (ff->GetName() == "") {
    ff->SetName(GetDefaultName(toks[0]));
  }

  m_ffStartInd += ff->GetNumScores();

  return ff;
}
|
| 125 |
+
|
| 126 |
+
std::string FeatureFunctions::GetDefaultName(const std::string &stub)
|
| 127 |
+
{
|
| 128 |
+
size_t ind;
|
| 129 |
+
boost::unordered_map<std::string, size_t>::iterator iter =
|
| 130 |
+
m_defaultNames.find(stub);
|
| 131 |
+
if (iter == m_defaultNames.end()) {
|
| 132 |
+
m_defaultNames[stub] = 0;
|
| 133 |
+
ind = 0;
|
| 134 |
+
} else {
|
| 135 |
+
ind = ++(iter->second);
|
| 136 |
+
}
|
| 137 |
+
return stub + SPrint(ind);
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
/**
 * Look up a feature function by name (const access).
 *
 * Returns the first feature whose GetName() equals name, or NULL when there
 * is none.
 */
const FeatureFunction *FeatureFunctions::FindFeatureFunction(
  const std::string &name) const
{
  for (size_t i = 0; i < m_featureFunctions.size(); ++i) {
    const FeatureFunction *candidate = m_featureFunctions[i];
    if (candidate->GetName() == name) {
      return candidate;
    }
  }
  return NULL;
}
|
| 150 |
+
|
| 151 |
+
/**
 * Look up a feature function by name (mutable access).
 *
 * Delegates to the const overload and strips the constness of the result;
 * returns NULL when no feature has that name.
 */
FeatureFunction *FeatureFunctions::FindFeatureFunction(
  const std::string &name)
{
  const FeatureFunctions &self = *this;
  return const_cast<FeatureFunction *>(self.FindFeatureFunction(name));
}
|
| 161 |
+
|
| 162 |
+
/**
 * Return the ptInd-th phrase table, counting only tables other than the
 * unknown-word penalty (m_unkWP).
 *
 * Walks phraseTables in place instead of copying the vector, and throws
 * (UTIL_THROW2) when ptInd is past the last remaining table rather than
 * indexing out of range.
 */
const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd)
{
  // assume only 1 unk wp
  size_t ind = 0;
  BOOST_FOREACH(const PhraseTable *pt, phraseTables) {
    if (pt == m_unkWP) {
      continue;
    }
    if (ind == ptInd) {
      return pt;
    }
    ++ind;
  }

  UTIL_THROW2("Phrase table index out of range: " << ptInd);
  return NULL; // not reached
}
|
| 178 |
+
|
| 179 |
+
/**
 * Run EvaluateInIsolation() of every feature on a phrase-based target phrase.
 *
 * All features write into the target phrase's own Scores object and add to a
 * shared estimated score, which is stored on the target phrase afterwards.
 */
void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system,
    const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const
{
  SCORE estimatedScore = 0;

  for (size_t i = 0; i < m_featureFunctions.size(); ++i) {
    const FeatureFunction *ff = m_featureFunctions[i];
    ff->EvaluateInIsolation(pool, system, source, targetPhrase,
                            targetPhrase.GetScores(), estimatedScore);
  }

  targetPhrase.SetEstimatedScore(estimatedScore);
}
|
| 191 |
+
|
| 192 |
+
/**
 * Run EvaluateInIsolation() of every feature on an SCFG target phrase.
 *
 * All features write into the target phrase's own Scores object and add to a
 * shared estimated score, which is stored on the target phrase afterwards.
 */
void FeatureFunctions::EvaluateInIsolation(
  MemPool &pool,
  const System &system,
  const Phrase<SCFG::Word> &source,
  SCFG::TargetPhraseImpl &targetPhrase) const
{
  SCORE estimatedScore = 0;

  for (size_t i = 0; i < m_featureFunctions.size(); ++i) {
    const FeatureFunction *ff = m_featureFunctions[i];
    ff->EvaluateInIsolation(pool, system, source, targetPhrase,
                            targetPhrase.GetScores(), estimatedScore);
  }

  targetPhrase.SetEstimatedScore(estimatedScore);
}
|
| 207 |
+
|
| 208 |
+
/** Forward EvaluateAfterTablePruning() (phrase-based) to every feature, in order. */
void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool,
    const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
{
  for (size_t i = 0; i < m_featureFunctions.size(); ++i) {
    const FeatureFunction *ff = m_featureFunctions[i];
    ff->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
  }
}
|
| 215 |
+
|
| 216 |
+
/** Forward EvaluateAfterTablePruning() (SCFG) to every feature, in order. */
void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
    const Phrase<SCFG::Word> &sourcePhrase) const
{
  for (size_t i = 0; i < m_featureFunctions.size(); ++i) {
    const FeatureFunction *ff = m_featureFunctions[i];
    ff->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
  }
}
|
| 223 |
+
|
| 224 |
+
void FeatureFunctions::EvaluateWhenAppliedBatch(const Batch &batch) const
|
| 225 |
+
{
|
| 226 |
+
BOOST_FOREACH(const StatefulFeatureFunction *ff, m_statefulFeatureFunctions) {
|
| 227 |
+
ff->EvaluateWhenAppliedBatch(m_system, batch);
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
void FeatureFunctions::InitializeForInput(const ManagerBase &mgr, const InputType &input)
|
| 232 |
+
{
|
| 233 |
+
BOOST_FOREACH(FeatureFunction *ff, m_featureFunctions) {
|
| 234 |
+
ff->InitializeForInput(mgr, input);
|
| 235 |
+
}
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
void FeatureFunctions::CleanUpAfterSentenceProcessing(const InputType &input) const
|
| 239 |
+
{
|
| 240 |
+
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
|
| 241 |
+
ff->CleanUpAfterSentenceProcessing(m_system, input);
|
| 242 |
+
}
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
void FeatureFunctions::ShowWeights(const Weights &allWeights)
|
| 246 |
+
{
|
| 247 |
+
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
|
| 248 |
+
cout << ff->GetName();
|
| 249 |
+
if (ff->IsTuneable()) {
|
| 250 |
+
cout << "=";
|
| 251 |
+
vector<SCORE> weights = allWeights.GetWeights(*ff);
|
| 252 |
+
for (size_t i = 0; i < weights.size(); ++i) {
|
| 253 |
+
cout << " " << weights[i];
|
| 254 |
+
}
|
| 255 |
+
cout << endl;
|
| 256 |
+
} else {
|
| 257 |
+
cout << " UNTUNEABLE" << endl;
|
| 258 |
+
}
|
| 259 |
+
}
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
void FeatureFunctions::OverrideFeatures()
|
| 263 |
+
{
|
| 264 |
+
const Parameter ¶meter = m_system.params;
|
| 265 |
+
|
| 266 |
+
const PARAM_VEC *params = parameter.GetParam("feature-overwrite");
|
| 267 |
+
for (size_t i = 0; params && i < params->size(); ++i) {
|
| 268 |
+
const string &str = params->at(i);
|
| 269 |
+
vector<string> toks = Tokenize(str);
|
| 270 |
+
UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
|
| 271 |
+
|
| 272 |
+
FeatureFunction *ff = FindFeatureFunction(toks[0]);
|
| 273 |
+
UTIL_THROW_IF2(ff == NULL, "Feature function not found: " << toks[0]);
|
| 274 |
+
|
| 275 |
+
for (size_t j = 1; j < toks.size(); ++j) {
|
| 276 |
+
const string &keyValStr = toks[j];
|
| 277 |
+
vector<string> keyVal = Tokenize(keyValStr, "=");
|
| 278 |
+
UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
|
| 279 |
+
|
| 280 |
+
cerr << "Override " << ff->GetName() << " "
|
| 281 |
+
<< keyVal[0] << "=" << keyVal[1] << endl;
|
| 282 |
+
|
| 283 |
+
ff->SetParameter(keyVal[0], keyVal[1]);
|
| 284 |
+
|
| 285 |
+
}
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
}
|
| 291 |
+
|
mosesdecoder/moses2/FF/FeatureFunctions.h
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* FeatureFunctions.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include <boost/unordered_map.hpp>
|
| 11 |
+
#include <vector>
|
| 12 |
+
#include <string>
|
| 13 |
+
#include "../legacy/Parameter.h"
|
| 14 |
+
#include "../Phrase.h"
|
| 15 |
+
|
| 16 |
+
namespace Moses2
|
| 17 |
+
{
|
| 18 |
+
template<typename WORD>
|
| 19 |
+
class TargetPhrase;
|
| 20 |
+
|
| 21 |
+
class System;
|
| 22 |
+
class FeatureFunction;
|
| 23 |
+
class StatefulFeatureFunction;
|
| 24 |
+
class PhraseTable;
|
| 25 |
+
class Manager;
|
| 26 |
+
class MemPool;
|
| 27 |
+
class PhraseImpl;
|
| 28 |
+
class TargetPhrases;
|
| 29 |
+
class TargetPhraseImpl;
|
| 30 |
+
class Scores;
|
| 31 |
+
class Hypothesis;
|
| 32 |
+
class UnknownWordPenalty;
|
| 33 |
+
class Weights;
|
| 34 |
+
class InputType;
|
| 35 |
+
|
| 36 |
+
namespace SCFG
|
| 37 |
+
{
|
| 38 |
+
class TargetPhraseImpl;
|
| 39 |
+
class TargetPhrases;
|
| 40 |
+
class Word;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
class FeatureFunctions
|
| 44 |
+
{
|
| 45 |
+
public:
|
| 46 |
+
std::vector<const PhraseTable*> phraseTables;
|
| 47 |
+
|
| 48 |
+
FeatureFunctions(System &system);
|
| 49 |
+
virtual ~FeatureFunctions();
|
| 50 |
+
|
| 51 |
+
const std::vector<FeatureFunction*> &GetFeatureFunctions() const {
|
| 52 |
+
return m_featureFunctions;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const {
|
| 56 |
+
return m_statefulFeatureFunctions;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
const std::vector<const FeatureFunction*> &GetWithPhraseTableInd() const {
|
| 60 |
+
return m_withPhraseTableInd;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
size_t GetNumScores() const {
|
| 64 |
+
return m_ffStartInd;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
void Create();
|
| 68 |
+
void Load();
|
| 69 |
+
|
| 70 |
+
const FeatureFunction *FindFeatureFunction(const std::string &name) const;
|
| 71 |
+
|
| 72 |
+
const PhraseTable *GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd);
|
| 73 |
+
const UnknownWordPenalty *GetUnknownWordPenalty() const {
|
| 74 |
+
return m_unkWP;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
// the pool here must be the system pool if the rule was loaded during load, or the mgr pool if it was loaded on demand
|
| 78 |
+
void EvaluateInIsolation(MemPool &pool, const System &system,
|
| 79 |
+
const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const;
|
| 80 |
+
void EvaluateInIsolation(MemPool &pool, const System &system,
|
| 81 |
+
const Phrase<SCFG::Word> &source, SCFG::TargetPhraseImpl &targetPhrase) const;
|
| 82 |
+
|
| 83 |
+
void EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
|
| 84 |
+
const Phrase<Moses2::Word> &sourcePhrase) const;
|
| 85 |
+
void EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
|
| 86 |
+
const Phrase<SCFG::Word> &sourcePhrase) const;
|
| 87 |
+
|
| 88 |
+
void EvaluateWhenAppliedBatch(const Batch &batch) const;
|
| 89 |
+
|
| 90 |
+
void InitializeForInput(const ManagerBase &mgr, const InputType &input);
|
| 91 |
+
void CleanUpAfterSentenceProcessing(const InputType &input) const;
|
| 92 |
+
|
| 93 |
+
void ShowWeights(const Weights &allWeights);
|
| 94 |
+
|
| 95 |
+
protected:
|
| 96 |
+
std::vector<FeatureFunction*> m_featureFunctions;
|
| 97 |
+
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
|
| 98 |
+
std::vector<const FeatureFunction*> m_withPhraseTableInd;
|
| 99 |
+
const UnknownWordPenalty *m_unkWP;
|
| 100 |
+
|
| 101 |
+
boost::unordered_map<std::string, size_t> m_defaultNames;
|
| 102 |
+
System &m_system;
|
| 103 |
+
size_t m_ffStartInd;
|
| 104 |
+
|
| 105 |
+
FeatureFunction *Create(const std::string &line);
|
| 106 |
+
std::string GetDefaultName(const std::string &stub);
|
| 107 |
+
void OverrideFeatures();
|
| 108 |
+
FeatureFunction *FindFeatureFunction(const std::string &name);
|
| 109 |
+
|
| 110 |
+
};
|
| 111 |
+
|
| 112 |
+
}
|
| 113 |
+
|
mosesdecoder/moses2/FF/FeatureRegistry.cpp
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "FeatureRegistry.h"
|
| 2 |
+
|
| 3 |
+
#include "../TranslationModel/Memory/PhraseTableMemory.h"
|
| 4 |
+
#include "../TranslationModel/ProbingPT.h"
|
| 5 |
+
#include "../TranslationModel/UnknownWordPenalty.h"
|
| 6 |
+
#include "../TranslationModel/Transliteration.h"
|
| 7 |
+
#include "../TranslationModel/Dynamic/DynamicPhraseTable.h"
|
| 8 |
+
|
| 9 |
+
#include "../LM/KENLM.h"
|
| 10 |
+
#include "../LM/KENLMBatch.h"
|
| 11 |
+
#include "../LM/LanguageModel.h"
|
| 12 |
+
#include "../LM/GPULM.h"
|
| 13 |
+
|
| 14 |
+
#include "Distortion.h"
|
| 15 |
+
#include "LexicalReordering/LexicalReordering.h"
|
| 16 |
+
#include "PhrasePenalty.h"
|
| 17 |
+
#include "WordPenalty.h"
|
| 18 |
+
#include "OSM/OpSequenceModel.h"
|
| 19 |
+
|
| 20 |
+
#include "ExampleStatefulFF.h"
|
| 21 |
+
#include "ExampleStatelessFF.h"
|
| 22 |
+
|
| 23 |
+
using namespace std;
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
namespace Moses2
|
| 27 |
+
{
|
| 28 |
+
FeatureRegistry FeatureRegistry::s_instance;
|
| 29 |
+
|
| 30 |
+
template<class F>
|
| 31 |
+
class DefaultFeatureFactory: public FeatureFactory
|
| 32 |
+
{
|
| 33 |
+
public:
|
| 34 |
+
FeatureFunction *Create(size_t startInd, const std::string &line) const {
|
| 35 |
+
return new F(startInd, line);
|
| 36 |
+
}
|
| 37 |
+
};
|
| 38 |
+
|
| 39 |
+
////////////////////////////////////////////////////////////////////
|
| 40 |
+
class KenFactory: public FeatureFactory
|
| 41 |
+
{
|
| 42 |
+
public:
|
| 43 |
+
FeatureFunction *Create(size_t startInd, const std::string &line) const {
|
| 44 |
+
return ConstructKenLM(startInd, line);
|
| 45 |
+
}
|
| 46 |
+
};
|
| 47 |
+
|
| 48 |
+
////////////////////////////////////////////////////////////////////
|
| 49 |
+
/**
 * Register a factory for every feature known to the decoder.
 *
 * The name under which a factory is registered is the string matched against
 * the first token of a [feature] line (see Construct()).
 */
FeatureRegistry::FeatureRegistry()
{
// Feature with same name as class
#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
// Feature with different name than class.
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());

  // translation models
  MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory);
  MOSES_FNAME(ProbingPT);
  MOSES_FNAME2("PhraseDictionaryTransliteration", Transliteration);
  MOSES_FNAME(UnknownWordPenalty);
  MOSES_FNAME(DynamicPhraseTable);

  // language models; KENLM goes through its own factory
  Add("KENLM", new KenFactory());
  MOSES_FNAME(KENLMBatch);
  MOSES_FNAME(GPULM);
  MOSES_FNAME(LanguageModel);

  // remaining features
  MOSES_FNAME(Distortion);
  MOSES_FNAME(LexicalReordering);
  MOSES_FNAME(PhrasePenalty);
  MOSES_FNAME(WordPenalty);
  MOSES_FNAME(OpSequenceModel);

  // examples
  MOSES_FNAME(ExampleStatefulFF);
  MOSES_FNAME(ExampleStatelessFF);
}
|
| 78 |
+
|
| 79 |
+
FeatureRegistry::~FeatureRegistry()
|
| 80 |
+
{
|
| 81 |
+
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
|
| 85 |
+
{
|
| 86 |
+
std::pair<std::string, boost::shared_ptr<FeatureFactory> > to_ins(name,
|
| 87 |
+
boost::shared_ptr<FeatureFactory>(factory));
|
| 88 |
+
if (!registry_.insert(to_ins).second) {
|
| 89 |
+
cerr << "Duplicate feature name " << name << endl;
|
| 90 |
+
abort();
|
| 91 |
+
}
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
// Build the feature function registered under `name`.
//
// startInd and line are forwarded unchanged to the factory's Create().
// Returns the pointer produced by the factory. An unknown name is fatal:
// a message is written to stderr and the process aborts.
FeatureFunction *FeatureRegistry::Construct(size_t startInd,
    const std::string &name, const std::string &line) const
{
  Map::const_iterator i = registry_.find(name);
  if (i == registry_.end()) {
    // Terminate the line so the message is not glued to whatever the
    // abort handler / shell prints next (matches the message in Add()).
    cerr << "Feature name " << name << " is not registered." << endl;
    abort();
  }
  return i->second->Create(startInd, line);
}
|
| 106 |
+
|
| 107 |
+
void FeatureRegistry::PrintFF() const
|
| 108 |
+
{
|
| 109 |
+
std::vector<std::string> ffs;
|
| 110 |
+
std::cerr << "Available feature functions:" << std::endl;
|
| 111 |
+
Map::const_iterator iter;
|
| 112 |
+
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
|
| 113 |
+
const std::string &ffName = iter->first;
|
| 114 |
+
ffs.push_back(ffName);
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
std::vector<std::string>::const_iterator iterVec;
|
| 118 |
+
std::sort(ffs.begin(), ffs.end());
|
| 119 |
+
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
|
| 120 |
+
const std::string &ffName = *iterVec;
|
| 121 |
+
std::cerr << ffName << " ";
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
std::cerr << std::endl;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
}
|
| 128 |
+
|
mosesdecoder/moses2/FF/FeatureRegistry.h
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <boost/unordered_map.hpp>
|
| 3 |
+
#include <boost/shared_ptr.hpp>
|
| 4 |
+
|
| 5 |
+
namespace Moses2
|
| 6 |
+
{
|
| 7 |
+
class FeatureFunction;
|
| 8 |
+
|
| 9 |
+
////////////////////////////////////////////////////////////////////
|
| 10 |
+
class FeatureFactory
|
| 11 |
+
{
|
| 12 |
+
public:
|
| 13 |
+
virtual ~FeatureFactory() {
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
virtual FeatureFunction *Create(size_t startInd, const std::string &line) const = 0;
|
| 17 |
+
|
| 18 |
+
protected:
|
| 19 |
+
FeatureFactory() {
|
| 20 |
+
}
|
| 21 |
+
};
|
| 22 |
+
|
| 23 |
+
////////////////////////////////////////////////////////////////////
|
| 24 |
+
class FeatureRegistry
|
| 25 |
+
{
|
| 26 |
+
public:
|
| 27 |
+
static const FeatureRegistry &Instance() {
|
| 28 |
+
return s_instance;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
~FeatureRegistry();
|
| 32 |
+
|
| 33 |
+
FeatureFunction *Construct(size_t startInd, const std::string &name,
|
| 34 |
+
const std::string &line) const;
|
| 35 |
+
void PrintFF() const;
|
| 36 |
+
|
| 37 |
+
private:
|
| 38 |
+
static FeatureRegistry s_instance;
|
| 39 |
+
|
| 40 |
+
typedef boost::unordered_map<std::string, boost::shared_ptr<FeatureFactory> > Map;
|
| 41 |
+
Map registry_;
|
| 42 |
+
|
| 43 |
+
FeatureRegistry();
|
| 44 |
+
|
| 45 |
+
void Add(const std::string &name, FeatureFactory *factory);
|
| 46 |
+
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
////////////////////////////////////////////////////////////////////
|
| 50 |
+
|
| 51 |
+
}
|
| 52 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* BidirectionalReorderingState.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <boost/functional/hash_fwd.hpp>
|
| 8 |
+
#include "BidirectionalReorderingState.h"
|
| 9 |
+
#include "../../legacy/Util2.h"
|
| 10 |
+
#include "../../PhraseBased/Manager.h"
|
| 11 |
+
|
| 12 |
+
using namespace std;
|
| 13 |
+
|
| 14 |
+
namespace Moses2
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
BidirectionalReorderingState::BidirectionalReorderingState(
|
| 18 |
+
const LRModel &config, LRState *bw, LRState *fw, size_t offset) :
|
| 19 |
+
LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward(
|
| 20 |
+
fw)
|
| 21 |
+
{
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
BidirectionalReorderingState::~BidirectionalReorderingState()
|
| 25 |
+
{
|
| 26 |
+
// TODO Auto-generated destructor stub
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
void BidirectionalReorderingState::Init(const LRState *prev,
|
| 30 |
+
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
|
| 31 |
+
const Bitmap *coverage)
|
| 32 |
+
{
|
| 33 |
+
if (m_backward) {
|
| 34 |
+
m_backward->Init(prev, topt, path, first, coverage);
|
| 35 |
+
}
|
| 36 |
+
if (m_forward) {
|
| 37 |
+
m_forward->Init(prev, topt, path, first, coverage);
|
| 38 |
+
}
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
std::string BidirectionalReorderingState::ToString() const
|
| 42 |
+
{
|
| 43 |
+
return "BidirectionalReorderingState " + SPrint(this) + " "
|
| 44 |
+
+ SPrint(m_backward) + " " + SPrint(m_forward);
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
size_t BidirectionalReorderingState::hash() const
|
| 48 |
+
{
|
| 49 |
+
size_t ret = m_backward->hash();
|
| 50 |
+
boost::hash_combine(ret, m_forward->hash());
|
| 51 |
+
|
| 52 |
+
return ret;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
bool BidirectionalReorderingState::operator==(const FFState& o) const
|
| 56 |
+
{
|
| 57 |
+
if (&o == this) return true;
|
| 58 |
+
|
| 59 |
+
BidirectionalReorderingState const &other =
|
| 60 |
+
static_cast<BidirectionalReorderingState const&>(o);
|
| 61 |
+
|
| 62 |
+
bool ret = (*m_backward == *other.m_backward)
|
| 63 |
+
&& (*m_forward == *other.m_forward);
|
| 64 |
+
return ret;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
void BidirectionalReorderingState::Expand(const ManagerBase &mgr,
|
| 68 |
+
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
|
| 69 |
+
Scores &scores, FFState &state) const
|
| 70 |
+
{
|
| 71 |
+
BidirectionalReorderingState &stateCast =
|
| 72 |
+
static_cast<BidirectionalReorderingState&>(state);
|
| 73 |
+
m_backward->Expand(mgr, ff, hypo, phraseTableInd, scores,
|
| 74 |
+
*stateCast.m_backward);
|
| 75 |
+
m_forward->Expand(mgr, ff, hypo, phraseTableInd, scores,
|
| 76 |
+
*stateCast.m_forward);
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.h
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* BidirectionalReorderingState.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
#include "LRState.h"
|
| 9 |
+
|
| 10 |
+
namespace Moses2
|
| 11 |
+
{
|
| 12 |
+
|
| 13 |
+
class BidirectionalReorderingState: public LRState
|
| 14 |
+
{
|
| 15 |
+
public:
|
| 16 |
+
BidirectionalReorderingState(const LRModel &config, LRState *bw, LRState *fw,
|
| 17 |
+
size_t offset);
|
| 18 |
+
|
| 19 |
+
virtual ~BidirectionalReorderingState();
|
| 20 |
+
|
| 21 |
+
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
|
| 22 |
+
const InputPathBase &path, bool first, const Bitmap *coverage);
|
| 23 |
+
|
| 24 |
+
size_t hash() const;
|
| 25 |
+
virtual bool operator==(const FFState& other) const;
|
| 26 |
+
|
| 27 |
+
virtual std::string ToString() const;
|
| 28 |
+
|
| 29 |
+
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
|
| 30 |
+
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
|
| 31 |
+
FFState &state) const;
|
| 32 |
+
|
| 33 |
+
protected:
|
| 34 |
+
LRState *m_backward;
|
| 35 |
+
LRState *m_forward;
|
| 36 |
+
|
| 37 |
+
};
|
| 38 |
+
|
| 39 |
+
} /* namespace Moses2 */
|
| 40 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* HReorderingBackwardState.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "HReorderingBackwardState.h"
|
| 9 |
+
#include "../../PhraseBased/Hypothesis.h"
|
| 10 |
+
#include "../../PhraseBased/Manager.h"
|
| 11 |
+
|
| 12 |
+
namespace Moses2
|
| 13 |
+
{
|
| 14 |
+
|
| 15 |
+
HReorderingBackwardState::HReorderingBackwardState(MemPool &pool,
|
| 16 |
+
const LRModel &config, size_t offset) :
|
| 17 |
+
LRState(config, LRModel::Backward, offset), reoStack(pool)
|
| 18 |
+
{
|
| 19 |
+
// TODO Auto-generated constructor stub
|
| 20 |
+
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
HReorderingBackwardState::~HReorderingBackwardState()
|
| 24 |
+
{
|
| 25 |
+
// TODO Auto-generated destructor stub
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
void HReorderingBackwardState::Init(const LRState *prev,
|
| 29 |
+
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
|
| 30 |
+
const Bitmap *coverage)
|
| 31 |
+
{
|
| 32 |
+
prevTP = &topt;
|
| 33 |
+
reoStack.Init();
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
size_t HReorderingBackwardState::hash() const
|
| 37 |
+
{
|
| 38 |
+
size_t ret = reoStack.hash();
|
| 39 |
+
return ret;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
bool HReorderingBackwardState::operator==(const FFState& o) const
|
| 43 |
+
{
|
| 44 |
+
const HReorderingBackwardState& other =
|
| 45 |
+
static_cast<const HReorderingBackwardState&>(o);
|
| 46 |
+
bool ret = reoStack == other.reoStack;
|
| 47 |
+
return ret;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
std::string HReorderingBackwardState::ToString() const
|
| 51 |
+
{
|
| 52 |
+
return "HReorderingBackwardState " + SPrint(m_offset);
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
void HReorderingBackwardState::Expand(const ManagerBase &mgr,
|
| 56 |
+
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
|
| 57 |
+
Scores &scores, FFState &state) const
|
| 58 |
+
{
|
| 59 |
+
HReorderingBackwardState &nextState =
|
| 60 |
+
static_cast<HReorderingBackwardState&>(state);
|
| 61 |
+
nextState.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
|
| 62 |
+
NULL);
|
| 63 |
+
nextState.reoStack = reoStack;
|
| 64 |
+
|
| 65 |
+
const Range &swrange = hypo.GetInputPath().range;
|
| 66 |
+
int reoDistance = nextState.reoStack.ShiftReduce(swrange);
|
| 67 |
+
ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
|
| 68 |
+
CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.h
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* HReorderingBackwardState.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
#include "LRState.h"
|
| 9 |
+
#include "ReorderingStack.h"
|
| 10 |
+
|
| 11 |
+
namespace Moses2
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
class HReorderingBackwardState: public LRState
|
| 15 |
+
{
|
| 16 |
+
private:
|
| 17 |
+
ReorderingStack reoStack;
|
| 18 |
+
|
| 19 |
+
public:
|
| 20 |
+
HReorderingBackwardState(MemPool &pool, const LRModel &config, size_t offset);
|
| 21 |
+
|
| 22 |
+
virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
|
| 23 |
+
const InputPathBase &path, bool first, const Bitmap *coverage);
|
| 24 |
+
|
| 25 |
+
virtual ~HReorderingBackwardState();
|
| 26 |
+
|
| 27 |
+
size_t hash() const;
|
| 28 |
+
virtual bool operator==(const FFState& other) const;
|
| 29 |
+
virtual std::string ToString() const;
|
| 30 |
+
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
|
| 31 |
+
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
|
| 32 |
+
FFState &state) const;
|
| 33 |
+
|
| 34 |
+
};
|
| 35 |
+
|
| 36 |
+
} /* namespace Moses2 */
|
| 37 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.cpp
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* HReorderingForwardState.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "HReorderingForwardState.h"
|
| 9 |
+
#include "../../InputPathBase.h"
|
| 10 |
+
#include "../../PhraseBased/Manager.h"
|
| 11 |
+
#include "../../PhraseBased/Hypothesis.h"
|
| 12 |
+
|
| 13 |
+
namespace Moses2
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
HReorderingForwardState::HReorderingForwardState(const LRModel &config,
|
| 17 |
+
size_t offset) :
|
| 18 |
+
LRState(config, LRModel::Forward, offset), m_first(true)
|
| 19 |
+
{
|
| 20 |
+
prevPath = NULL;
|
| 21 |
+
m_coverage = NULL;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
HReorderingForwardState::~HReorderingForwardState()
|
| 25 |
+
{
|
| 26 |
+
// TODO Auto-generated destructor stub
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
void HReorderingForwardState::Init(const LRState *prev,
|
| 30 |
+
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
|
| 31 |
+
const Bitmap *coverage)
|
| 32 |
+
{
|
| 33 |
+
prevTP = &topt;
|
| 34 |
+
prevPath = &path;
|
| 35 |
+
m_first = first;
|
| 36 |
+
m_coverage = coverage;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
size_t HReorderingForwardState::hash() const
|
| 40 |
+
{
|
| 41 |
+
size_t ret;
|
| 42 |
+
ret = hash_value(prevPath->range);
|
| 43 |
+
return ret;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
bool HReorderingForwardState::operator==(const FFState& o) const
|
| 47 |
+
{
|
| 48 |
+
if (&o == this) return true;
|
| 49 |
+
|
| 50 |
+
HReorderingForwardState const& other =
|
| 51 |
+
static_cast<HReorderingForwardState const&>(o);
|
| 52 |
+
|
| 53 |
+
int compareScores = (
|
| 54 |
+
(prevPath->range == other.prevPath->range) ?
|
| 55 |
+
ComparePrevScores(other.prevTP) :
|
| 56 |
+
(prevPath->range < other.prevPath->range) ? -1 : 1);
|
| 57 |
+
return compareScores == 0;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
std::string HReorderingForwardState::ToString() const
|
| 61 |
+
{
|
| 62 |
+
return "HReorderingForwardState " + SPrint(m_offset);
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
void HReorderingForwardState::Expand(const ManagerBase &mgr,
|
| 66 |
+
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
|
| 67 |
+
Scores &scores, FFState &state) const
|
| 68 |
+
{
|
| 69 |
+
const Range &cur = hypo.GetInputPath().range;
|
| 70 |
+
// keep track of the current coverage ourselves so we don't need the hypothesis
|
| 71 |
+
Manager &mgrCast = const_cast<Manager&>(static_cast<const Manager&>(mgr));
|
| 72 |
+
Bitmaps &bms = mgrCast.GetBitmaps();
|
| 73 |
+
const Bitmap &cov = bms.GetBitmap(*m_coverage, cur);
|
| 74 |
+
|
| 75 |
+
if (!m_first) {
|
| 76 |
+
LRModel::ReorderingType reoType;
|
| 77 |
+
reoType = m_configuration.GetOrientation(prevPath->range, cur, cov);
|
| 78 |
+
CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
HReorderingForwardState &stateCast =
|
| 82 |
+
static_cast<HReorderingForwardState&>(state);
|
| 83 |
+
stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
|
| 84 |
+
&cov);
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.h
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* HReorderingForwardState.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
#include "LRState.h"
|
| 9 |
+
|
| 10 |
+
namespace Moses2
|
| 11 |
+
{
|
| 12 |
+
class Range;
|
| 13 |
+
class Bitmap;
|
| 14 |
+
class InputPathBase;
|
| 15 |
+
|
| 16 |
+
class HReorderingForwardState: public LRState
|
| 17 |
+
{
|
| 18 |
+
public:
|
| 19 |
+
HReorderingForwardState(const LRModel &config, size_t offset);
|
| 20 |
+
virtual ~HReorderingForwardState();
|
| 21 |
+
|
| 22 |
+
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
|
| 23 |
+
const InputPathBase &path, bool first, const Bitmap *coverage);
|
| 24 |
+
|
| 25 |
+
size_t hash() const;
|
| 26 |
+
virtual bool operator==(const FFState& other) const;
|
| 27 |
+
virtual std::string ToString() const;
|
| 28 |
+
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
|
| 29 |
+
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
|
| 30 |
+
FFState &state) const;
|
| 31 |
+
|
| 32 |
+
protected:
|
| 33 |
+
bool m_first;
|
| 34 |
+
//const Range &m_prevRange;
|
| 35 |
+
const InputPathBase *prevPath;
|
| 36 |
+
const Bitmap *m_coverage;
|
| 37 |
+
|
| 38 |
+
};
|
| 39 |
+
|
| 40 |
+
} /* namespace Moses2 */
|
| 41 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/LRModel.cpp
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* LRModel.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "LRModel.h"
|
| 9 |
+
#include "../../legacy/Util2.h"
|
| 10 |
+
#include "../../legacy/Range.h"
|
| 11 |
+
#include "../../legacy/Bitmap.h"
|
| 12 |
+
#include "../../MemPool.h"
|
| 13 |
+
#include "util/exception.hh"
|
| 14 |
+
#include "PhraseBasedReorderingState.h"
|
| 15 |
+
#include "BidirectionalReorderingState.h"
|
| 16 |
+
#include "HReorderingBackwardState.h"
|
| 17 |
+
#include "HReorderingForwardState.h"
|
| 18 |
+
|
| 19 |
+
using namespace std;
|
| 20 |
+
|
| 21 |
+
namespace Moses2
|
| 22 |
+
{
|
| 23 |
+
|
| 24 |
+
// Returns true when `cur` continues monotonically after `prev`: either it
// starts at the position directly after the end of `prev`, or it starts later
// while that directly-following position is still unset in `cov`.
bool IsMonotonicStep(Range const& prev, // words range of last source phrase
                     Range const& cur, // words range of current source phrase
                     Bitmap const& cov) // coverage bitmap
{
  const size_t nextAfterPrev = prev.GetEndPos() + 1;
  const size_t curStart = cur.GetStartPos();

  if (curStart == nextAfterPrev) {
    return true;
  }
  return curStart >= nextAfterPrev && !cov.GetValue(nextAfterPrev);
}
|
| 32 |
+
|
| 33 |
+
// Returns true when `cur` is a swap relative to `prev`: either it ends at the
// position directly before the start of `prev`, or it ends earlier while that
// directly-preceding position is still unset in `cov`.
bool IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
{
  const size_t prevStart = prev.GetStartPos();
  const size_t curEnd = cur.GetEndPos();

  if (curEnd + 1 == prevStart) {
    return true;
  }
  // curEnd < prevStart guarantees prevStart > 0, so prevStart - 1 cannot wrap.
  return curEnd < prevStart && !cov.GetValue(prevStart - 1);
}
|
| 39 |
+
|
| 40 |
+
// Build a lexical reordering configuration from a '-'-separated string such
// as "hier-mslr-bidirectional-fe-allff".
//
// modelType: the configuration string; each part sets one option below.
// ff:        the owning feature function, returned by GetScoreProducer().
//
// Defaults before parsing: no model type, phrase-based, scores not collapsed,
// backward direction, condition F. An unrecognised part, or a string that
// never selects a model type, prints an error and terminates with exit(1).
LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) :
  m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(
    Backward),
  // Previously left uninitialised when neither "f" nor "fe" was given, so
  // GetCondition() could read an indeterminate value.
  // NOTE(review): F is the zero-valued enumerator; confirm it is the
  // intended default.
  m_condition(F), m_scoreProducer(&ff)
{
  std::vector<std::string> config = Tokenize(modelType, "-");

  for (size_t i = 0; i < config.size(); ++i) {
    if (config[i] == "hier") {
      m_phraseBased = false;
    } else if (config[i] == "phrase") {
      m_phraseBased = true;
    } else if (config[i] == "wbe") {
      // no word-based decoding available, fall-back to phrase-based
      m_phraseBased = true;
    }
    // This is the old lexical reordering model combination of moses

    else if (config[i] == "msd") {
      m_modelType = MSD;
    } else if (config[i] == "mslr") {
      m_modelType = MSLR;
    } else if (config[i] == "monotonicity") {
      m_modelType = Monotonic;
    } else if (config[i] == "leftright") {
      m_modelType = LeftRight;
    }

    // unidirectional is deprecated, use backward instead
    else if (config[i] == "unidirectional") {
      m_direction = Backward;
    } else if (config[i] == "backward") {
      m_direction = Backward;
    } else if (config[i] == "forward") {
      m_direction = Forward;
    } else if (config[i] == "bidirectional") {
      m_direction = Bidirectional;
    }

    else if (config[i] == "f") {
      m_condition = F;
    } else if (config[i] == "fe") {
      m_condition = FE;
    }

    else if (config[i] == "collapseff") {
      m_collapseScores = true;
    } else if (config[i] == "allff") {
      m_collapseScores = false;
    } else {
      std::cerr
          << "Illegal part in the lexical reordering configuration string: "
          << config[i] << std::endl;
      exit(1);
    }
  }

  if (m_modelType == None) {
    std::cerr << "You need to specify the type of the reordering model "
              << "(msd, monotonicity,...)" << std::endl;
    exit(1);
  }

}
|
| 103 |
+
|
| 104 |
+
LRModel::~LRModel()
|
| 105 |
+
{
|
| 106 |
+
// TODO Auto-generated destructor stub
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
size_t LRModel::GetNumberOfTypes() const
|
| 110 |
+
{
|
| 111 |
+
return ((m_modelType == MSD) ? 3 : (m_modelType == MSLR) ? 4 : 2);
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
/// return orientation for the first phrase
|
| 115 |
+
LRModel::ReorderingType LRModel::GetOrientation(Range const& cur) const
|
| 116 |
+
{
|
| 117 |
+
UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
|
| 118 |
+
return ((m_modelType == LeftRight) ? R : (cur.GetStartPos() == 0) ? M :
|
| 119 |
+
(m_modelType == MSD) ? D : (m_modelType == MSLR) ? DR : NM);
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
// Orientation of `cur` relative to the previous source range `prev`, decided
// from the two ranges alone (no coverage bitmap).
// Throws (via UTIL_THROW_IF2) when no model type has been configured.
LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
    Range const& cur) const
{
  UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");

  // LeftRight only distinguishes which side of prev the current phrase is on.
  if (m_modelType == LeftRight) {
    return (prev.GetEndPos() <= cur.GetStartPos()) ? R : L;
  }
  // Directly adjacent on the right: monotone.
  if (cur.GetStartPos() == prev.GetEndPos() + 1) {
    return M;
  }
  if (m_modelType == Monotonic) {
    return NM;
  }
  // Directly adjacent on the left: swap.
  if (prev.GetStartPos() == cur.GetEndPos() + 1) {
    return S;
  }
  if (m_modelType == MSD) {
    return D;
  }
  // Remaining case (MSLR): discontinuous, split by direction.
  return (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL;
}
|
| 134 |
+
|
| 135 |
+
// Orientation derived from a signed reordering distance; this overload is the
// one used by the hierarchical backward state. A distance of 1 is monotone
// and -1 is a swap; other values are discontinuous (or NM / L / R, depending
// on the model type).
LRModel::ReorderingType LRModel::GetOrientation(int const reoDistance) const
{
  if (m_modelType == LeftRight) {
    return (reoDistance >= 1) ? R : L;
  }
  if (reoDistance == 1) {
    return M;
  }
  if (m_modelType == Monotonic) {
    return NM;
  }
  if (reoDistance == -1) {
    return S;
  }
  if (m_modelType == MSD) {
    return D;
  }
  return (reoDistance > 1) ? DR : DL;
}
|
| 143 |
+
|
| 144 |
+
LRState *LRModel::CreateLRState(MemPool &pool) const
|
| 145 |
+
{
|
| 146 |
+
LRState *bwd = NULL, *fwd = NULL;
|
| 147 |
+
size_t offset = 0;
|
| 148 |
+
|
| 149 |
+
switch (m_direction) {
|
| 150 |
+
case Backward:
|
| 151 |
+
case Bidirectional:
|
| 152 |
+
if (m_phraseBased) {
|
| 153 |
+
bwd =
|
| 154 |
+
new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
|
| 155 |
+
*this, Backward, offset);
|
| 156 |
+
//cerr << "bwd=" << bwd << bwd->ToString() << endl;
|
| 157 |
+
} else {
|
| 158 |
+
bwd =
|
| 159 |
+
new (pool.Allocate<HReorderingBackwardState>()) HReorderingBackwardState(
|
| 160 |
+
pool, *this, offset);
|
| 161 |
+
}
|
| 162 |
+
offset += m_collapseScores ? 1 : GetNumberOfTypes();
|
| 163 |
+
if (m_direction == Backward) return bwd; // else fall through
|
| 164 |
+
case Forward:
|
| 165 |
+
if (m_phraseBased) {
|
| 166 |
+
fwd =
|
| 167 |
+
new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
|
| 168 |
+
*this, Forward, offset);
|
| 169 |
+
//cerr << "fwd=" << fwd << fwd->ToString() << endl;
|
| 170 |
+
} else {
|
| 171 |
+
fwd =
|
| 172 |
+
new (pool.Allocate<HReorderingForwardState>()) HReorderingForwardState(
|
| 173 |
+
*this, offset);
|
| 174 |
+
}
|
| 175 |
+
offset += m_collapseScores ? 1 : GetNumberOfTypes();
|
| 176 |
+
if (m_direction == Forward) return fwd;
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
//cerr << "LRStates:" << *bwd << endl << *fwd << endl;
|
| 180 |
+
BidirectionalReorderingState *ret =
|
| 181 |
+
new (pool.Allocate<BidirectionalReorderingState>()) BidirectionalReorderingState(
|
| 182 |
+
*this, bwd, fwd, 0);
|
| 183 |
+
return ret;
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
// Orientation of `cur` relative to `prev`, taking the coverage bitmap into
// account: monotone and swap are decided by IsMonotonicStep() and IsSwap()
// rather than by strict adjacency of the two ranges.
LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
    Range const& cur, Bitmap const& cov) const
{
  if (m_modelType == LeftRight) {
    return (cur.GetStartPos() > prev.GetEndPos()) ? R : L;
  }
  if (IsMonotonicStep(prev, cur, cov)) {
    return M;
  }
  if (m_modelType == Monotonic) {
    return NM;
  }
  if (IsSwap(prev, cur, cov)) {
    return S;
  }
  if (m_modelType == MSD) {
    return D;
  }
  return (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL;
}
|
| 195 |
+
|
| 196 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/FF/LexicalReordering/LRModel.h
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* LRModel.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
#include <string>
|
| 9 |
+
|
| 10 |
+
namespace Moses2
|
| 11 |
+
{
|
| 12 |
+
|
| 13 |
+
class MemPool;
|
| 14 |
+
class Range;
|
| 15 |
+
class Bitmap;
|
| 16 |
+
class LRState;
|
| 17 |
+
class LexicalReordering;
|
| 18 |
+
|
| 19 |
+
class LRModel
|
| 20 |
+
{
|
| 21 |
+
public:
|
| 22 |
+
enum ModelType {
|
| 23 |
+
Monotonic, MSD, MSLR, LeftRight, None
|
| 24 |
+
};
|
| 25 |
+
enum Direction {
|
| 26 |
+
Forward, Backward, Bidirectional
|
| 27 |
+
};
|
| 28 |
+
enum Condition {
|
| 29 |
+
F, E, FE
|
| 30 |
+
};
|
| 31 |
+
|
| 32 |
+
enum ReorderingType {
|
| 33 |
+
M = 0, // monotonic
|
| 34 |
+
NM = 1, // non-monotonic
|
| 35 |
+
S = 1, // swap
|
| 36 |
+
D = 2, // discontinuous
|
| 37 |
+
DL = 2, // discontinuous, left
|
| 38 |
+
DR = 3, // discontinuous, right
|
| 39 |
+
R = 0, // right
|
| 40 |
+
L = 1, // left
|
| 41 |
+
MAX = 3, // largest possible
|
| 42 |
+
NONE = 4 // sentinel, one past MAX
|
| 43 |
+
};
|
| 44 |
+
|
| 45 |
+
LRModel(const std::string &modelType, LexicalReordering &ff);
|
| 46 |
+
virtual ~LRModel();
|
| 47 |
+
|
| 48 |
+
ModelType GetModelType() const {
|
| 49 |
+
return m_modelType;
|
| 50 |
+
}
|
| 51 |
+
Direction GetDirection() const {
|
| 52 |
+
return m_direction;
|
| 53 |
+
}
|
| 54 |
+
Condition GetCondition() const {
|
| 55 |
+
return m_condition;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
bool IsPhraseBased() const {
|
| 59 |
+
return m_phraseBased;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
bool CollapseScores() const {
|
| 63 |
+
return m_collapseScores;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
size_t GetNumberOfTypes() const;
|
| 67 |
+
|
| 68 |
+
LexicalReordering*
|
| 69 |
+
GetScoreProducer() const {
|
| 70 |
+
return m_scoreProducer;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
LRState *CreateLRState(MemPool &pool) const;
|
| 74 |
+
|
| 75 |
+
ReorderingType // for first phrase in phrase-based
|
| 76 |
+
GetOrientation(Range const& cur) const;
|
| 77 |
+
|
| 78 |
+
ReorderingType // for non-first phrases in phrase-based
|
| 79 |
+
GetOrientation(Range const& prev, Range const& cur) const;
|
| 80 |
+
|
| 81 |
+
ReorderingType // for HReorderingForwardState
|
| 82 |
+
GetOrientation(Range const& prev, Range const& cur, Bitmap const& cov) const;
|
| 83 |
+
|
| 84 |
+
ReorderingType // for HReorderingBackwardState
|
| 85 |
+
GetOrientation(int const reoDistance) const;
|
| 86 |
+
|
| 87 |
+
protected:
|
| 88 |
+
|
| 89 |
+
ModelType m_modelType;
|
| 90 |
+
bool m_phraseBased;
|
| 91 |
+
bool m_collapseScores;
|
| 92 |
+
Direction m_direction;
|
| 93 |
+
Condition m_condition;
|
| 94 |
+
LexicalReordering *m_scoreProducer;
|
| 95 |
+
|
| 96 |
+
};
|
| 97 |
+
|
| 98 |
+
} /* namespace Moses2 */
|
| 99 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/LRState.cpp
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* LRState.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include "LRState.h"
|
| 8 |
+
#include "LexicalReordering.h"
|
| 9 |
+
#include "../../Scores.h"
|
| 10 |
+
#include "../../TargetPhrase.h"
|
| 11 |
+
|
| 12 |
+
using namespace std;
|
| 13 |
+
|
| 14 |
+
namespace Moses2
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
class InputType;
|
| 18 |
+
|
| 19 |
+
/**
 * Construct a reordering state bound to a model configuration.
 *
 * @param config  model configuration; only a reference is stored
 * @param dir     direction stored in m_direction
 * @param offset  index of this state's first score inside a score array
 *
 * prevTP is set to NULL here so that it never holds an indeterminate value
 * before a subclass's Init() assigns it; ComparePrevScores() and CopyScores()
 * both read it.
 */
LRState::LRState(const LRModel &config, LRModel::Direction dir, size_t offset) :
  prevTP(NULL), m_configuration(config), m_direction(dir), m_offset(offset)
{
}
|
| 23 |
+
|
| 24 |
+
int LRState::ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const
|
| 25 |
+
{
|
| 26 |
+
LexicalReordering* producer = m_configuration.GetScoreProducer();
|
| 27 |
+
size_t phraseTableInd = producer->GetPhraseTableInd();
|
| 28 |
+
const SCORE *myScores = (const SCORE*) prevTP->ffData[phraseTableInd]; //producer->
|
| 29 |
+
const SCORE *yrScores = (const SCORE*) other->ffData[phraseTableInd]; //producer->
|
| 30 |
+
|
| 31 |
+
if (myScores == yrScores) return 0;
|
| 32 |
+
|
| 33 |
+
// The pointers are NULL if a phrase pair isn't found in the reordering table.
|
| 34 |
+
if (yrScores == NULL) return -1;
|
| 35 |
+
if (myScores == NULL) return 1;
|
| 36 |
+
|
| 37 |
+
size_t stop = m_offset + m_configuration.GetNumberOfTypes();
|
| 38 |
+
for (size_t i = m_offset; i < stop; i++) {
|
| 39 |
+
if ((myScores)[i] < (yrScores)[i]) return -1;
|
| 40 |
+
if ((myScores)[i] > (yrScores)[i]) return 1;
|
| 41 |
+
}
|
| 42 |
+
return 0;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
void LRState::CopyScores(const System &system, Scores &accum,
|
| 46 |
+
const TargetPhrase<Moses2::Word> &topt, ReorderingType reoType) const
|
| 47 |
+
{
|
| 48 |
+
// don't call this on a bidirectional object
|
| 49 |
+
UTIL_THROW_IF2(
|
| 50 |
+
m_direction != LRModel::Backward && m_direction != LRModel::Forward,
|
| 51 |
+
"Unknown direction: " << m_direction);
|
| 52 |
+
|
| 53 |
+
TargetPhrase<Moses2::Word> const* relevantOpt = (
|
| 54 |
+
(m_direction == LRModel::Backward) ? &topt : prevTP);
|
| 55 |
+
|
| 56 |
+
LexicalReordering* producer = m_configuration.GetScoreProducer();
|
| 57 |
+
size_t phraseTableInd = producer->GetPhraseTableInd();
|
| 58 |
+
const SCORE *cached = (const SCORE*) relevantOpt->ffData[phraseTableInd]; //producer->
|
| 59 |
+
|
| 60 |
+
if (cached == NULL) {
|
| 61 |
+
return;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
size_t off_remote = m_offset + reoType;
|
| 65 |
+
size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
|
| 66 |
+
|
| 67 |
+
UTIL_THROW_IF2(off_local >= producer->GetNumScores(),
|
| 68 |
+
"offset out of vector bounds!");
|
| 69 |
+
|
| 70 |
+
// look up applicable score from vector of scores
|
| 71 |
+
//UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
|
| 72 |
+
//Scores scores(producer->GetNumScoreComponents(),0);
|
| 73 |
+
SCORE score = cached[off_remote];
|
| 74 |
+
accum.PlusEquals(system, *producer, score, off_local);
|
| 75 |
+
|
| 76 |
+
// else: use default scores (if specified)
|
| 77 |
+
/*
|
| 78 |
+
else if (producer->GetHaveDefaultScores()) {
|
| 79 |
+
Scores scores(producer->GetNumScoreComponents(),0);
|
| 80 |
+
scores[off_local] = producer->GetDefaultScore(off_remote);
|
| 81 |
+
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
|
| 82 |
+
}
|
| 83 |
+
*/
|
| 84 |
+
// note: if no default score, no cost
|
| 85 |
+
/*
|
| 86 |
+
const SparseReordering* sparse = m_configuration.GetSparseReordering();
|
| 87 |
+
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
|
| 88 |
+
m_direction, accum);
|
| 89 |
+
*/
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
}
|
| 93 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/LRState.h
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include "../FFState.h"
|
| 3 |
+
#include "LRModel.h"
|
| 4 |
+
|
| 5 |
+
namespace Moses2
|
| 6 |
+
{
|
| 7 |
+
template<typename WORD>
|
| 8 |
+
class TargetPhrase;
|
| 9 |
+
|
| 10 |
+
class LexicalReordering;
|
| 11 |
+
class Hypothesis;
|
| 12 |
+
class System;
|
| 13 |
+
class Scores;
|
| 14 |
+
class Bitmap;
|
| 15 |
+
class ManagerBase;
|
| 16 |
+
class InputType;
|
| 17 |
+
class InputPathBase;
|
| 18 |
+
class Word;
|
| 19 |
+
|
| 20 |
+
class LRState: public FFState
|
| 21 |
+
{
|
| 22 |
+
public:
|
| 23 |
+
typedef LRModel::ReorderingType ReorderingType;
|
| 24 |
+
const TargetPhrase<Moses2::Word> *prevTP;
|
| 25 |
+
|
| 26 |
+
LRState(const LRModel &config, LRModel::Direction dir, size_t offset);
|
| 27 |
+
|
| 28 |
+
virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
|
| 29 |
+
const InputPathBase &path, bool first, const Bitmap *coverage) = 0;
|
| 30 |
+
|
| 31 |
+
virtual void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
|
| 32 |
+
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
|
| 33 |
+
FFState &state) const = 0;
|
| 34 |
+
|
| 35 |
+
void CopyScores(const System &system, Scores &accum, const TargetPhrase<Moses2::Word> &topt,
|
| 36 |
+
ReorderingType reoType) const;
|
| 37 |
+
|
| 38 |
+
protected:
|
| 39 |
+
const LRModel& m_configuration;
|
| 40 |
+
LRModel::Direction m_direction;
|
| 41 |
+
size_t m_offset;
|
| 42 |
+
|
| 43 |
+
int
|
| 44 |
+
ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const;
|
| 45 |
+
|
| 46 |
+
};
|
| 47 |
+
|
| 48 |
+
}
|
mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.cpp
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* LexicalReordering.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 15 Dec 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include <boost/foreach.hpp>
|
| 9 |
+
#include "util/exception.hh"
|
| 10 |
+
#include "LexicalReordering.h"
|
| 11 |
+
#include "LRModel.h"
|
| 12 |
+
#include "PhraseBasedReorderingState.h"
|
| 13 |
+
#include "BidirectionalReorderingState.h"
|
| 14 |
+
#include "../../TranslationModel/PhraseTable.h"
|
| 15 |
+
#include "../../System.h"
|
| 16 |
+
#include "../../PhraseBased/PhraseImpl.h"
|
| 17 |
+
#include "../../PhraseBased/Manager.h"
|
| 18 |
+
#include "../../PhraseBased/Hypothesis.h"
|
| 19 |
+
#include "../../PhraseBased/TargetPhrases.h"
|
| 20 |
+
#include "../../PhraseBased/TargetPhraseImpl.h"
|
| 21 |
+
#include "../../legacy/InputFileStream.h"
|
| 22 |
+
#include "../../legacy/Util2.h"
|
| 23 |
+
|
| 24 |
+
#ifdef HAVE_CMPH
|
| 25 |
+
#include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
|
| 26 |
+
#endif
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
using namespace std;
|
| 30 |
+
|
| 31 |
+
namespace Moses2
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
///////////////////////////////////////////////////////////////////////
|
| 35 |
+
|
| 36 |
+
/**
 * Build the feature function from its configuration line.
 *
 * Members are initialised in declaration order. After the parameters have
 * been read, a model configuration must exist (it is created by the "type"
 * key in SetParameter()); its absence is reported with an exception rather
 * than an assert so that release builds fail cleanly instead of dereferencing
 * a null pointer later.
 *
 * @param startInd  forwarded to StatefulFeatureFunction
 * @param line      configuration line, forwarded to StatefulFeatureFunction
 */
LexicalReordering::LexicalReordering(size_t startInd, const std::string &line)
  : StatefulFeatureFunction(startInd, line)
  , m_configuration(NULL)
  , m_propertyInd(-1)
#ifdef HAVE_CMPH
  , m_compactModel(NULL)
#endif
  , m_blank(NULL)
  , m_coll(NULL)
{
  ReadParameters();
  UTIL_THROW_IF2(m_configuration == NULL,
                 "LexicalReordering: missing 'type' parameter in: " << line);
  //assert(m_numScores == 6);
}
|
| 50 |
+
|
| 51 |
+
/**
 * Release the heap-allocated members: the in-memory table, the model
 * configuration and, when built with HAVE_CMPH, the compact model.
 * m_blank is not deleted here (Load() placement-constructs it in pool memory).
 */
LexicalReordering::~LexicalReordering()
{
  delete m_coll;          // in-memory table, may be NULL
  delete m_configuration; // created in SetParameter()
#ifdef HAVE_CMPH
  delete m_compactModel;  // compact binary table, may be NULL
#endif
}
|
| 59 |
+
|
| 60 |
+
/**
 * Load the reordering table.
 *
 * Three cases:
 *  - m_propertyInd >= 0: scores are read from a phrase-table property, so
 *    nothing is loaded;
 *  - (HAVE_CMPH only) "<path>.minlexr" exists: open the compact model;
 *  - otherwise: read the text table at m_path into m_coll. Each line must have
 *    exactly three "|||"-separated fields: source, target, scores.
 *
 * @param system  supplies the system memory pool and the vocabulary
 * @throws util::Exception if a line of the text table does not have three
 *         fields (previously only an assert, which left release builds
 *         indexing past the end of the token vector).
 */
void LexicalReordering::Load(System &system)
{
  MemPool &pool = system.GetSystemPool();

  if (m_propertyInd >= 0) {
    // Using integrated Lex RO. No loading needed
#ifdef HAVE_CMPH
  } else if (FileExists(m_path + ".minlexr")) {
    m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr",
        m_FactorsF, m_FactorsE, m_FactorsC);
    m_blank = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, 0);
#endif
  } else {
    m_coll = new Coll();
    InputFileStream file(m_path);
    string line;
    size_t lineNum = 0;

    while (getline(file, line)) {
      // progress indicator every million lines
      if (++lineNum % 1000000 == 0) {
        cerr << lineNum << " ";
      }

      std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
      UTIL_THROW_IF2(toks.size() != 3,
                     "Malformed lexical reordering table " << m_path
                     << ", line " << lineNum << ": expected 3 fields, found "
                     << toks.size());

      PhraseImpl *source = PhraseImpl::CreateFromString(pool, system.GetVocab(),
                           system, toks[0]);
      PhraseImpl *target = PhraseImpl::CreateFromString(pool, system.GetVocab(),
                           system, toks[1]);

      // scores are transformed, then floored, before being stored
      std::vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
      std::transform(scores.begin(), scores.end(), scores.begin(),
                     TransformScore);
      std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);

      Key key(source, target);
      (*m_coll)[key] = scores;
    }
  }
}
|
| 99 |
+
|
| 100 |
+
void LexicalReordering::SetParameter(const std::string& key,
|
| 101 |
+
const std::string& value)
|
| 102 |
+
{
|
| 103 |
+
if (key == "path") {
|
| 104 |
+
m_path = value;
|
| 105 |
+
} else if (key == "type") {
|
| 106 |
+
m_configuration = new LRModel(value, *this);
|
| 107 |
+
} else if (key == "input-factor") {
|
| 108 |
+
m_FactorsF = Tokenize<FactorType>(value);
|
| 109 |
+
} else if (key == "output-factor") {
|
| 110 |
+
m_FactorsE = Tokenize<FactorType>(value);
|
| 111 |
+
} else if (key == "property-index") {
|
| 112 |
+
m_propertyInd = Scan<int>(value);
|
| 113 |
+
} else {
|
| 114 |
+
StatefulFeatureFunction::SetParameter(key, value);
|
| 115 |
+
}
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
/**
 * Create a fresh state object for this feature.
 *
 * @param pool  memory pool handed to LRModel::CreateLRState
 * @param sys   unused
 * @return the state created by the model configuration
 */
FFState* LexicalReordering::BlankState(MemPool &pool, const System &sys) const
{
  return m_configuration->CreateLRState(pool);
}
|
| 123 |
+
|
| 124 |
+
/**
 * Initialise the state of the empty (initial) hypothesis.
 *
 * The state is cast to the LRState base class, which declares the virtual
 * Init(); the concrete state type need not be BidirectionalReorderingState,
 * so a downcast to that type would be invalid for other LRState subclasses.
 *
 * @param state  state object to initialise
 * @param mgr    unused
 * @param input  unused
 * @param hypo   supplies the target phrase, input path and coverage bitmap
 */
void LexicalReordering::EmptyHypothesisState(FFState &state,
    const ManagerBase &mgr, const InputType &input,
    const Hypothesis &hypo) const
{
  LRState &lrState = static_cast<LRState&>(state);
  lrState.Init(NULL, hypo.GetTargetPhrase(), hypo.GetInputPath(), true,
               &hypo.GetBitmap());
}
|
| 133 |
+
|
| 134 |
+
/**
 * Phrase-based overload: intentionally does nothing. The scores and
 * estimatedScore arguments are left untouched.
 */
void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
    const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase,
    Scores &scores, SCORE &estimatedScore) const
{
  // no-op
}
|
| 139 |
+
|
| 140 |
+
/**
 * SCFG overload: not supported, always throws.
 */
void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
    const Phrase<SCFG::Word> &source,
    const TargetPhrase<SCFG::Word> &targetPhrase,
    Scores &scores, SCORE &estimatedScore) const
{
  UTIL_THROW2("Don't use with SCFG models");
}
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
/**
 * Run the per-phrase EvaluateAfterTablePruning() on every target phrase of a
 * collection.
 *
 * @param pool          memory pool forwarded to the per-phrase overload
 * @param tps           target phrases to process
 * @param sourcePhrase  source side shared by all of them
 */
void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
    const TargetPhrases &tps,
    const Phrase<Moses2::Word> &sourcePhrase) const
{
  BOOST_FOREACH(const TargetPhraseImpl *candidate, tps) {
    EvaluateAfterTablePruning(pool, *candidate, sourcePhrase);
  }
}
|
| 155 |
+
|
| 156 |
+
/**
 * Cache the reordering scores of one phrase pair in the target phrase.
 *
 * The score array pointer is stored in targetPhrase.ffData[m_PhraseTableInd];
 * it is NULL when the pair is not found in the compact or in-memory table.
 * The source of the scores depends on how the feature was configured:
 * phrase-table property, compact model (HAVE_CMPH builds) or in-memory table.
 *
 * @param pool          pool from which the score array is allocated
 * @param targetPhrase  phrase whose ffData slot is written
 * @param sourcePhrase  source side of the phrase pair
 */
void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
    const TargetPhraseImpl &targetPhrase,
    const Phrase<Moses2::Word> &sourcePhrase) const
{
  if (m_propertyInd >= 0) {
    SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd);
    targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
#ifdef HAVE_CMPH
  } else if (m_compactModel) {
    // using external compact binary model
    const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase,
                          *m_blank);
    if (values.size()) {
      assert(values.size() == m_numScores);

      SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
      for (size_t i = 0; i < m_numScores; ++i) {
        scoreArr[i] = values[i];
      }
      targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
    } else {
      targetPhrase.ffData[m_PhraseTableInd] = NULL;
    }
#endif
  } else if (m_coll) {
    // using external memory model

    // cache data in target phrase
    const Values *values = GetValues(sourcePhrase, targetPhrase);

    if (values) {
      // Size check only once we know the lookup succeeded; GetValues()
      // returns NULL for unknown phrase pairs.
      assert(values->size() == m_numScores);

      SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
      for (size_t i = 0; i < m_numScores; ++i) {
        scoreArr[i] = (*values)[i];
      }
      targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
    } else {
      targetPhrase.ffData[m_PhraseTableInd] = NULL;
    }
  }
}
|
| 197 |
+
|
| 198 |
+
void LexicalReordering::EvaluateWhenApplied(const ManagerBase &mgr,
|
| 199 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 200 |
+
FFState &state) const
|
| 201 |
+
{
|
| 202 |
+
const LRState &prevStateCast = static_cast<const LRState&>(prevState);
|
| 203 |
+
prevStateCast.Expand(mgr, *this, hypo, m_PhraseTableInd, scores, state);
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
/**
 * Look up a phrase pair in the in-memory table.
 *
 * @param source  source phrase
 * @param target  target phrase
 * @return pointer to the stored scores, or NULL if the pair is absent
 */
const LexicalReordering::Values *LexicalReordering::GetValues(
  const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const
{
  const Key lookup(&source, &target);
  const Coll::const_iterator hit = m_coll->find(lookup);
  if (hit != m_coll->end()) {
    return &hit->second;
  }
  return NULL;
}
|
| 218 |
+
|
| 219 |
+
/**
 * SCFG overload: not implemented, always throws.
 */
void LexicalReordering::EvaluateWhenApplied(const SCFG::Manager &mgr,
    const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
    FFState &state) const
{
  UTIL_THROW2("Not implemented");
}
|
| 225 |
+
|
| 226 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.h
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* LexicalReordering.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 15 Dec 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
#include <vector>
|
| 10 |
+
#include <boost/unordered_map.hpp>
|
| 11 |
+
#include "../StatefulFeatureFunction.h"
|
| 12 |
+
#include "../../TypeDef.h"
|
| 13 |
+
#include "../../Phrase.h"
|
| 14 |
+
#include "../../legacy/Range.h"
|
| 15 |
+
|
| 16 |
+
namespace Moses2
|
| 17 |
+
{
|
| 18 |
+
|
| 19 |
+
class LexicalReorderingTableCompact;
|
| 20 |
+
class LRModel;
|
| 21 |
+
class TargetPhraseImpl;
|
| 22 |
+
|
| 23 |
+
class LexicalReordering: public StatefulFeatureFunction
|
| 24 |
+
{
|
| 25 |
+
public:
|
| 26 |
+
LexicalReordering(size_t startInd, const std::string &line);
|
| 27 |
+
virtual ~LexicalReordering();
|
| 28 |
+
|
| 29 |
+
virtual void Load(System &system);
|
| 30 |
+
|
| 31 |
+
virtual void SetParameter(const std::string& key, const std::string& value);
|
| 32 |
+
|
| 33 |
+
virtual size_t HasPhraseTableInd() const {
|
| 34 |
+
return true;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
|
| 38 |
+
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
|
| 39 |
+
const InputType &input, const Hypothesis &hypo) const;
|
| 40 |
+
|
| 41 |
+
virtual void
|
| 42 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
| 43 |
+
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
| 44 |
+
SCORE &estimatedScore) const;
|
| 45 |
+
|
| 46 |
+
virtual void
|
| 47 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
| 48 |
+
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
| 49 |
+
SCORE &estimatedScore) const;
|
| 50 |
+
|
| 51 |
+
virtual void
|
| 52 |
+
EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
|
| 53 |
+
const Phrase<Moses2::Word> &sourcePhrase) const;
|
| 54 |
+
|
| 55 |
+
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
|
| 56 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 57 |
+
FFState &state) const;
|
| 58 |
+
|
| 59 |
+
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
|
| 60 |
+
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
|
| 61 |
+
FFState &state) const;
|
| 62 |
+
|
| 63 |
+
protected:
|
| 64 |
+
std::string m_path;
|
| 65 |
+
FactorList m_FactorsF;
|
| 66 |
+
FactorList m_FactorsE;
|
| 67 |
+
FactorList m_FactorsC;
|
| 68 |
+
|
| 69 |
+
LRModel *m_configuration;
|
| 70 |
+
|
| 71 |
+
virtual void
|
| 72 |
+
EvaluateAfterTablePruning(MemPool &pool, const TargetPhraseImpl &targetPhrase,
|
| 73 |
+
const Phrase<Moses2::Word> &sourcePhrase) const;
|
| 74 |
+
|
| 75 |
+
// PROPERTY IN PT
|
| 76 |
+
int m_propertyInd;
|
| 77 |
+
|
| 78 |
+
// COMPACT MODEL
|
| 79 |
+
#ifdef HAVE_CMPH
|
| 80 |
+
LexicalReorderingTableCompact *m_compactModel;
|
| 81 |
+
#endif
|
| 82 |
+
|
| 83 |
+
Phrase<Moses2::Word> *m_blank;
|
| 84 |
+
|
| 85 |
+
// MEMORY MODEL
|
| 86 |
+
typedef std::pair<const Phrase<Moses2::Word>*, const Phrase<Moses2::Word>* > Key;
|
| 87 |
+
typedef std::vector<SCORE> Values;
|
| 88 |
+
|
| 89 |
+
struct KeyComparer {
|
| 90 |
+
size_t operator()(const Key &obj) const {
|
| 91 |
+
size_t seed = obj.first->hash();
|
| 92 |
+
boost::hash_combine(seed, obj.second->hash());
|
| 93 |
+
return seed;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
bool operator()(const Key& a, const Key& b) const {
|
| 97 |
+
if ((*a.first) != (*b.first)) {
|
| 98 |
+
return false;
|
| 99 |
+
}
|
| 100 |
+
if ((*a.second) != (*b.second)) {
|
| 101 |
+
return false;
|
| 102 |
+
}
|
| 103 |
+
return true;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
};
|
| 107 |
+
|
| 108 |
+
typedef boost::unordered_map<Key, Values, KeyComparer, KeyComparer> Coll;
|
| 109 |
+
Coll *m_coll;
|
| 110 |
+
|
| 111 |
+
const Values *GetValues(const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const;
|
| 112 |
+
};
|
| 113 |
+
|
| 114 |
+
} /* namespace Moses2 */
|
| 115 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* PhraseBasedReorderingState.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "PhraseBasedReorderingState.h"
|
| 9 |
+
#include "LexicalReordering.h"
|
| 10 |
+
#include "../../PhraseBased/Hypothesis.h"
|
| 11 |
+
#include "../../InputPathBase.h"
|
| 12 |
+
#include "../../PhraseBased/Manager.h"
|
| 13 |
+
|
| 14 |
+
using namespace std;
|
| 15 |
+
|
| 16 |
+
namespace Moses2
|
| 17 |
+
{
|
| 18 |
+
|
| 19 |
+
/**
 * Create an uninitialised phrase-based reordering state.
 *
 * All members get a defined value: prevPath and prevTP are NULL until Init()
 * is called, and m_first starts as true. Previously m_first was left
 * indeterminate; true is the safe default because Expand() only dereferences
 * prevPath when m_first is false.
 *
 * @param config  model configuration, forwarded to LRState
 * @param dir     direction, forwarded to LRState
 * @param offset  score offset, forwarded to LRState
 */
PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config,
    LRModel::Direction dir, size_t offset) :
  LRState(config, dir, offset), prevPath(NULL), m_first(true)
{
  // prevTP belongs to the base class, so it is assigned rather than
  // initialised in the member-initializer list.
  prevTP = NULL;
}
|
| 27 |
+
|
| 28 |
+
void PhraseBasedReorderingState::Init(const LRState *prev,
|
| 29 |
+
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
|
| 30 |
+
const Bitmap *coverage)
|
| 31 |
+
{
|
| 32 |
+
prevTP = &topt;
|
| 33 |
+
prevPath = &path;
|
| 34 |
+
m_first = first;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
size_t PhraseBasedReorderingState::hash() const
|
| 38 |
+
{
|
| 39 |
+
size_t ret;
|
| 40 |
+
ret = (size_t) &prevPath->range;
|
| 41 |
+
boost::hash_combine(ret, m_direction);
|
| 42 |
+
|
| 43 |
+
return ret;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
bool PhraseBasedReorderingState::operator==(const FFState& o) const
|
| 47 |
+
{
|
| 48 |
+
if (&o == this) return true;
|
| 49 |
+
|
| 50 |
+
const PhraseBasedReorderingState &other =
|
| 51 |
+
static_cast<const PhraseBasedReorderingState&>(o);
|
| 52 |
+
if (&prevPath->range == &other.prevPath->range) {
|
| 53 |
+
if (m_direction == LRModel::Forward) {
|
| 54 |
+
int compareScore = ComparePrevScores(other.prevTP);
|
| 55 |
+
return compareScore == 0;
|
| 56 |
+
} else {
|
| 57 |
+
return true;
|
| 58 |
+
}
|
| 59 |
+
} else {
|
| 60 |
+
return false;
|
| 61 |
+
}
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
void PhraseBasedReorderingState::Expand(const ManagerBase &mgr,
|
| 65 |
+
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
|
| 66 |
+
Scores &scores, FFState &state) const
|
| 67 |
+
{
|
| 68 |
+
if ((m_direction != LRModel::Forward) || !m_first) {
|
| 69 |
+
LRModel const& lrmodel = m_configuration;
|
| 70 |
+
Range const &cur = hypo.GetInputPath().range;
|
| 71 |
+
LRModel::ReorderingType reoType = (
|
| 72 |
+
m_first ?
|
| 73 |
+
lrmodel.GetOrientation(cur) :
|
| 74 |
+
lrmodel.GetOrientation(prevPath->range, cur));
|
| 75 |
+
CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
PhraseBasedReorderingState &stateCast =
|
| 79 |
+
static_cast<PhraseBasedReorderingState&>(state);
|
| 80 |
+
stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
|
| 81 |
+
NULL);
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* PhraseBasedReorderingState.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 22 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
#include "LRState.h"
|
| 10 |
+
|
| 11 |
+
namespace Moses2
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
class InputPathBase;
|
| 15 |
+
|
| 16 |
+
class PhraseBasedReorderingState: public LRState
|
| 17 |
+
{
|
| 18 |
+
public:
|
| 19 |
+
const InputPathBase *prevPath;
|
| 20 |
+
bool m_first;
|
| 21 |
+
|
| 22 |
+
PhraseBasedReorderingState(const LRModel &config, LRModel::Direction dir,
|
| 23 |
+
size_t offset);
|
| 24 |
+
|
| 25 |
+
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
|
| 26 |
+
const InputPathBase &path, bool first, const Bitmap *coverage);
|
| 27 |
+
|
| 28 |
+
size_t hash() const;
|
| 29 |
+
virtual bool operator==(const FFState& other) const;
|
| 30 |
+
|
| 31 |
+
virtual std::string ToString() const {
|
| 32 |
+
return "PhraseBasedReorderingState";
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
|
| 36 |
+
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
|
| 37 |
+
FFState &state) const;
|
| 38 |
+
|
| 39 |
+
protected:
|
| 40 |
+
|
| 41 |
+
};
|
| 42 |
+
|
| 43 |
+
} /* namespace Moses2 */
|
| 44 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.cpp
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ReorderingStack.cpp
|
| 3 |
+
** Author: Ankit K. Srivastava
|
| 4 |
+
** Date: Jan 26, 2010
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
#include <vector>
|
| 8 |
+
#include "ReorderingStack.h"
|
| 9 |
+
#include "../../MemPool.h"
|
| 10 |
+
|
| 11 |
+
namespace Moses2
|
| 12 |
+
{
|
| 13 |
+
/**
 * Create an empty stack whose storage is constructed from the given pool.
 */
ReorderingStack::ReorderingStack(MemPool &pool)
  : m_stack(pool)
{
}
|
| 18 |
+
|
| 19 |
+
void ReorderingStack::Init()
|
| 20 |
+
{
|
| 21 |
+
m_stack.clear();
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
size_t ReorderingStack::hash() const
|
| 25 |
+
{
|
| 26 |
+
std::size_t ret = boost::hash_range(m_stack.begin(), m_stack.end());
|
| 27 |
+
return ret;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
bool ReorderingStack::operator==(const ReorderingStack& o) const
|
| 31 |
+
{
|
| 32 |
+
const ReorderingStack& other = static_cast<const ReorderingStack&>(o);
|
| 33 |
+
return m_stack == other.m_stack;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
// Method to push (shift element into the stack and reduce if reqd)
|
| 37 |
+
int ReorderingStack::ShiftReduce(const Range &input_span)
|
| 38 |
+
{
|
| 39 |
+
int distance; // value to return: the initial distance between this and previous span
|
| 40 |
+
|
| 41 |
+
// stack is empty
|
| 42 |
+
if (m_stack.empty()) {
|
| 43 |
+
m_stack.push_back(input_span);
|
| 44 |
+
return input_span.GetStartPos() + 1; // - (-1)
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
// stack is non-empty
|
| 48 |
+
Range prev_span = m_stack.back(); //access last element added
|
| 49 |
+
|
| 50 |
+
//calculate the distance we are returning
|
| 51 |
+
if (input_span.GetStartPos() > prev_span.GetStartPos()) {
|
| 52 |
+
distance = input_span.GetStartPos() - prev_span.GetEndPos();
|
| 53 |
+
} else {
|
| 54 |
+
distance = input_span.GetEndPos() - prev_span.GetStartPos();
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
if (distance == 1) { //monotone
|
| 58 |
+
m_stack.pop_back();
|
| 59 |
+
Range new_span(prev_span.GetStartPos(), input_span.GetEndPos());
|
| 60 |
+
Reduce(new_span);
|
| 61 |
+
} else if (distance == -1) { //swap
|
| 62 |
+
m_stack.pop_back();
|
| 63 |
+
Range new_span(input_span.GetStartPos(), prev_span.GetEndPos());
|
| 64 |
+
Reduce(new_span);
|
| 65 |
+
} else { // discontinuous
|
| 66 |
+
m_stack.push_back(input_span);
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
return distance;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
// Method to reduce, if possible the spans
|
| 73 |
+
void ReorderingStack::Reduce(Range current)
|
| 74 |
+
{
|
| 75 |
+
bool cont_loop = true;
|
| 76 |
+
|
| 77 |
+
while (cont_loop && m_stack.size() > 0) {
|
| 78 |
+
|
| 79 |
+
Range previous = m_stack.back();
|
| 80 |
+
|
| 81 |
+
if (current.GetStartPos() - previous.GetEndPos() == 1) { //mono&merge
|
| 82 |
+
m_stack.pop_back();
|
| 83 |
+
Range t(previous.GetStartPos(), current.GetEndPos());
|
| 84 |
+
current = t;
|
| 85 |
+
} else if (previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge
|
| 86 |
+
m_stack.pop_back();
|
| 87 |
+
Range t(current.GetStartPos(), previous.GetEndPos());
|
| 88 |
+
current = t;
|
| 89 |
+
} else { // discontinuous, no more merging
|
| 90 |
+
cont_loop = false;
|
| 91 |
+
}
|
| 92 |
+
} // finished reducing, exit
|
| 93 |
+
|
| 94 |
+
// add to stack
|
| 95 |
+
m_stack.push_back(current);
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
}
|
| 99 |
+
|
mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.h
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ReorderingStack.h
|
| 3 |
+
** Author: Ankit K. Srivastava
|
| 4 |
+
** Date: Jan 26, 2010
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
#pragma once
|
| 8 |
+
|
| 9 |
+
//#include <string>
|
| 10 |
+
#include <vector>
|
| 11 |
+
//#include "Factor.h"
|
| 12 |
+
//#include "Phrase.h"
|
| 13 |
+
//#include "TypeDef.h"
|
| 14 |
+
//#include "Util.h"
|
| 15 |
+
#include "../../legacy/Range.h"
|
| 16 |
+
#include "../../Vector.h"
|
| 17 |
+
|
| 18 |
+
namespace Moses2
|
| 19 |
+
{
|
| 20 |
+
class MemPool;
|
| 21 |
+
|
| 22 |
+
class ReorderingStack
|
| 23 |
+
{
|
| 24 |
+
private:
|
| 25 |
+
|
| 26 |
+
Vector<Range> m_stack;
|
| 27 |
+
|
| 28 |
+
public:
|
| 29 |
+
ReorderingStack(MemPool &pool);
|
| 30 |
+
|
| 31 |
+
size_t hash() const;
|
| 32 |
+
bool operator==(const ReorderingStack& other) const;
|
| 33 |
+
|
| 34 |
+
void Init();
|
| 35 |
+
int ShiftReduce(const Range &input_span);
|
| 36 |
+
|
| 37 |
+
private:
|
| 38 |
+
void Reduce(Range input_span);
|
| 39 |
+
};
|
| 40 |
+
|
| 41 |
+
}
|
mosesdecoder/moses2/FF/OSM/KenOSM.cpp
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "KenOSM.h"
|
| 2 |
+
|
| 3 |
+
namespace Moses2
|
| 4 |
+
{
|
| 5 |
+
|
| 6 |
+
// Builds the operation-sequence language model stored at `file`.
//
// file         path of the KenLM model
// load_method  how KenLM should bring the file into memory
//
// Returns a heap-allocated model (created with `new`); the caller owns it.
// Throws through UTIL_THROW2 when the binary header names a model type that
// is not handled below.
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
{
  lm::ngram::Config config;
  config.load_method = load_method;

  lm::ngram::ModelType model_type;
  if (!lm::ngram::RecognizeBinary(file, model_type)) {
    // Not recognised as a KenLM binary: fall back to the probing model.
    return new KenOSM<lm::ngram::ProbingModel>(file, config);
  }

  // Binary file: instantiate the wrapper matching the stored model type.
  switch (model_type) {
  case lm::ngram::PROBING:
    return new KenOSM<lm::ngram::ProbingModel>(file, config);
  case lm::ngram::REST_PROBING:
    return new KenOSM<lm::ngram::RestProbingModel>(file, config);
  case lm::ngram::TRIE:
    return new KenOSM<lm::ngram::TrieModel>(file, config);
  case lm::ngram::QUANT_TRIE:
    return new KenOSM<lm::ngram::QuantTrieModel>(file, config);
  case lm::ngram::ARRAY_TRIE:
    return new KenOSM<lm::ngram::ArrayTrieModel>(file, config);
  case lm::ngram::QUANT_ARRAY_TRIE:
    return new KenOSM<lm::ngram::QuantArrayTrieModel>(file, config);
  default:
    UTIL_THROW2("Unrecognized kenlm model type " << model_type);
  }
}
|
| 32 |
+
|
| 33 |
+
} // namespace
|
mosesdecoder/moses2/FF/OSM/KenOSM.h
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <string>
|
| 4 |
+
#include "lm/model.hh"
|
| 5 |
+
|
| 6 |
+
namespace Moses2
|
| 7 |
+
{
|
| 8 |
+
|
| 9 |
+
class KenOSMBase
|
| 10 |
+
{
|
| 11 |
+
public:
|
| 12 |
+
virtual ~KenOSMBase() {}
|
| 13 |
+
|
| 14 |
+
virtual float Score(const lm::ngram::State&, StringPiece,
|
| 15 |
+
lm::ngram::State&) const = 0;
|
| 16 |
+
|
| 17 |
+
virtual const lm::ngram::State &BeginSentenceState() const = 0;
|
| 18 |
+
|
| 19 |
+
virtual const lm::ngram::State &NullContextState() const = 0;
|
| 20 |
+
};
|
| 21 |
+
|
| 22 |
+
template <class KenModel>
|
| 23 |
+
class KenOSM : public KenOSMBase
|
| 24 |
+
{
|
| 25 |
+
public:
|
| 26 |
+
KenOSM(const char *file, const lm::ngram::Config &config)
|
| 27 |
+
: m_kenlm(file, config) {}
|
| 28 |
+
|
| 29 |
+
float Score(const lm::ngram::State &in_state,
|
| 30 |
+
StringPiece word,
|
| 31 |
+
lm::ngram::State &out_state) const {
|
| 32 |
+
return m_kenlm.Score(in_state, m_kenlm.GetVocabulary().Index(word),
|
| 33 |
+
out_state);
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
const lm::ngram::State &BeginSentenceState() const {
|
| 37 |
+
return m_kenlm.BeginSentenceState();
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
const lm::ngram::State &NullContextState() const {
|
| 41 |
+
return m_kenlm.NullContextState();
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
private:
|
| 45 |
+
KenModel m_kenlm;
|
| 46 |
+
};
|
| 47 |
+
|
| 48 |
+
typedef KenOSMBase OSMLM;
|
| 49 |
+
|
| 50 |
+
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
} // namespace
|
mosesdecoder/moses2/FF/OSM/OpSequenceModel.cpp
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <sstream>
|
| 2 |
+
#include "OpSequenceModel.h"
|
| 3 |
+
#include "osmHyp.h"
|
| 4 |
+
#include "lm/state.hh"
|
| 5 |
+
#include "../../PhraseBased/Manager.h"
|
| 6 |
+
#include "../../PhraseBased/Hypothesis.h"
|
| 7 |
+
#include "../../PhraseBased/TargetPhraseImpl.h"
|
| 8 |
+
#include "../../PhraseBased/Sentence.h"
|
| 9 |
+
#include "../../TranslationModel/UnknownWordPenalty.h"
|
| 10 |
+
#include "../../System.h"
|
| 11 |
+
|
| 12 |
+
using namespace std;
|
| 13 |
+
|
| 14 |
+
namespace Moses2
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
////////////////////////////////////////////////////////////////////////////////////////
|
| 18 |
+
|
| 19 |
+
// Creates the feature function from its configuration line.
//
// Every member gets a defined default before ReadParameters() runs
// (presumably that is what dispatches to SetParameter() to override them --
// confirm in the base class). In particular OSM starts out as NULL so the
// destructor is safe even when Load() was never called.
OpSequenceModel::OpSequenceModel(size_t startInd, const std::string &line) :
  StatefulFeatureFunction(startInd, line)
{
  OSM = NULL;
  unkOpProb = 0;
  sFactor = 0;
  tFactor = 0;
  numFeatures = 5;
  load_method = util::READ;

  ReadParameters();
}

// Releases the language model that readLanguageModel() obtained from
// ConstructOSMLM(), which allocates it with `new`. Deleting NULL is a no-op.
OpSequenceModel::~OpSequenceModel()
{
  delete OSM;
}
|
| 34 |
+
|
| 35 |
+
// Loads the operation-sequence language model from the configured "path".
// The `system` argument is not used here.
void OpSequenceModel::Load(System &system)
{
  const char *lmFile = m_lmPath.c_str();
  readLanguageModel(lmFile);
}
|
| 39 |
+
|
| 40 |
+
// Constructs a default osmState inside storage obtained from `pool` and
// returns it.
FFState* OpSequenceModel::BlankState(MemPool &pool, const System &sys) const
{
  void *storage = pool.Allocate<osmState>();
  return new (storage) osmState();
}
|
| 44 |
+
|
| 45 |
+
// Initialises `state` (which must be an osmState) for the empty hypothesis:
// the LM context becomes the begin-of-sentence state and, through
// osmState::setState(), the positions j and E are reset to 0.
void OpSequenceModel::EmptyHypothesisState(FFState &state,
    const ManagerBase &mgr, const InputType &input,
    const Hypothesis &hypo) const
{
  const lm::ngram::State sentenceStart = OSM->BeginSentenceState();
  static_cast<osmState&>(state).setState(sentenceStart);
}
|
| 54 |
+
|
| 55 |
+
// Context-independent estimate for one phrase pair.
//
// The phrase pair is scored as if it were translated on its own: the LM
// starts from the null context, the coverage bitmap is freshly initialised
// and source positions run from 0 to source.GetSize()-1. The weighted sum of
// the resulting feature values is added to `estimatedScore`; `scores` is not
// modified.
void OpSequenceModel::EvaluateInIsolation(MemPool &pool,
    const System &system, const Phrase<Moses2::Word> &source,
    const TargetPhraseImpl &targetPhrase, Scores &scores,
    SCORE &estimatedScore) const
{
  osmHypothesis obj;
  obj.setState(OSM->NullContextState());

  Bitmap myBitmap (pool, source.GetSize());
  myBitmap.Init(std::vector<bool>());

  int startIndex = 0;
  int endIndex = source.GetSize();

  // Word alignment flattened to first0, second0, first1, second1, ...
  vector <int> alignments;
  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
  for (AlignmentInfo::const_iterator point = align.begin();
       point != align.end(); ++point) {
    alignments.push_back(point->first);
    alignments.push_back(point->second);
  }

  // Phrases produced by the unknown-word handler are represented by the
  // special self-translation token (only when both factors are 0).
  const bool selfTranslation =
    &targetPhrase.pt == system.featureFunctions.GetUnknownWordPenalty()
    && sFactor == 0 && tFactor == 0;

  vector <string> myTargetPhrase;
  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
    if (selfTranslation) {
      myTargetPhrase.push_back("_TRANS_SLF_");
    } else {
      myTargetPhrase.push_back(targetPhrase[pos][tFactor]->GetString().as_string());
    }
  }

  vector <string> mySourcePhrase;
  for (size_t pos = 0; pos < source.GetSize(); ++pos) {
    mySourcePhrase.push_back(source[pos][sFactor]->GetString().as_string());
  }

  // Generate the operation sequence and score it.
  vector<float> scoresVec;
  obj.setPhrases(mySourcePhrase , myTargetPhrase);
  obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
  obj.computeOSMFeature(startIndex,myBitmap);
  obj.calculateOSMProb(*OSM);
  obj.populateScores(scoresVec,numFeatures);

  estimatedScore += Scores::CalcWeightedScore(system, *this, scoresVec.data());
}
|
| 103 |
+
|
| 104 |
+
// SCFG variant: not supported by this feature, always throws.
void OpSequenceModel::EvaluateInIsolation(MemPool &pool,
    const System &system, const Phrase<SCFG::Word> &source,
    const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
    SCORE &estimatedScore) const
{
  UTIL_THROW2("Not implemented");
}
|
| 110 |
+
|
| 111 |
+
void OpSequenceModel::EvaluateWhenApplied(const ManagerBase &mgr,
|
| 112 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 113 |
+
FFState &state) const
|
| 114 |
+
{
|
| 115 |
+
const TargetPhrase<Moses2::Word> &target = hypo.GetTargetPhrase();
|
| 116 |
+
const Bitmap &bitmap = hypo.GetBitmap();
|
| 117 |
+
Bitmap myBitmap(bitmap);
|
| 118 |
+
const ManagerBase &manager = hypo.GetManager();
|
| 119 |
+
const InputType &source = manager.GetInput();
|
| 120 |
+
const Sentence &sourceSentence = static_cast<const Sentence&>(source);
|
| 121 |
+
|
| 122 |
+
osmHypothesis obj;
|
| 123 |
+
vector <string> mySourcePhrase;
|
| 124 |
+
vector <string> myTargetPhrase;
|
| 125 |
+
vector<float> scoresVec;
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
//target.GetWord(0)
|
| 129 |
+
|
| 130 |
+
//cerr << target <<" --- "<<target.GetSourcePhrase()<< endl; // English ...
|
| 131 |
+
|
| 132 |
+
//cerr << align << endl; // Alignments ...
|
| 133 |
+
//cerr << cur_hypo.GetCurrSourceWordsRange() << endl;
|
| 134 |
+
|
| 135 |
+
//cerr << source <<endl;
|
| 136 |
+
|
| 137 |
+
// int a = sourceRange.GetStartPos();
|
| 138 |
+
// cerr << source.GetWord(a);
|
| 139 |
+
//cerr <<a<<endl;
|
| 140 |
+
|
| 141 |
+
//const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
const Range & sourceRange = hypo.GetInputPath().range;
|
| 145 |
+
int startIndex = sourceRange.GetStartPos();
|
| 146 |
+
int endIndex = sourceRange.GetEndPos();
|
| 147 |
+
const AlignmentInfo &align = hypo.GetTargetPhrase().GetAlignTerm();
|
| 148 |
+
// osmState * statePtr;
|
| 149 |
+
|
| 150 |
+
vector <int> alignments;
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
AlignmentInfo::const_iterator iter;
|
| 155 |
+
|
| 156 |
+
for (iter = align.begin(); iter != align.end(); ++iter) {
|
| 157 |
+
//cerr << iter->first << "----" << iter->second << " ";
|
| 158 |
+
alignments.push_back(iter->first);
|
| 159 |
+
alignments.push_back(iter->second);
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
//cerr<<bitmap<<endl;
|
| 164 |
+
//cerr<<startIndex<<" "<<endIndex<<endl;
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
for (int i = startIndex; i <= endIndex; i++) {
|
| 168 |
+
myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
|
| 169 |
+
mySourcePhrase.push_back(sourceSentence[i][sFactor]->GetString().as_string());
|
| 170 |
+
// cerr<<mySourcePhrase[i]<<endl;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
for (size_t i = 0; i < target.GetSize(); i++) {
|
| 174 |
+
if (&target.pt == mgr.system.featureFunctions.GetUnknownWordPenalty() && sFactor == 0 && tFactor == 0)
|
| 175 |
+
myTargetPhrase.push_back("_TRANS_SLF_");
|
| 176 |
+
else
|
| 177 |
+
myTargetPhrase.push_back(target[i][tFactor]->GetString().as_string());
|
| 178 |
+
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
//cerr<<myBitmap<<endl;
|
| 183 |
+
|
| 184 |
+
obj.setState(&prevState);
|
| 185 |
+
obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
|
| 186 |
+
obj.setPhrases(mySourcePhrase , myTargetPhrase);
|
| 187 |
+
obj.computeOSMFeature(startIndex,myBitmap);
|
| 188 |
+
obj.calculateOSMProb(*OSM);
|
| 189 |
+
obj.populateScores(scoresVec,numFeatures);
|
| 190 |
+
//obj.print();
|
| 191 |
+
|
| 192 |
+
scores.PlusEquals(mgr.system, *this, scoresVec);
|
| 193 |
+
|
| 194 |
+
osmState &stateCast = static_cast<osmState&>(state);
|
| 195 |
+
obj.saveState(stateCast);
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
// SCFG variant: not supported by this feature, always throws.
void OpSequenceModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
    const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
    FFState &state) const
{
  UTIL_THROW2("Not implemented");
}
|
| 204 |
+
|
| 205 |
+
void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
|
| 206 |
+
{
|
| 207 |
+
|
| 208 |
+
if (key == "path") {
|
| 209 |
+
m_lmPath = value;
|
| 210 |
+
} else if (key == "support-features") {
|
| 211 |
+
if(value == "no")
|
| 212 |
+
numFeatures = 1;
|
| 213 |
+
else
|
| 214 |
+
numFeatures = 5;
|
| 215 |
+
} else if (key == "input-factor") {
|
| 216 |
+
sFactor = Scan<int>(value);
|
| 217 |
+
} else if (key == "output-factor") {
|
| 218 |
+
tFactor = Scan<int>(value);
|
| 219 |
+
} else if (key == "load") {
|
| 220 |
+
if (value == "lazy") {
|
| 221 |
+
load_method = util::LAZY;
|
| 222 |
+
} else if (value == "populate_or_lazy") {
|
| 223 |
+
load_method = util::POPULATE_OR_LAZY;
|
| 224 |
+
} else if (value == "populate_or_read" || value == "populate") {
|
| 225 |
+
load_method = util::POPULATE_OR_READ;
|
| 226 |
+
} else if (value == "read") {
|
| 227 |
+
load_method = util::READ;
|
| 228 |
+
} else if (value == "parallel_read") {
|
| 229 |
+
load_method = util::PARALLEL_READ;
|
| 230 |
+
} else {
|
| 231 |
+
UTIL_THROW2("Unknown KenLM load method " << value);
|
| 232 |
+
}
|
| 233 |
+
} else {
|
| 234 |
+
StatefulFeatureFunction::SetParameter(key, value);
|
| 235 |
+
}
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
void OpSequenceModel :: readLanguageModel(const char *lmFile)
|
| 239 |
+
{
|
| 240 |
+
string unkOp = "_TRANS_SLF_";
|
| 241 |
+
OSM = ConstructOSMLM(m_lmPath.c_str(), load_method);
|
| 242 |
+
|
| 243 |
+
lm::ngram::State startState = OSM->NullContextState();
|
| 244 |
+
lm::ngram::State endState;
|
| 245 |
+
unkOpProb = OSM->Score(startState,unkOp,endState);
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
}
|
mosesdecoder/moses2/FF/OSM/OpSequenceModel.h
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "../StatefulFeatureFunction.h"
|
| 2 |
+
#include "util/mmap.hh"
|
| 3 |
+
#include "KenOSM.h"
|
| 4 |
+
|
| 5 |
+
namespace Moses2
|
| 6 |
+
{
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class OpSequenceModel : public StatefulFeatureFunction
|
| 10 |
+
{
|
| 11 |
+
public:
|
| 12 |
+
OSMLM* OSM;
|
| 13 |
+
float unkOpProb;
|
| 14 |
+
int numFeatures; // Number of features used ...
|
| 15 |
+
int sFactor; // Source Factor ...
|
| 16 |
+
int tFactor; // Target Factor ...
|
| 17 |
+
util::LoadMethod load_method; // method to load model
|
| 18 |
+
|
| 19 |
+
OpSequenceModel(size_t startInd, const std::string &line);
|
| 20 |
+
virtual ~OpSequenceModel();
|
| 21 |
+
|
| 22 |
+
virtual void Load(System &system);
|
| 23 |
+
|
| 24 |
+
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
|
| 25 |
+
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
|
| 26 |
+
const InputType &input, const Hypothesis &hypo) const;
|
| 27 |
+
|
| 28 |
+
virtual void
|
| 29 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
| 30 |
+
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
| 31 |
+
SCORE &estimatedScore) const;
|
| 32 |
+
|
| 33 |
+
virtual void
|
| 34 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
| 35 |
+
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
| 36 |
+
SCORE &estimatedScore) const;
|
| 37 |
+
|
| 38 |
+
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
|
| 39 |
+
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
|
| 40 |
+
FFState &state) const;
|
| 41 |
+
|
| 42 |
+
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
|
| 43 |
+
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
|
| 44 |
+
FFState &state) const;
|
| 45 |
+
|
| 46 |
+
void SetParameter(const std::string& key, const std::string& value);
|
| 47 |
+
|
| 48 |
+
protected:
|
| 49 |
+
std::string m_lmPath;
|
| 50 |
+
|
| 51 |
+
void readLanguageModel(const char *);
|
| 52 |
+
|
| 53 |
+
};
|
| 54 |
+
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
|
mosesdecoder/moses2/FF/OSM/osmHyp.cpp
ADDED
|
@@ -0,0 +1,601 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "osmHyp.h"
|
| 2 |
+
#include <sstream>
|
| 3 |
+
|
| 4 |
+
using namespace std;
|
| 5 |
+
using namespace lm::ngram;
|
| 6 |
+
|
| 7 |
+
namespace Moses2
|
| 8 |
+
{
|
| 9 |
+
// Stores `val` as the LM context and resets both positions (j and E) to 0.
void osmState::setState(const lm::ngram::State & val)
{
  lmState = val;
  E = 0;
  j = 0;
}
|
| 15 |
+
|
| 16 |
+
void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
|
| 17 |
+
{
|
| 18 |
+
gap.clear();
|
| 19 |
+
gap = gapVal;
|
| 20 |
+
j = jVal;
|
| 21 |
+
E = eVal;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
size_t osmState::hash() const
|
| 25 |
+
{
|
| 26 |
+
size_t ret = j;
|
| 27 |
+
|
| 28 |
+
boost::hash_combine(ret, E);
|
| 29 |
+
boost::hash_combine(ret, gap);
|
| 30 |
+
boost::hash_combine(ret, lmState.length);
|
| 31 |
+
|
| 32 |
+
return ret;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
bool osmState::operator==(const FFState& otherBase) const
|
| 36 |
+
{
|
| 37 |
+
const osmState &other = static_cast<const osmState&>(otherBase);
|
| 38 |
+
if (j != other.j)
|
| 39 |
+
return false;
|
| 40 |
+
if (E != other.E)
|
| 41 |
+
return false;
|
| 42 |
+
if (gap != other.gap)
|
| 43 |
+
return false;
|
| 44 |
+
if (lmState.length != other.lmState.length)
|
| 45 |
+
return false;
|
| 46 |
+
|
| 47 |
+
return true;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
std::string osmState :: getName() const
|
| 51 |
+
{
|
| 52 |
+
|
| 53 |
+
return "done";
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
//////////////////////////////////////////////////
|
| 57 |
+
|
| 58 |
+
// Starts with zeroed score, counters and positions; the gap map is empty on
// construction.
osmHypothesis :: osmHypothesis()
{
  opProb = 0;

  // feature counters
  gapCount = 0;
  openGapCount = 0;
  gapWidth = 0;
  deletionCount = 0;

  // source positions
  j = 0;
  E = 0;
}
|
| 70 |
+
|
| 71 |
+
void osmHypothesis :: setState(const FFState* prev_state)
|
| 72 |
+
{
|
| 73 |
+
|
| 74 |
+
if(prev_state != NULL) {
|
| 75 |
+
|
| 76 |
+
j = static_cast <const osmState *> (prev_state)->getJ();
|
| 77 |
+
E = static_cast <const osmState *> (prev_state)->getE();
|
| 78 |
+
gap = static_cast <const osmState *> (prev_state)->getGap();
|
| 79 |
+
lmState = static_cast <const osmState *> (prev_state)->getLMState();
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
// Writes the LM context, j, E and the gap map into `state`.
void osmHypothesis :: saveState(osmState &state)
{
  // Order matters: osmState::setState() zeroes j and E, which the
  // subsequent saveState() call then overwrites with the real values.
  state.setState(lmState);
  state.saveState(j, E, gap);
}
|
| 88 |
+
|
| 89 |
+
int osmHypothesis :: isTranslationOperation(int x)
|
| 90 |
+
{
|
| 91 |
+
if (operations[x].find("_JMP_BCK_") != -1)
|
| 92 |
+
return 0;
|
| 93 |
+
|
| 94 |
+
if (operations[x].find("_JMP_FWD_") != -1)
|
| 95 |
+
return 0;
|
| 96 |
+
|
| 97 |
+
if (operations[x].find("_CONT_CEPT_") != -1)
|
| 98 |
+
return 0;
|
| 99 |
+
|
| 100 |
+
if (operations[x].find("_INS_GAP_") != -1)
|
| 101 |
+
return 0;
|
| 102 |
+
|
| 103 |
+
return 1;
|
| 104 |
+
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
void osmHypothesis :: removeReorderingOperations()
|
| 108 |
+
{
|
| 109 |
+
gapCount = 0;
|
| 110 |
+
deletionCount = 0;
|
| 111 |
+
openGapCount = 0;
|
| 112 |
+
gapWidth = 0;
|
| 113 |
+
|
| 114 |
+
std::vector <std::string> tupleSequence;
|
| 115 |
+
|
| 116 |
+
for (int x = 0; x < operations.size(); x++) {
|
| 117 |
+
// cout<<operations[x]<<endl;
|
| 118 |
+
|
| 119 |
+
if(isTranslationOperation(x) == 1) {
|
| 120 |
+
tupleSequence.push_back(operations[x]);
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
operations.clear();
|
| 126 |
+
operations = tupleSequence;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
// Scores the generated operations, in order, with the language model
// `ptrOp`, starting from the current LM context. The summed score ends up
// in opProb and the final context replaces lmState.
void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
{
  opProb = 0;

  State outState = lmState;
  State inState;

  const size_t total = operations.size();
  for (size_t pos = 0; pos != total; ++pos) {
    // the output context of the previous operation is the input of this one
    inState = outState;
    opProb += ptrOp.Score(inState, operations[pos], outState);
  }

  lmState = outState;
}
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
// Index of the first entry of coverageVector that equals 0, or -1 when
// there is none.
int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
{
  const int total = static_cast<int>(coverageVector.size());
  for (int pos = 0; pos < total; ++pos) {
    if (coverageVector[pos] == 0) {
      return pos;
    }
  }

  return -1;
}
|
| 162 |
+
|
| 163 |
+
// Text form of `num`, produced by SPrint().
string osmHypothesis :: intToString(int num)
{
  string text = SPrint(num);
  return text;
}
|
| 168 |
+
|
| 169 |
+
void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
|
| 170 |
+
{
|
| 171 |
+
|
| 172 |
+
int gFlag = 0;
|
| 173 |
+
int gp = 0;
|
| 174 |
+
int ans;
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
if ( j < j1) { // j1 is the index of the source word we are about to generate ...
|
| 178 |
+
//if(coverageVector[j]==0) // if source word at j is not generated yet ...
|
| 179 |
+
if(coverageVector.GetValue(j)==0) { // if source word at j is not generated yet ...
|
| 180 |
+
operations.push_back("_INS_GAP_");
|
| 181 |
+
gFlag++;
|
| 182 |
+
gap[j]="Unfilled";
|
| 183 |
+
}
|
| 184 |
+
if (j == E) {
|
| 185 |
+
j = j1;
|
| 186 |
+
} else {
|
| 187 |
+
operations.push_back("_JMP_FWD_");
|
| 188 |
+
j=E;
|
| 189 |
+
}
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
if (j1 < j) {
|
| 193 |
+
// if(j < E && coverageVector[j]==0)
|
| 194 |
+
if(j < E && coverageVector.GetValue(j)==0) {
|
| 195 |
+
operations.push_back("_INS_GAP_");
|
| 196 |
+
gFlag++;
|
| 197 |
+
gap[j]="Unfilled";
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
j=closestGap(gap,j1,gp);
|
| 201 |
+
operations.push_back("_JMP_BCK_"+ intToString(gp));
|
| 202 |
+
|
| 203 |
+
//cout<<"I am j "<<j<<endl;
|
| 204 |
+
//cout<<"I am j1 "<<j1<<endl;
|
| 205 |
+
|
| 206 |
+
if(j==j1)
|
| 207 |
+
gap[j]="Filled";
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
if (j < j1) {
|
| 211 |
+
operations.push_back("_INS_GAP_");
|
| 212 |
+
gap[j] = "Unfilled";
|
| 213 |
+
gFlag++;
|
| 214 |
+
j=j1;
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
if(contFlag == 0) { // First words of the multi-word cept ...
|
| 218 |
+
|
| 219 |
+
if(english == "_TRANS_SLF_") { // Unknown word ...
|
| 220 |
+
operations.push_back("_TRANS_SLF_");
|
| 221 |
+
} else {
|
| 222 |
+
operations.push_back("_TRANS_" + english + "_TO_" + german);
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
//ans = firstOpenGap(coverageVector);
|
| 226 |
+
ans = coverageVector.GetFirstGapPos();
|
| 227 |
+
|
| 228 |
+
if (ans != -1)
|
| 229 |
+
gapWidth += j - ans;
|
| 230 |
+
|
| 231 |
+
} else if (contFlag == 2) {
|
| 232 |
+
|
| 233 |
+
operations.push_back("_INS_" + german);
|
| 234 |
+
ans = coverageVector.GetFirstGapPos();
|
| 235 |
+
|
| 236 |
+
if (ans != -1)
|
| 237 |
+
gapWidth += j - ans;
|
| 238 |
+
deletionCount++;
|
| 239 |
+
} else {
|
| 240 |
+
operations.push_back("_CONT_CEPT_");
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
//coverageVector[j]=1;
|
| 244 |
+
coverageVector.SetValue(j,1);
|
| 245 |
+
j+=1;
|
| 246 |
+
|
| 247 |
+
if(E<j)
|
| 248 |
+
E=j;
|
| 249 |
+
|
| 250 |
+
if (gFlag > 0)
|
| 251 |
+
gapCount++;
|
| 252 |
+
|
| 253 |
+
openGapCount += getOpenGaps();
|
| 254 |
+
|
| 255 |
+
//if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
|
| 256 |
+
if (j < coverageVector.GetSize()) {
|
| 257 |
+
if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
|
| 258 |
+
j1 = j;
|
| 259 |
+
german = currF[j1-startIndex];
|
| 260 |
+
english = "_INS_";
|
| 261 |
+
generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
|
| 262 |
+
}
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
void osmHypothesis :: print()
|
| 268 |
+
{
|
| 269 |
+
for (int i = 0; i< operations.size(); i++) {
|
| 270 |
+
cerr<<operations[i]<<" ";
|
| 271 |
+
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
cerr<<endl<<endl;
|
| 275 |
+
|
| 276 |
+
cerr<<"Operation Probability "<<opProb<<endl;
|
| 277 |
+
cerr<<"Gap Count "<<gapCount<<endl;
|
| 278 |
+
cerr<<"Open Gap Count "<<openGapCount<<endl;
|
| 279 |
+
cerr<<"Gap Width "<<gapWidth<<endl;
|
| 280 |
+
cerr<<"Deletion Count "<<deletionCount<<endl;
|
| 281 |
+
|
| 282 |
+
cerr<<"_______________"<<endl;
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
// Finds the open ("Unfilled") gap to jump back to for source position j1.
//
// The gap positions are visited from the highest to the lowest. An open gap
// exactly at j1 wins; otherwise the first open gap below j1 -- which is the
// closest one, because positions are visited in descending order -- is
// chosen.
//
// gap  map from source position to "Unfilled" / "Filled"
// j1   source position that is about to be generated
// gp   out: number of open gaps counted from the right up to and including
//      the chosen one; 0 when no gap is found
//
// Returns the position of the chosen gap, or -1 when there is none. An
// empty map is handled (the previous implementation decremented end() of an
// empty map) and there is no upper limit on the distance to the gap.
int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
{
  gp = 0;
  int openGaps = 0;

  for (map <int,string> :: const_reverse_iterator iter = gap.rbegin();
       iter != gap.rend(); ++iter) {
    if (iter->second != "Unfilled") {
      continue;
    }

    ++openGaps;

    if (iter->first == j1) { // the gap sits exactly at the wanted position
      gp = openGaps;
      return j1;
    }

    if (iter->first < j1) {  // first open gap to the left of j1
      gp = openGaps;
      return iter->first;
    }
  }

  return -1;
}
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
int osmHypothesis :: getOpenGaps()
|
| 332 |
+
{
|
| 333 |
+
map <int,string> :: iterator iter;
|
| 334 |
+
|
| 335 |
+
int nd = 0;
|
| 336 |
+
for (iter = gap.begin(); iter!=gap.end(); iter++) {
|
| 337 |
+
if(iter->second == "Unfilled")
|
| 338 |
+
nd++;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
return nd;
|
| 342 |
+
|
| 343 |
+
}
|
| 344 |
+
|
| 345 |
+
void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
|
| 346 |
+
{
|
| 347 |
+
|
| 348 |
+
operations.push_back("_DEL_" + english);
|
| 349 |
+
currTargetIndex++;
|
| 350 |
+
|
| 351 |
+
while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
|
| 352 |
+
currTargetIndex++;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end()) {
|
| 356 |
+
english = currE[currTargetIndex];
|
| 357 |
+
generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
void osmHypothesis :: computeOSMFeature(int startIndex , Bitmap & coverageVector)
|
| 363 |
+
{
|
| 364 |
+
|
| 365 |
+
set <int> doneTargetIndexes;
|
| 366 |
+
set <int> eSide;
|
| 367 |
+
set <int> fSide;
|
| 368 |
+
set <int> :: iterator iter;
|
| 369 |
+
string english;
|
| 370 |
+
string source;
|
| 371 |
+
int j1;
|
| 372 |
+
int targetIndex = 0;
|
| 373 |
+
doneTargetIndexes.clear();
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
if (targetNullWords.size() != 0) { // Source words to be deleted in the start of this phrase ...
|
| 377 |
+
iter = targetNullWords.begin();
|
| 378 |
+
|
| 379 |
+
if (*iter == startIndex) {
|
| 380 |
+
|
| 381 |
+
j1 = startIndex;
|
| 382 |
+
source = currF[j1-startIndex];
|
| 383 |
+
english = "_INS_";
|
| 384 |
+
generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
|
| 385 |
+
}
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) { // first word has to be deleted ...
|
| 389 |
+
english = currE[targetIndex];
|
| 390 |
+
generateDeleteOperations(english,targetIndex, doneTargetIndexes);
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
|
| 395 |
+
source = "";
|
| 396 |
+
english = "";
|
| 397 |
+
|
| 398 |
+
fSide = ceptsInPhrase[i].first;
|
| 399 |
+
eSide = ceptsInPhrase[i].second;
|
| 400 |
+
|
| 401 |
+
iter = eSide.begin();
|
| 402 |
+
targetIndex = *iter;
|
| 403 |
+
english += currE[*iter];
|
| 404 |
+
iter++;
|
| 405 |
+
|
| 406 |
+
for (; iter != eSide.end(); iter++) {
|
| 407 |
+
if(*iter == targetIndex+1)
|
| 408 |
+
targetIndex++;
|
| 409 |
+
else
|
| 410 |
+
doneTargetIndexes.insert(*iter);
|
| 411 |
+
|
| 412 |
+
english += "^_^";
|
| 413 |
+
english += currE[*iter];
|
| 414 |
+
}
|
| 415 |
+
|
| 416 |
+
iter = fSide.begin();
|
| 417 |
+
source += currF[*iter];
|
| 418 |
+
iter++;
|
| 419 |
+
|
| 420 |
+
for (; iter != fSide.end(); iter++) {
|
| 421 |
+
source += "^_^";
|
| 422 |
+
source += currF[*iter];
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
iter = fSide.begin();
|
| 426 |
+
j1 = *iter + startIndex;
|
| 427 |
+
iter++;
|
| 428 |
+
|
| 429 |
+
generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
for (; iter != fSide.end(); iter++) {
|
| 433 |
+
j1 = *iter + startIndex;
|
| 434 |
+
generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
|
| 435 |
+
}
|
| 436 |
+
|
| 437 |
+
targetIndex++; // Check whether the next target word is unaligned ...
|
| 438 |
+
|
| 439 |
+
while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
|
| 440 |
+
targetIndex++;
|
| 441 |
+
}
|
| 442 |
+
|
| 443 |
+
if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
|
| 444 |
+
english = currE[targetIndex];
|
| 445 |
+
generateDeleteOperations(english,targetIndex, doneTargetIndexes);
|
| 446 |
+
}
|
| 447 |
+
}
|
| 448 |
+
|
| 449 |
+
//removeReorderingOperations();
|
| 450 |
+
|
| 451 |
+
//print();
|
| 452 |
+
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
/**
 * Grows eSide and fSide until they are closed under the alignment links:
 * every source position linked (via tS) to a target position in eSide is added
 * to fSide, and every target position linked (via sT) to a source position in
 * fSide is added to eSide. The expansion repeats until eSide stops growing.
 *
 * @param eSide  in/out set of target positions of the cept.
 * @param fSide  in/out set of source positions of the cept.
 * @param tS     target position -> aligned source positions.
 * @param sT     source position -> aligned target positions.
 */
void osmHypothesis :: getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT)
{
  size_t sizeBefore;

  do {
    sizeBefore = eSide.size();

    // Pull in every source position linked to a known target position.
    for (std::set<int>::iterator e = eSide.begin(); e != eSide.end(); ++e) {
      const std::vector<int> & linkedSources = tS[*e];
      for (size_t k = 0; k < linkedSources.size(); ++k) {
        fSide.insert(linkedSources[k]);
      }
    }

    // Pull in every target position linked to a known source position.
    for (std::set<int>::iterator f = fSide.begin(); f != fSide.end(); ++f) {
      const std::vector<int> & linkedTargets = sT[*f];
      for (size_t k = 0; k < linkedTargets.size(); ++k) {
        eSide.insert(linkedTargets[k]);
      }
    }

    // New target positions may bring further links, so go around again.
  } while (eSide.size() > sizeBefore);
}
|
| 486 |
+
|
| 487 |
+
void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
|
| 488 |
+
{
|
| 489 |
+
|
| 490 |
+
std::map <int , vector <int> > sT;
|
| 491 |
+
std::map <int , vector <int> > tS;
|
| 492 |
+
std::set <int> eSide;
|
| 493 |
+
std::set <int> fSide;
|
| 494 |
+
std::set <int> :: iterator iter;
|
| 495 |
+
std :: map <int , vector <int> > :: iterator iter2;
|
| 496 |
+
std :: pair < set <int> , set <int> > cept;
|
| 497 |
+
int src;
|
| 498 |
+
int tgt;
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
for (size_t i = 0; i < align.size(); i+=2) {
|
| 502 |
+
src = align[i];
|
| 503 |
+
tgt = align[i+1];
|
| 504 |
+
tS[tgt].push_back(src);
|
| 505 |
+
sT[src].push_back(tgt);
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
for (int i = startIndex; i<= endIndex; i++) { // What are unaligned source words in this phrase ...
|
| 509 |
+
if (sT.find(i-startIndex) == sT.end()) {
|
| 510 |
+
targetNullWords.insert(i);
|
| 511 |
+
}
|
| 512 |
+
}
|
| 513 |
+
|
| 514 |
+
for (int i = 0; i < targetPhraseLength; i++) { // What are unaligned target words in this phrase ...
|
| 515 |
+
if (tS.find(i) == tS.end()) {
|
| 516 |
+
sourceNullWords.insert(i);
|
| 517 |
+
}
|
| 518 |
+
}
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
while (tS.size() != 0 && sT.size() != 0) {
|
| 522 |
+
|
| 523 |
+
iter2 = tS.begin();
|
| 524 |
+
|
| 525 |
+
eSide.clear();
|
| 526 |
+
fSide.clear();
|
| 527 |
+
eSide.insert (iter2->first);
|
| 528 |
+
|
| 529 |
+
getMeCepts(eSide, fSide, tS , sT);
|
| 530 |
+
|
| 531 |
+
for (iter = eSide.begin(); iter != eSide.end(); iter++) {
|
| 532 |
+
iter2 = tS.find(*iter);
|
| 533 |
+
tS.erase(iter2);
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
for (iter = fSide.begin(); iter != fSide.end(); iter++) {
|
| 537 |
+
iter2 = sT.find(*iter);
|
| 538 |
+
sT.erase(iter2);
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
cept = make_pair (fSide , eSide);
|
| 542 |
+
ceptsInPhrase.push_back(cept);
|
| 543 |
+
}
|
| 544 |
+
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
/*
|
| 548 |
+
|
| 549 |
+
cerr<<"Extracted Cepts "<<endl;
|
| 550 |
+
for (int i = 0; i < ceptsInPhrase.size(); i++)
|
| 551 |
+
{
|
| 552 |
+
|
| 553 |
+
fSide = ceptsInPhrase[i].first;
|
| 554 |
+
eSide = ceptsInPhrase[i].second;
|
| 555 |
+
|
| 556 |
+
for (iter = eSide.begin(); iter != eSide.end(); iter++)
|
| 557 |
+
{
|
| 558 |
+
cerr<<*iter<<" ";
|
| 559 |
+
}
|
| 560 |
+
cerr<<"<---> ";
|
| 561 |
+
|
| 562 |
+
for (iter = fSide.begin(); iter != fSide.end(); iter++)
|
| 563 |
+
{
|
| 564 |
+
cerr<<*iter<<" ";
|
| 565 |
+
}
|
| 566 |
+
|
| 567 |
+
cerr<<endl;
|
| 568 |
+
}
|
| 569 |
+
cerr<<endl;
|
| 570 |
+
|
| 571 |
+
cerr<<"Unaligned Target Words"<<endl;
|
| 572 |
+
|
| 573 |
+
for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
|
| 574 |
+
cerr<<*iter<<"<--->"<<endl;
|
| 575 |
+
|
| 576 |
+
cerr<<"Unaligned Source Words"<<endl;
|
| 577 |
+
|
| 578 |
+
for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
|
| 579 |
+
cerr<<*iter<<"<--->"<<endl;
|
| 580 |
+
|
| 581 |
+
*/
|
| 582 |
+
|
| 583 |
+
}
|
| 584 |
+
|
| 585 |
+
void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
|
| 586 |
+
{
|
| 587 |
+
scores.clear();
|
| 588 |
+
scores.push_back(opProb);
|
| 589 |
+
|
| 590 |
+
if (numFeatures == 1)
|
| 591 |
+
return;
|
| 592 |
+
|
| 593 |
+
scores.push_back(gapWidth);
|
| 594 |
+
scores.push_back(gapCount);
|
| 595 |
+
scores.push_back(openGapCount);
|
| 596 |
+
scores.push_back(deletionCount);
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
} // namespace
|
| 601 |
+
|
mosesdecoder/moses2/FF/OSM/osmHyp.h
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
# include <set>
|
| 4 |
+
# include <map>
|
| 5 |
+
# include <string>
|
| 6 |
+
# include <vector>
|
| 7 |
+
#include "KenOSM.h"
|
| 8 |
+
# include "../FFState.h"
|
| 9 |
+
# include "../../legacy/Bitmap.h"
|
| 10 |
+
|
| 11 |
+
namespace Moses2
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
class osmState : public FFState
|
| 15 |
+
{
|
| 16 |
+
public:
|
| 17 |
+
osmState()
|
| 18 |
+
{}
|
| 19 |
+
|
| 20 |
+
void setState(const lm::ngram::State & val);
|
| 21 |
+
|
| 22 |
+
virtual size_t hash() const;
|
| 23 |
+
virtual bool operator==(const FFState& other) const;
|
| 24 |
+
|
| 25 |
+
virtual std::string ToString() const {
|
| 26 |
+
return "osmState";
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
|
| 30 |
+
int getJ()const {
|
| 31 |
+
return j;
|
| 32 |
+
}
|
| 33 |
+
int getE()const {
|
| 34 |
+
return E;
|
| 35 |
+
}
|
| 36 |
+
std::map <int , std::string> getGap() const {
|
| 37 |
+
return gap;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
lm::ngram::State getLMState() const {
|
| 41 |
+
return lmState;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
void print() const;
|
| 45 |
+
std::string getName() const;
|
| 46 |
+
|
| 47 |
+
protected:
|
| 48 |
+
int j, E;
|
| 49 |
+
std::map <int,std::string> gap;
|
| 50 |
+
lm::ngram::State lmState;
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
class osmHypothesis
|
| 54 |
+
{
|
| 55 |
+
|
| 56 |
+
private:
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
std::vector <std::string> operations; // List of operations required to generated this hyp ...
|
| 60 |
+
std::map <int,std::string> gap; // Maintains gap history ...
|
| 61 |
+
int j; // Position after the last source word generated ...
|
| 62 |
+
int E; // Position after the right most source word so far generated ...
|
| 63 |
+
lm::ngram::State lmState; // KenLM's Model State ...
|
| 64 |
+
|
| 65 |
+
int gapCount; // Number of gaps inserted ...
|
| 66 |
+
int deletionCount;
|
| 67 |
+
int openGapCount;
|
| 68 |
+
int gapWidth;
|
| 69 |
+
double opProb;
|
| 70 |
+
|
| 71 |
+
std::vector <std::string> currE;
|
| 72 |
+
std::vector <std::string> currF;
|
| 73 |
+
std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
|
| 74 |
+
std::set <int> targetNullWords;
|
| 75 |
+
std::set <int> sourceNullWords;
|
| 76 |
+
|
| 77 |
+
int closestGap(std::map <int,std::string> gap,int j1, int & gp);
|
| 78 |
+
int firstOpenGap(std::vector <int> & coverageVector);
|
| 79 |
+
std::string intToString(int);
|
| 80 |
+
int getOpenGaps();
|
| 81 |
+
int isTranslationOperation(int j);
|
| 82 |
+
void removeReorderingOperations();
|
| 83 |
+
|
| 84 |
+
void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
|
| 85 |
+
|
| 86 |
+
public:
|
| 87 |
+
|
| 88 |
+
osmHypothesis();
|
| 89 |
+
~osmHypothesis() {};
|
| 90 |
+
void generateOperations(int & startIndex, int j1 , int contFlag , Bitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
|
| 91 |
+
void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
|
| 92 |
+
void calculateOSMProb(OSMLM& ptrOp);
|
| 93 |
+
void computeOSMFeature(int startIndex , Bitmap & coverageVector);
|
| 94 |
+
void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
|
| 95 |
+
void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
|
| 96 |
+
currF = val1;
|
| 97 |
+
currE = val2;
|
| 98 |
+
}
|
| 99 |
+
void setState(const FFState* prev_state);
|
| 100 |
+
void saveState(osmState &state);
|
| 101 |
+
void print();
|
| 102 |
+
void populateScores(std::vector <float> & scores , const int numFeatures);
|
| 103 |
+
void setState(const lm::ngram::State & val) {
|
| 104 |
+
lmState = val;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
};
|
| 108 |
+
|
| 109 |
+
} // namespace
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
|
mosesdecoder/moses2/FF/PhrasePenalty.cpp
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
 * PhrasePenalty.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "PhrasePenalty.h"
|
| 9 |
+
#include "../Scores.h"
|
| 10 |
+
|
| 11 |
+
namespace Moses2
|
| 12 |
+
{
|
| 13 |
+
|
| 14 |
+
/**
 * Constructs the feature by forwarding both arguments to the
 * StatelessFeatureFunction base and then calling ReadParameters().
 *
 * @param startInd  forwarded unchanged to the base class.
 * @param line      forwarded unchanged to the base class.
 */
PhrasePenalty::PhrasePenalty(size_t startInd, const std::string &line)
  : StatelessFeatureFunction(startInd, line)
{
  this->ReadParameters();
}
|
| 19 |
+
|
| 20 |
+
/** Destructor; this class has nothing of its own to release. */
PhrasePenalty::~PhrasePenalty()
{
}
|
| 24 |
+
|
| 25 |
+
/**
 * Phrase-based overload: adds a constant 1 to this feature's entry in scores.
 *
 * pool, source, targetPhrase and estimatedScore are not used.
 */
void PhrasePenalty::EvaluateInIsolation(
  MemPool &pool,
  const System &system,
  const Phrase<Moses2::Word> &source,
  const TargetPhraseImpl &targetPhrase,
  Scores &scores,
  SCORE &estimatedScore) const
{
  scores.PlusEquals(system, *this, 1);
}
|
| 31 |
+
|
| 32 |
+
/**
 * SCFG overload: adds a constant 1 to this feature's entry in scores.
 *
 * pool, source, targetPhrase and estimatedScore are not used.
 */
void PhrasePenalty::EvaluateInIsolation(
  MemPool &pool,
  const System &system,
  const Phrase<SCFG::Word> &source,
  const TargetPhrase<SCFG::Word> &targetPhrase,
  Scores &scores,
  SCORE &estimatedScore) const
{
  scores.PlusEquals(system, *this, 1);
}
|
| 38 |
+
|
| 39 |
+
}
|
| 40 |
+
|
mosesdecoder/moses2/FF/PhrasePenalty.h
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
 * PhrasePenalty.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 27 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include "StatelessFeatureFunction.h"
|
| 11 |
+
|
| 12 |
+
namespace Moses2
|
| 13 |
+
{
|
| 14 |
+
|
| 15 |
+
class PhrasePenalty: public StatelessFeatureFunction
|
| 16 |
+
{
|
| 17 |
+
public:
|
| 18 |
+
PhrasePenalty(size_t startInd, const std::string &line);
|
| 19 |
+
virtual ~PhrasePenalty();
|
| 20 |
+
|
| 21 |
+
virtual void
|
| 22 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
|
| 23 |
+
const TargetPhraseImpl &targetPhrase, Scores &scores,
|
| 24 |
+
SCORE &estimatedScore) const;
|
| 25 |
+
|
| 26 |
+
virtual void
|
| 27 |
+
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
|
| 28 |
+
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
|
| 29 |
+
SCORE &estimatedScore) const;
|
| 30 |
+
|
| 31 |
+
};
|
| 32 |
+
|
| 33 |
+
}
|
| 34 |
+
|
mosesdecoder/moses2/FF/PointerState.cpp
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "PointerState.h"
|
| 2 |
+
|
| 3 |
+
namespace Moses2
|
| 4 |
+
{
|
| 5 |
+
|
| 6 |
+
}
|