suricodes commited on
Commit
d9f6fc9
·
verified ·
1 Parent(s): ebb7bf2

Upload 440 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. mosesdecoder/moses2/AlignmentInfo.cpp +176 -0
  3. mosesdecoder/moses2/AlignmentInfo.h +148 -0
  4. mosesdecoder/moses2/AlignmentInfoCollection.cpp +62 -0
  5. mosesdecoder/moses2/AlignmentInfoCollection.h +81 -0
  6. mosesdecoder/moses2/ArcLists.cpp +127 -0
  7. mosesdecoder/moses2/ArcLists.h +43 -0
  8. mosesdecoder/moses2/Array.h +83 -0
  9. mosesdecoder/moses2/DLLEntryApi.cpp +74 -0
  10. mosesdecoder/moses2/EstimatedScores.cpp +117 -0
  11. mosesdecoder/moses2/EstimatedScores.h +59 -0
  12. mosesdecoder/moses2/FF/Distortion.cpp +182 -0
  13. mosesdecoder/moses2/FF/Distortion.h +59 -0
  14. mosesdecoder/moses2/FF/ExampleStatefulFF.cpp +96 -0
  15. mosesdecoder/moses2/FF/ExampleStatefulFF.h +46 -0
  16. mosesdecoder/moses2/FF/ExampleStatelessFF.cpp +40 -0
  17. mosesdecoder/moses2/FF/ExampleStatelessFF.h +34 -0
  18. mosesdecoder/moses2/FF/FFState.cpp +1 -0
  19. mosesdecoder/moses2/FF/FFState.h +50 -0
  20. mosesdecoder/moses2/FF/FeatureFunction.cpp +82 -0
  21. mosesdecoder/moses2/FF/FeatureFunction.h +118 -0
  22. mosesdecoder/moses2/FF/FeatureFunctions.cpp +291 -0
  23. mosesdecoder/moses2/FF/FeatureFunctions.h +113 -0
  24. mosesdecoder/moses2/FF/FeatureRegistry.cpp +128 -0
  25. mosesdecoder/moses2/FF/FeatureRegistry.h +52 -0
  26. mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp +79 -0
  27. mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.h +40 -0
  28. mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp +71 -0
  29. mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.h +37 -0
  30. mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.cpp +87 -0
  31. mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.h +41 -0
  32. mosesdecoder/moses2/FF/LexicalReordering/LRModel.cpp +196 -0
  33. mosesdecoder/moses2/FF/LexicalReordering/LRModel.h +99 -0
  34. mosesdecoder/moses2/FF/LexicalReordering/LRState.cpp +93 -0
  35. mosesdecoder/moses2/FF/LexicalReordering/LRState.h +48 -0
  36. mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.cpp +226 -0
  37. mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.h +115 -0
  38. mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp +84 -0
  39. mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h +44 -0
  40. mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.cpp +99 -0
  41. mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.h +41 -0
  42. mosesdecoder/moses2/FF/OSM/KenOSM.cpp +33 -0
  43. mosesdecoder/moses2/FF/OSM/KenOSM.h +53 -0
  44. mosesdecoder/moses2/FF/OSM/OpSequenceModel.cpp +248 -0
  45. mosesdecoder/moses2/FF/OSM/OpSequenceModel.h +57 -0
  46. mosesdecoder/moses2/FF/OSM/osmHyp.cpp +601 -0
  47. mosesdecoder/moses2/FF/OSM/osmHyp.h +112 -0
  48. mosesdecoder/moses2/FF/PhrasePenalty.cpp +40 -0
  49. mosesdecoder/moses2/FF/PhrasePenalty.h +34 -0
  50. mosesdecoder/moses2/FF/PointerState.cpp +6 -0
.gitattributes CHANGED
@@ -98,3 +98,6 @@ mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryPhraseTable
98
  mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/libmoses.a filter=lfs diff=lfs merge=lfs -text
99
  mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/moses_test filter=lfs diff=lfs merge=lfs -text
100
  mosesdecoder/moses/LM/bin/BackwardTest.test/gcc-9/release/link-static/threading-multi/BackwardTest filter=lfs diff=lfs merge=lfs -text
 
 
 
 
98
  mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/libmoses.a filter=lfs diff=lfs merge=lfs -text
99
  mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/moses_test filter=lfs diff=lfs merge=lfs -text
100
  mosesdecoder/moses/LM/bin/BackwardTest.test/gcc-9/release/link-static/threading-multi/BackwardTest filter=lfs diff=lfs merge=lfs -text
101
+ mosesdecoder/moses2/bin/gcc-9/release/link-static/threading-multi/libmoses2_lib.a filter=lfs diff=lfs merge=lfs -text
102
+ mosesdecoder/moses2/bin/gcc-9/release/link-static/threading-multi/libmoses2decoder.a filter=lfs diff=lfs merge=lfs -text
103
+ mosesdecoder/moses2/bin/gcc-9/release/link-static/threading-multi/moses2 filter=lfs diff=lfs merge=lfs -text
mosesdecoder/moses2/AlignmentInfo.cpp ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+ #include <algorithm>
20
+ #include <set>
21
+ #include <sstream>
22
+ #include "AlignmentInfo.h"
23
+ #include "legacy/Util2.h"
24
+ #include "util/exception.hh"
25
+
26
+ namespace Moses2
27
+ {
28
+
29
+ AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
30
+ : m_collection(pairs)
31
+ {
32
+ BuildNonTermIndexMaps();
33
+ }
34
+
35
+ AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
36
+ {
37
+ assert(aln.size()%2==0);
38
+ for (size_t i = 0; i < aln.size(); i+= 2)
39
+ m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1])));
40
+ BuildNonTermIndexMaps();
41
+ }
42
+
43
+ AlignmentInfo::AlignmentInfo(const std::string &str)
44
+ {
45
+ std::vector<std::string> points = Tokenize(str, " ");
46
+ std::vector<std::string>::const_iterator iter;
47
+ for (iter = points.begin(); iter != points.end(); iter++) {
48
+ std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
49
+ UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
50
+ Add(point[0], point[1]);
51
+ }
52
+ }
53
+
54
+ void AlignmentInfo::BuildNonTermIndexMaps()
55
+ {
56
+ if (m_collection.empty()) {
57
+ return;
58
+ }
59
+ const_iterator p = begin();
60
+ size_t maxIndex = p->second;
61
+ for (++p; p != end(); ++p) {
62
+ if (p->second > maxIndex) {
63
+ maxIndex = p->second;
64
+ }
65
+ }
66
+ m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
67
+ m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
68
+ size_t i = 0;
69
+ for (p = begin(); p != end(); ++p) {
70
+ if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
71
+ // 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
72
+ m_nonTermIndexMap.clear();
73
+ m_nonTermIndexMap2.clear();
74
+ return;
75
+ }
76
+ m_nonTermIndexMap[p->second] = i++;
77
+ m_nonTermIndexMap2[p->second] = p->first;
78
+ }
79
+ }
80
+
81
+ std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
82
+ {
83
+ std::set<size_t> ret;
84
+ CollType::const_iterator iter;
85
+ for (iter = begin(); iter != end(); ++iter) {
86
+ // const std::pair<size_t,size_t> &align = *iter;
87
+ if (iter->first == sourcePos) {
88
+ ret.insert(iter->second);
89
+ }
90
+ }
91
+ return ret;
92
+ }
93
+
94
+ std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
95
+ {
96
+ std::set<size_t> ret;
97
+ CollType::const_iterator iter;
98
+ for (iter = begin(); iter != end(); ++iter) {
99
+ // const std::pair<size_t,size_t> &align = *iter;
100
+ if (iter->second == targetPos) {
101
+ ret.insert(iter->first);
102
+ }
103
+ }
104
+ return ret;
105
+ }
106
+
107
+
108
/** Strict ordering of alignment points by target position, with the source
 *  position as tie-breaker. Used as the std::sort predicate for TargetOrder.
 *  \return true if *a must come before *b
 */
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if (a->second != b->second) {
    return a->second < b->second;
  }
  return a->first < b->first;
}
116
+
117
+
118
+ std::vector< const std::pair<size_t,size_t>* >
119
+ AlignmentInfo::
120
+ GetSortedAlignments(WordAlignmentSort SortOrder) const
121
+ {
122
+ std::vector< const std::pair<size_t,size_t>* > ret;
123
+
124
+ CollType::const_iterator iter;
125
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
126
+ const std::pair<size_t,size_t> &alignPair = *iter;
127
+ ret.push_back(&alignPair);
128
+ }
129
+
130
+ switch (SortOrder) {
131
+ case NoSort:
132
+ break;
133
+
134
+ case TargetOrder:
135
+ std::sort(ret.begin(), ret.end(), compare_target);
136
+ break;
137
+
138
+ default:
139
+ UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
140
+ << SortOrder);
141
+ }
142
+
143
+ return ret;
144
+
145
+ }
146
+
147
+ std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
148
+ {
149
+ std::set<size_t> sourcePoses;
150
+
151
+ CollType::const_iterator iter;
152
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
153
+ size_t sourcePos = iter->first;
154
+ sourcePoses.insert(sourcePos);
155
+ }
156
+ std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
157
+ return ret;
158
+ }
159
+
160
+ std::string AlignmentInfo::Debug(const System &system) const
161
+ {
162
+ std::stringstream out;
163
+ out << *this;
164
+ return out.str();
165
+ }
166
+
167
+ std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj)
168
+ {
169
+ AlignmentInfo::const_iterator iter;
170
+ for (iter = obj.begin(); iter != obj.end(); ++iter) {
171
+ out << iter->first << "-" << iter->second << " ";
172
+ }
173
+ return out;
174
+ }
175
+
176
+ }
mosesdecoder/moses2/AlignmentInfo.h ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include <iostream>
23
+ #include <ostream>
24
+ #include <set>
25
+ #include <vector>
26
+ #include <cstdlib>
27
+
28
+ #include <boost/functional/hash.hpp>
29
+ #include "TypeDef.h"
30
+
31
+ namespace Moses2
32
+ {
33
+
34
+ class AlignmentInfoCollection;
35
+ class System;
36
+
37
+ /** Collection of non-terminal alignment pairs, ordered by source index.
38
+ * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
39
+ */
40
+ class AlignmentInfo
41
+ {
42
+ friend struct AlignmentInfoOrderer;
43
+ friend struct AlignmentInfoHasher;
44
+ friend class AlignmentInfoCollection;
45
+ friend class VW;
46
+
47
+ friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);
48
+
49
+ public:
50
+ typedef std::set<std::pair<size_t,size_t> > CollType;
51
+ typedef std::vector<size_t> NonTermIndexMap;
52
+ typedef CollType::const_iterator const_iterator;
53
+
54
+ const_iterator begin() const {
55
+ return m_collection.begin();
56
+ }
57
+ const_iterator end() const {
58
+ return m_collection.end();
59
+ }
60
+
61
+ void Add(size_t sourcePos, size_t targetPos) {
62
+ m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
63
+ }
64
+ /** Provides a map from target-side to source-side non-terminal indices.
65
+ * The target-side index should be the rule symbol index (COUNTING terminals).
66
+ * The index returned is the rule non-terminal index (IGNORING terminals).
67
+ */
68
+ const NonTermIndexMap &GetNonTermIndexMap() const {
69
+ return m_nonTermIndexMap;
70
+ }
71
+
72
+ /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
73
+ * the index counting both terminals and non-terminals) */
74
+ const NonTermIndexMap &GetNonTermIndexMap2() const {
75
+ return m_nonTermIndexMap2;
76
+ }
77
+
78
+ const CollType &GetAlignments() const {
79
+ return m_collection;
80
+ }
81
+
82
+ std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
83
+ std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
84
+
85
+ size_t GetSize() const {
86
+ return m_collection.size();
87
+ }
88
+
89
+ std::vector< const std::pair<size_t,size_t>* >
90
+ GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;
91
+
92
+ std::vector<size_t> GetSourceIndex2PosMap() const;
93
+
94
+ bool operator==(const AlignmentInfo& rhs) const {
95
+ return m_collection == rhs.m_collection &&
96
+ m_nonTermIndexMap == rhs.m_nonTermIndexMap;
97
+ }
98
+
99
+ std::string Debug(const System &system) const;
100
+
101
+ private:
102
+ //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
103
+ explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
104
+ explicit AlignmentInfo(const std::vector<unsigned char> &aln);
105
+
106
+ // used only by VW to load word alignment between sentences
107
+ explicit AlignmentInfo(const std::string &str);
108
+
109
+ void BuildNonTermIndexMaps();
110
+
111
+ CollType m_collection;
112
+ NonTermIndexMap m_nonTermIndexMap;
113
+ NonTermIndexMap m_nonTermIndexMap2;
114
+ };
115
+
116
+ /** Define an arbitrary strict weak ordering between AlignmentInfo objects
117
+ * for use by AlignmentInfoCollection.
118
+ */
119
+ struct AlignmentInfoOrderer {
120
+ bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
121
+ if (a.m_collection == b.m_collection) {
122
+ return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
123
+ } else {
124
+ return a.m_collection < b.m_collection;
125
+ }
126
+ }
127
+ };
128
+
129
+ /**
130
+ * Hashing functoid
131
+ **/
132
+ struct AlignmentInfoHasher {
133
+ size_t operator()(const AlignmentInfo& a) const {
134
+ size_t seed = 0;
135
+ boost::hash_combine(seed,a.m_collection);
136
+ boost::hash_combine(seed,a.m_nonTermIndexMap);
137
+ return seed;
138
+ }
139
+
140
+ };
141
+
142
+ inline size_t hash_value(const AlignmentInfo& a)
143
+ {
144
+ static AlignmentInfoHasher hasher;
145
+ return hasher(a);
146
+ }
147
+
148
+ }
mosesdecoder/moses2/AlignmentInfoCollection.cpp ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include "AlignmentInfoCollection.h"
21
+
22
+ using namespace std;
23
+
24
+ namespace Moses2
25
+ {
26
+
27
+ AlignmentInfoCollection AlignmentInfoCollection::s_instance;
28
+
29
+ AlignmentInfoCollection::AlignmentInfoCollection()
30
+ {
31
+ std::set<std::pair<size_t,size_t> > pairs;
32
+ m_emptyAlignmentInfo = Add(pairs);
33
+ }
34
+
35
+ AlignmentInfoCollection::~AlignmentInfoCollection()
36
+ {}
37
+
38
+ const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
39
+ {
40
+ return *m_emptyAlignmentInfo;
41
+ }
42
+
43
+ AlignmentInfo const *
44
+ AlignmentInfoCollection::
45
+ Add(AlignmentInfo const& ainfo)
46
+ {
47
+ #ifdef WITH_THREADS
48
+ {
49
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
50
+ AlignmentInfoSet::const_iterator i = m_collection.find(ainfo);
51
+ if (i != m_collection.end())
52
+ return &*i;
53
+ }
54
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
55
+ #endif
56
+ std::pair<AlignmentInfoSet::iterator, bool> ret = m_collection.insert(ainfo);
57
+ return &(*ret.first);
58
+ }
59
+
60
+
61
+
62
+ }
mosesdecoder/moses2/AlignmentInfoCollection.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "AlignmentInfo.h"
23
+
24
+ #include <set>
25
+
26
+ #ifdef WITH_THREADS
27
+ #include <boost/thread/shared_mutex.hpp>
28
+ #include <boost/thread/locks.hpp>
29
+ #endif
30
+
31
+ namespace Moses2
32
+ {
33
+
34
+ /** Singleton collection of all AlignmentInfo objects.
35
+ * Used as a cache of all alignment info to save space.
36
+ */
37
+ class AlignmentInfoCollection
38
+ {
39
+ public:
40
+ static AlignmentInfoCollection &Instance() {
41
+ return s_instance;
42
+ }
43
+
44
+ /** Returns a pointer to an AlignmentInfo object with the same source-target
45
+ * alignment pairs as given in the argument. If the collection already
46
+ * contains such an object then returns a pointer to it; otherwise a new
47
+ * one is inserted.
48
+ */
49
+ private:
50
+ const AlignmentInfo* Add(AlignmentInfo const& ainfo);
51
+
52
+ public:
53
+ template<typename ALNREP>
54
+ AlignmentInfo const *
55
+ Add(ALNREP const & aln) {
56
+ return this->Add(AlignmentInfo(aln));
57
+ }
58
+
59
+ //! Returns a pointer to an empty AlignmentInfo object.
60
+ const AlignmentInfo &GetEmptyAlignmentInfo() const;
61
+
62
+ private:
63
+ typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;
64
+
65
+
66
+ //! Only a single static variable should be created.
67
+ AlignmentInfoCollection();
68
+ ~AlignmentInfoCollection();
69
+
70
+ static AlignmentInfoCollection s_instance;
71
+
72
+ #ifdef WITH_THREADS
73
+ //reader-writer lock
74
+ mutable boost::shared_mutex m_accessLock;
75
+ #endif
76
+
77
+ AlignmentInfoSet m_collection;
78
+ const AlignmentInfo *m_emptyAlignmentInfo;
79
+ };
80
+
81
+ }
mosesdecoder/moses2/ArcLists.cpp ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ArcLists.cpp
3
+ *
4
+ * Created on: 26 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <iostream>
8
+ #include <sstream>
9
+ #include <algorithm>
10
+ #include <boost/foreach.hpp>
11
+ #include "ArcLists.h"
12
+ #include "HypothesisBase.h"
13
+ #include "util/exception.hh"
14
+
15
+ using namespace std;
16
+
17
+ namespace Moses2
18
+ {
19
+
20
+ ArcLists::ArcLists()
21
+ {
22
+ // TODO Auto-generated constructor stub
23
+
24
+ }
25
+
26
+ ArcLists::~ArcLists()
27
+ {
28
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
29
+ const ArcList *arcList = collPair.second;
30
+ delete arcList;
31
+ }
32
+ }
33
+
34
+ void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
35
+ const HypothesisBase *otherHypo)
36
+ {
37
+ //cerr << added << " " << currHypo << " " << otherHypo << endl;
38
+ ArcList *arcList;
39
+ if (added) {
40
+ // we're winners!
41
+ if (otherHypo) {
42
+ // there was a existing losing hypo
43
+ arcList = &GetAndDetachArcList(otherHypo);
44
+ } else {
45
+ // there was no existing hypo
46
+ arcList = new ArcList;
47
+ }
48
+ m_coll[currHypo] = arcList;
49
+ } else {
50
+ // we're losers!
51
+ // there should be a winner, we're not doing beam pruning
52
+ UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
53
+ arcList = &GetArcList(otherHypo);
54
+ }
55
+
56
+ // in any case, add the curr hypo
57
+ arcList->push_back(currHypo);
58
+ }
59
+
60
+ ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
61
+ {
62
+ Coll::iterator iter = m_coll.find(hypo);
63
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
64
+ ArcList &arcList = *iter->second;
65
+ return arcList;
66
+ }
67
+
68
+ const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
69
+ {
70
+ Coll::const_iterator iter = m_coll.find(hypo);
71
+
72
+ if (iter == m_coll.end()) {
73
+ cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
74
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
75
+ const HypothesisBase *hypo = collPair.first;
76
+ cerr << hypo << " ";
77
+ }
78
+ }
79
+
80
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
81
+ ArcList &arcList = *iter->second;
82
+ return arcList;
83
+ }
84
+
85
+ ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
86
+ {
87
+ Coll::iterator iter = m_coll.find(hypo);
88
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
89
+ ArcList &arcList = *iter->second;
90
+
91
+ m_coll.erase(iter);
92
+
93
+ return arcList;
94
+ }
95
+
96
+ void ArcLists::Sort()
97
+ {
98
+ BOOST_FOREACH(Coll::value_type &collPair, m_coll) {
99
+ ArcList &list = *collPair.second;
100
+ std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
101
+ }
102
+ }
103
+
104
+ void ArcLists::Delete(const HypothesisBase *hypo)
105
+ {
106
+ //cerr << "hypo=" << hypo->Debug() << endl;
107
+ //cerr << "m_coll=" << m_coll.size() << endl;
108
+ Coll::iterator iter = m_coll.find(hypo);
109
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
110
+ ArcList *arcList = iter->second;
111
+
112
+ m_coll.erase(iter);
113
+ delete arcList;
114
+ }
115
+
116
+ std::string ArcLists::Debug(const System &system) const
117
+ {
118
+ stringstream strm;
119
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
120
+ const ArcList *arcList = collPair.second;
121
+ strm << arcList << "(" << arcList->size() << ") ";
122
+ }
123
+ return strm.str();
124
+ }
125
+
126
+ }
127
+
mosesdecoder/moses2/ArcLists.h ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ArcLists.h
3
+ *
4
+ * Created on: 26 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include <vector>
9
+ #include <boost/unordered_map.hpp>
10
+
11
+ namespace Moses2
12
+ {
13
+ class System;
14
+
15
+ class HypothesisBase;
16
+
17
+ typedef std::vector<const HypothesisBase*> ArcList;
18
+
19
/** Map from a hypothesis to its arc list (a vector of hypothesis pointers).
 *
 *  The map stores raw pointers to ArcList objects.
 *
 *  NOTE(review): no copy constructor or copy assignment is declared here. If
 *  the class frees these lists on destruction, copying a non-empty instance
 *  would leave two owners of the same lists -- confirm instances are never
 *  copied.
 */
class ArcLists
{
public:
  ArcLists();
  virtual ~ArcLists();

  //! Register currHypo as a winner (added == true) or loser of recombination.
  void AddArc(bool added, const HypothesisBase *currHypo,
              const HypothesisBase *otherHypo);
  //! Sort every arc list in place.
  void Sort();
  //! Remove hypo's entry from the collection.
  void Delete(const HypothesisBase *hypo);

  //! Read-only lookup of the arc list keyed by hypo.
  const ArcList &GetArcList(const HypothesisBase *hypo) const;

  std::string Debug(const System &system) const;
protected:
  typedef boost::unordered_map<const HypothesisBase*, ArcList*> Coll;
  Coll m_coll;

  //! Mutable lookup.
  ArcList &GetArcList(const HypothesisBase *hypo);
  //! NOTE(review): by its name, removes the map entry and returns the list -- confirm against the definition.
  ArcList &GetAndDetachArcList(const HypothesisBase *hypo);

};
41
+
42
+ }
43
+
mosesdecoder/moses2/Array.h ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include <cassert>
3
+ #include <boost/functional/hash.hpp>
4
+ #include "MemPool.h"
5
+
6
+ namespace Moses2
7
+ {
8
+
9
+ template<typename T>
10
+ class Array
11
+ {
12
+ public:
13
+ typedef T* iterator;
14
+ typedef const T* const_iterator;
15
+ //! iterators
16
+ const_iterator begin() const {
17
+ return m_arr;
18
+ }
19
+ const_iterator end() const {
20
+ return m_arr + m_size;
21
+ }
22
+
23
+ iterator begin() {
24
+ return m_arr;
25
+ }
26
+ iterator end() {
27
+ return m_arr + m_size;
28
+ }
29
+
30
+ Array(MemPool &pool, size_t size = 0, const T &val = T()) {
31
+ m_size = size;
32
+ m_maxSize = size;
33
+ m_arr = pool.Allocate<T>(size);
34
+ for (size_t i = 0; i < size; ++i) {
35
+ m_arr[i] = val;
36
+ }
37
+ }
38
+
39
+ size_t size() const {
40
+ return m_size;
41
+ }
42
+
43
+ const T& operator[](size_t ind) const {
44
+ return m_arr[ind];
45
+ }
46
+
47
+ T& operator[](size_t ind) {
48
+ return m_arr[ind];
49
+ }
50
+
51
+ T *GetArray() {
52
+ return m_arr;
53
+ }
54
+
55
+ size_t hash() const {
56
+ size_t seed = 0;
57
+ for (size_t i = 0; i < m_size; ++i) {
58
+ boost::hash_combine(seed, m_arr[i]);
59
+ }
60
+ return seed;
61
+ }
62
+
63
+ int Compare(const Array &compare) const {
64
+
65
+ int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size);
66
+ return cmp;
67
+ }
68
+
69
+ bool operator==(const Array &compare) const {
70
+ int cmp = Compare(compare);
71
+ return cmp == 0;
72
+ }
73
+
74
+ void resize(size_t newSize) {
75
+ assert(m_size <= m_maxSize);
76
+ m_size = newSize;
77
+ }
78
+ protected:
79
+ size_t m_size, m_maxSize;
80
+ T *m_arr;
81
+ };
82
+
83
+ }
mosesdecoder/moses2/DLLEntryApi.cpp ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "Moses2Wrapper.h"
2
+ #include <iostream>
3
+ #include <string.h>
4
+
5
+
6
+ // Generic helper definitions for shared library support
7
+ #if defined _WIN32
8
+ #define IMPORT __declspec(dllimport)
9
+ #define EXPORT __declspec(dllexport)
10
+ #else // !defined _WIN32 -- i.e., not Windows
11
+ #define __stdcall
12
+ #if __GNUC__ >= 4
13
+ #define IMPORT __attribute__ ((visibility ("default")))
14
+ #define EXPORT __attribute__ ((visibility ("default")))
15
+ #else // __GNUC__ < 4, which does not support the __attribute__ tag
16
+ #define IMPORT
17
+ #define EXPORT
18
+ #endif // __GNUC__ >= 4
19
+ #endif
20
+
21
+
22
+ using namespace std;
23
+ using namespace Moses2;
24
+
25
+ extern "C" EXPORT MosesApiErrorCode __stdcall GetMosesSystem(const char* filePath, Moses2::Moses2Wrapper * *pObject) {
26
+
27
+ if (*pObject == NULL) {
28
+ *pObject = new Moses2::Moses2Wrapper(filePath);
29
+ return MS_API_OK;
30
+ }
31
+ else {
32
+ return MS_API_E_FAILURE;
33
+ }
34
+ }
35
+
36
+ extern "C" EXPORT MosesApiErrorCode __stdcall Translate(Moses2::Moses2Wrapper * pObject, long id, const char* input, char** output) {
37
+ if (pObject != NULL)
38
+ {
39
+ std::string tr = pObject->Translate(input, id);
40
+ *output = Moses2Wrapper::CopyString(tr.c_str());
41
+ return MS_API_OK;
42
+ }
43
+ else {
44
+ return MS_API_E_FAILURE;
45
+ }
46
+ }
47
+
48
+ extern "C" EXPORT MosesApiErrorCode __stdcall FreeMemory(char* output) {
49
+ if (output != nullptr) {
50
+ Moses2Wrapper::Free(output);
51
+ return MS_API_OK;
52
+ }
53
+ else {
54
+ return MS_API_E_FAILURE;
55
+ }
56
+ }
57
+
58
+ extern "C" EXPORT MosesApiErrorCode __stdcall ReleaseSystem(Moses2::Moses2Wrapper **pObject) {
59
+ if (*pObject != NULL)
60
+ {
61
+ delete* pObject;
62
+ *pObject = NULL;
63
+ return MS_API_OK;
64
+ }
65
+ else {
66
+ return MS_API_E_FAILURE;
67
+ }
68
+ }
69
+
70
+ extern "C" EXPORT MosesApiErrorCode __stdcall EngineVersion() {
71
+ //std::cout << "windows build on v1142/ msvc 14.27.29110"<< std::endl;
72
+ std::cout << "0.0.1" << std::endl;
73
+ return MS_API_OK;
74
+ }
mosesdecoder/moses2/EstimatedScores.cpp ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (C) 2006 University of Edinburgh
7
+
8
+ This library is free software; you can redistribute it and/or
9
+ modify it under the terms of the GNU Lesser General Public
10
+ License as published by the Free Software Foundation; either
11
+ version 2.1 of the License, or (at your option) any later version.
12
+
13
+ This library is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public
19
+ License along with this library; if not, write to the Free Software
20
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ ***********************************************************************/
22
+
23
+ #include <string>
24
+ #include <iostream>
25
+ #include "EstimatedScores.h"
26
+
27
+ using namespace std;
28
+
29
+ namespace Moses2
30
+ {
31
+ /**
32
+ * Calculate future score estimate for a given coverage bitmap
33
+ *
34
+ * /param bitmap coverage bitmap
35
+ */
36
+
37
+ float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap) const
38
+ {
39
+ const size_t notInGap = numeric_limits<size_t>::max();
40
+ size_t startGap = notInGap;
41
+ float estimatedScore = 0.0f;
42
+ for (size_t currPos = 0; currPos < bitmap.GetSize(); currPos++) {
43
+ // start of a new gap?
44
+ if (bitmap.GetValue(currPos) == false && startGap == notInGap) {
45
+ startGap = currPos;
46
+ }
47
+ // end of a gap?
48
+ else if (bitmap.GetValue(currPos) == true && startGap != notInGap) {
49
+ estimatedScore += GetValue(startGap, currPos - 1);
50
+ startGap = notInGap;
51
+ }
52
+ }
53
+ // coverage ending with gap?
54
+ if (startGap != notInGap) {
55
+ estimatedScore += GetValue(startGap, bitmap.GetSize() - 1);
56
+ }
57
+
58
+ return estimatedScore;
59
+ }
60
+
61
+ /**
62
+ * Calculare future score estimate for a given coverage bitmap
63
+ * and an additional span that is also covered. This function is used
64
+ * to compute future score estimates for hypotheses that we may want
65
+ * build, but first want to check.
66
+ *
67
+ * Note: this function is implemented a bit more complex than
68
+ * the basic one (w/o additional phrase) for speed reasons,
69
+ * which is probably overkill.
70
+ *
71
+ * /param bitmap coverage bitmap
72
+ * /param startPos start of the span that is added to the coverage
73
+ * /param endPos end of the span that is added to the coverage
74
+ */
75
+
76
+ float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap, size_t startPos,
77
+ size_t endPos) const
78
+ {
79
+ const size_t notInGap = numeric_limits<size_t>::max();
80
+ float estimatedScore = 0.0f;
81
+ size_t startGap = bitmap.GetFirstGapPos();
82
+ if (startGap == NOT_FOUND) return estimatedScore; // everything filled
83
+
84
+ // start loop at first gap
85
+ size_t startLoop = startGap + 1;
86
+ if (startPos == startGap) { // unless covered by phrase
87
+ startGap = notInGap;
88
+ startLoop = endPos + 1; // -> postpone start
89
+ }
90
+
91
+ size_t lastCovered = bitmap.GetLastPos();
92
+ if (endPos > lastCovered || lastCovered == NOT_FOUND) lastCovered = endPos;
93
+
94
+ for (size_t currPos = startLoop; currPos <= lastCovered; currPos++) {
95
+ // start of a new gap?
96
+ if (startGap == notInGap && bitmap.GetValue(currPos) == false
97
+ && (currPos < startPos || currPos > endPos)) {
98
+ startGap = currPos;
99
+ }
100
+ // end of a gap?
101
+ else if (startGap != notInGap
102
+ && (bitmap.GetValue(currPos) == true
103
+ || (startPos <= currPos && currPos <= endPos))) {
104
+ estimatedScore += GetValue(startGap, currPos - 1);
105
+ startGap = notInGap;
106
+ }
107
+ }
108
+ // coverage ending with gap?
109
+ if (lastCovered != bitmap.GetSize() - 1) {
110
+ estimatedScore += GetValue(lastCovered + 1, bitmap.GetSize() - 1);
111
+ }
112
+
113
+ return estimatedScore;
114
+ }
115
+
116
+ }
117
+
mosesdecoder/moses2/EstimatedScores.h ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #pragma once
23
+
24
+ #include <iostream>
25
+ #include "legacy/Util2.h"
26
+ #include "legacy/Bitmap.h"
27
+ #include "legacy/Matrix.h"
28
+
29
+ namespace Moses2
30
+ {
31
+ class MemPool;
32
+ class System;
33
+
34
+ //! A square array of floats to store future costs in the phrase-based decoder
35
+ class EstimatedScores: public Matrix<float>
36
+ {
37
+ public:
38
+ EstimatedScores(MemPool &pool, size_t size) :
39
+ Matrix<float>(pool, size, size) {
40
+ }
41
+
42
+ ~EstimatedScores(); // not implemented
43
+
44
+ float CalcEstimatedScore(Bitmap const&) const;
45
+ float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;
46
+
47
+ std::ostream &Debug(std::ostream &out, const System &system) const {
48
+ for (size_t endPos = 0; endPos < GetSize(); endPos++) {
49
+ for (size_t startPos = 0; startPos < GetSize(); startPos++)
50
+ out << GetValue(startPos, endPos) << " ";
51
+ out << std::endl;
52
+ }
53
+ return out;
54
+ }
55
+
56
+ };
57
+
58
+ }
59
+
mosesdecoder/moses2/FF/Distortion.cpp ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Distortion.cpp
3
+ *
4
+ * Created on: 28 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <sstream>
8
+ #include "Distortion.h"
9
+ #include "../PhraseBased/Hypothesis.h"
10
+ #include "../PhraseBased/Manager.h"
11
+ #include "../legacy/Range.h"
12
+ #include "../legacy/Bitmap.h"
13
+
14
+ using namespace std;
15
+
16
+ namespace Moses2
17
+ {
18
+
19
+ struct DistortionState_traditional: public FFState {
20
+ Range range;
21
+ int first_gap;
22
+
23
+ DistortionState_traditional() :
24
+ range() {
25
+ // uninitialised
26
+ }
27
+
28
+ void Set(const Range& wr, int fg) {
29
+ range = wr;
30
+ first_gap = fg;
31
+ }
32
+
33
+ size_t hash() const {
34
+ return range.GetEndPos();
35
+ }
36
+ virtual bool operator==(const FFState& other) const {
37
+ const DistortionState_traditional& o =
38
+ static_cast<const DistortionState_traditional&>(other);
39
+ return range.GetEndPos() == o.range.GetEndPos();
40
+ }
41
+
42
+ virtual std::string ToString() const {
43
+ stringstream sb;
44
+ sb << first_gap << " " << range;
45
+ return sb.str();
46
+ }
47
+
48
+ };
49
+
50
+ ///////////////////////////////////////////////////////////////////////
51
+ Distortion::Distortion(size_t startInd, const std::string &line) :
52
+ StatefulFeatureFunction(startInd, line)
53
+ {
54
+ ReadParameters();
55
+ }
56
+
57
+ Distortion::~Distortion()
58
+ {
59
+ // TODO Auto-generated destructor stub
60
+ }
61
+
62
+ FFState* Distortion::BlankState(MemPool &pool, const System &sys) const
63
+ {
64
+ return new (pool.Allocate<DistortionState_traditional>()) DistortionState_traditional();
65
+ }
66
+
67
+ void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
68
+ const InputType &input, const Hypothesis &hypo) const
69
+ {
70
+ DistortionState_traditional &stateCast =
71
+ static_cast<DistortionState_traditional&>(state);
72
+
73
+ // fake previous translated phrase start and end
74
+ size_t start = NOT_FOUND;
75
+ size_t end = NOT_FOUND;
76
+ /*
77
+ if (input.m_frontSpanCoveredLength > 0) {
78
+ // can happen with --continue-partial-translation
79
+ start = 0;
80
+ end = input.m_frontSpanCoveredLength -1;
81
+ }
82
+ */
83
+
84
+ stateCast.range = Range(start, end);
85
+ stateCast.first_gap = NOT_FOUND;
86
+ }
87
+
88
+ void Distortion::EvaluateInIsolation(MemPool &pool, const System &system,
89
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
90
+ SCORE &estimatedScore) const
91
+ {
92
+ }
93
+
94
+ void Distortion::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
95
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
96
+ SCORE &estimatedScore) const
97
+ {
98
+ }
99
+
100
+ void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
101
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
102
+ FFState &state) const
103
+ {
104
+ const DistortionState_traditional &prev =
105
+ static_cast<const DistortionState_traditional&>(prevState);
106
+ SCORE distortionScore = CalculateDistortionScore(prev.range,
107
+ hypo.GetInputPath().range, prev.first_gap);
108
+ //cerr << "distortionScore=" << distortionScore << endl;
109
+
110
+ scores.PlusEquals(mgr.system, *this, distortionScore);
111
+
112
+ DistortionState_traditional &stateCast =
113
+ static_cast<DistortionState_traditional&>(state);
114
+ stateCast.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos());
115
+
116
+ //cerr << "hypo=" << hypo.Debug(mgr.system) << endl;
117
+ }
118
+
119
+ SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
120
+ const int FirstGap) const
121
+ {
122
+ bool useEarlyDistortionCost = false;
123
+ if (!useEarlyDistortionCost) {
124
+ return -(SCORE) ComputeDistortionDistance(prev, curr);
125
+ } else {
126
+ /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
127
+ Definitions:
128
+ S : current source range
129
+ S' : last translated source phrase range
130
+ S'' : longest fully-translated initial segment
131
+ */
132
+
133
+ int prefixEndPos = (int) FirstGap - 1;
134
+ if ((int) FirstGap == -1) prefixEndPos = -1;
135
+
136
+ // case1: S is adjacent to S'' => return 0
137
+ if ((int) curr.GetStartPos() == prefixEndPos + 1) {
138
+ //IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl;
139
+ return 0;
140
+ }
141
+
142
+ // case2: S is to the left of S' => return 2(length(S))
143
+ if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
144
+ //IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl;
145
+ return (float) -2 * (int) curr.GetNumWordsCovered();
146
+ }
147
+
148
+ // case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S))
149
+ if ((int) prev.GetEndPos() <= prefixEndPos) {
150
+ //IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl;
151
+ int z = (int) curr.GetStartPos() - prefixEndPos - 1;
152
+ return (float) -2 * (z + (int) curr.GetNumWordsCovered());
153
+ }
154
+
155
+ // case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
156
+ //IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
157
+ return (float) -2
158
+ * ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered());
159
+
160
+ }
161
+ }
162
+
163
+ int Distortion::ComputeDistortionDistance(const Range& prev,
164
+ const Range& current) const
165
+ {
166
+ int dist = 0;
167
+ if (prev.GetNumWordsCovered() == 0) {
168
+ dist = current.GetStartPos();
169
+ } else {
170
+ dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
171
+ }
172
+ return abs(dist);
173
+ }
174
+
175
+ void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
176
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
177
+ FFState &state) const
178
+ {
179
+ UTIL_THROW2("Not implemented");
180
+ }
181
+
182
+ }
mosesdecoder/moses2/FF/Distortion.h ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Distortion.h
3
+ *
4
+ * Created on: 28 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #ifndef DISTORTION_H_
9
+ #define DISTORTION_H_
10
+
11
+ #include "StatefulFeatureFunction.h"
12
+ #include "../legacy/Range.h"
13
+ #include "../TypeDef.h"
14
+
15
+ namespace Moses2
16
+ {
17
+
18
+ class Distortion: public StatefulFeatureFunction
19
+ {
20
+ public:
21
+ Distortion(size_t startInd, const std::string &line);
22
+ virtual ~Distortion();
23
+
24
+ virtual FFState* BlankState(MemPool &pool, const System &sys) const;
25
+ virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
26
+ const InputType &input, const Hypothesis &hypo) const;
27
+
28
+ virtual void
29
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
30
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
31
+ SCORE &estimatedScore) const;
32
+
33
+ virtual void
34
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
35
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
36
+ SCORE &estimatedScore) const;
37
+
38
+ virtual void EvaluateWhenApplied(const std::deque<Hypothesis*> &hypos) const {
39
+ }
40
+
41
+ virtual void EvaluateWhenApplied(const ManagerBase &mgr,
42
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
43
+ FFState &state) const;
44
+
45
+ virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
46
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
47
+ FFState &state) const;
48
+
49
+ protected:
50
+ SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
51
+ const int FirstGap) const;
52
+
53
+ int ComputeDistortionDistance(const Range& prev, const Range& current) const;
54
+
55
+ };
56
+
57
+ }
58
+
59
+ #endif /* DISTORTION_H_ */
mosesdecoder/moses2/FF/ExampleStatefulFF.cpp ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ExampleStatefulFF.cpp
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <sstream>
8
+ #include "ExampleStatefulFF.h"
9
+ #include "../PhraseBased/Manager.h"
10
+ #include "../PhraseBased/Hypothesis.h"
11
+
12
+ using namespace std;
13
+
14
+ namespace Moses2
15
+ {
16
+
17
+ class ExampleState: public FFState
18
+ {
19
+ public:
20
+ int targetLen;
21
+
22
+ ExampleState() {
23
+ // uninitialised
24
+ }
25
+
26
+ virtual size_t hash() const {
27
+ return (size_t) targetLen;
28
+ }
29
+ virtual bool operator==(const FFState& o) const {
30
+ const ExampleState& other = static_cast<const ExampleState&>(o);
31
+ return targetLen == other.targetLen;
32
+ }
33
+
34
+ virtual std::string ToString() const {
35
+ stringstream sb;
36
+ sb << targetLen;
37
+ return sb.str();
38
+ }
39
+
40
+ };
41
+
42
+ ////////////////////////////////////////////////////////////////////////////////////////
43
+ ExampleStatefulFF::ExampleStatefulFF(size_t startInd, const std::string &line) :
44
+ StatefulFeatureFunction(startInd, line)
45
+ {
46
+ ReadParameters();
47
+ }
48
+
49
+ ExampleStatefulFF::~ExampleStatefulFF()
50
+ {
51
+ // TODO Auto-generated destructor stub
52
+ }
53
+
54
+ FFState* ExampleStatefulFF::BlankState(MemPool &pool, const System &sys) const
55
+ {
56
+ return new (pool.Allocate<ExampleState>()) ExampleState();
57
+ }
58
+
59
+ void ExampleStatefulFF::EmptyHypothesisState(FFState &state,
60
+ const ManagerBase &mgr, const InputType &input,
61
+ const Hypothesis &hypo) const
62
+ {
63
+ ExampleState &stateCast = static_cast<ExampleState&>(state);
64
+ stateCast.targetLen = 0;
65
+ }
66
+
67
+ void ExampleStatefulFF::EvaluateInIsolation(MemPool &pool,
68
+ const System &system, const Phrase<Moses2::Word> &source,
69
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
70
+ SCORE &estimatedScore) const
71
+ {
72
+ }
73
+
74
+ void ExampleStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
75
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
76
+ SCORE &estimatedScore) const
77
+ {
78
+ }
79
+
80
+ void ExampleStatefulFF::EvaluateWhenApplied(const ManagerBase &mgr,
81
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
82
+ FFState &state) const
83
+ {
84
+ ExampleState &stateCast = static_cast<ExampleState&>(state);
85
+ stateCast.targetLen = hypo.GetTargetPhrase().GetSize();
86
+ }
87
+
88
+ void ExampleStatefulFF::EvaluateWhenApplied(const SCFG::Manager &mgr,
89
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
90
+ FFState &state) const
91
+ {
92
+ UTIL_THROW2("Not implemented");
93
+ }
94
+
95
+ }
96
+
mosesdecoder/moses2/FF/ExampleStatefulFF.h ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ExampleStatefulFF.h
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include "StatefulFeatureFunction.h"
11
+
12
+ namespace Moses2
13
+ {
14
+
15
+ class ExampleStatefulFF: public StatefulFeatureFunction
16
+ {
17
+ public:
18
+ ExampleStatefulFF(size_t startInd, const std::string &line);
19
+ virtual ~ExampleStatefulFF();
20
+
21
+ virtual FFState* BlankState(MemPool &pool, const System &sys) const;
22
+ virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
23
+ const InputType &input, const Hypothesis &hypo) const;
24
+
25
+ virtual void
26
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
27
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
28
+ SCORE &estimatedScore) const;
29
+
30
+ virtual void
31
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
32
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
33
+ SCORE &estimatedScore) const;
34
+
35
+ virtual void EvaluateWhenApplied(const ManagerBase &mgr,
36
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
37
+ FFState &state) const;
38
+
39
+ virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
40
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
41
+ FFState &state) const;
42
+
43
+ };
44
+
45
+ }
46
+
mosesdecoder/moses2/FF/ExampleStatelessFF.cpp ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ExampleStatelessFF.cpp
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include "../Scores.h"
8
+
9
+ #include "ExampleStatelessFF.h"
10
+
11
+ namespace Moses2
12
+ {
13
+
14
+ ExampleStatelessFF::ExampleStatelessFF(size_t startInd,
15
+ const std::string &line) :
16
+ StatelessFeatureFunction(startInd, line)
17
+ {
18
+ ReadParameters();
19
+ }
20
+
21
+ ExampleStatelessFF::~ExampleStatelessFF()
22
+ {
23
+ // TODO Auto-generated destructor stub
24
+ }
25
+
26
+ void ExampleStatelessFF::EvaluateInIsolation(MemPool &pool,
27
+ const System &system, const Phrase<Moses2::Word> &source,
28
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
29
+ SCORE &estimatedScore) const
30
+ {
31
+ }
32
+
33
+ void ExampleStatelessFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
34
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
35
+ SCORE &estimatedScore) const
36
+ {
37
+ }
38
+
39
+ }
40
+
mosesdecoder/moses2/FF/ExampleStatelessFF.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ExampleStatelessFF.h
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include "StatelessFeatureFunction.h"
11
+
12
+ namespace Moses2
13
+ {
14
+
15
+ class ExampleStatelessFF: public StatelessFeatureFunction
16
+ {
17
+ public:
18
+ ExampleStatelessFF(size_t startInd, const std::string &line);
19
+ virtual ~ExampleStatelessFF();
20
+
21
+ virtual void
22
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
23
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
24
+ SCORE &estimatedScore) const;
25
+
26
+ virtual void
27
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
28
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
29
+ SCORE &estimatedScore) const;
30
+
31
+ };
32
+
33
+ }
34
+
mosesdecoder/moses2/FF/FFState.cpp ADDED
@@ -0,0 +1 @@
 
 
1
+ #include "FFState.h"
mosesdecoder/moses2/FF/FFState.h ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <stddef.h>
5
+ #include "util/exception.hh"
6
+
7
+ namespace Moses2
8
+ {
9
+
10
/**
 * Abstract base class for feature function state objects. Subclasses supply
 * hash(), operator== and ToString(); operator!= is derived from operator==.
 */
class FFState
{
public:
  virtual ~FFState() {
  }
  virtual size_t hash() const = 0;
  virtual bool operator==(const FFState& other) const = 0;

  virtual bool operator!=(const FFState& other) const {
    return !(*this == other);
  }

  //! Textual form of the state; this is what operator<< prints.
  virtual std::string ToString() const = 0;
};

////////////////////////////////////////////////////////////////////////////////////////
//! Streams a state by writing its ToString() text.
inline std::ostream& operator<<(std::ostream& out, const FFState& obj)
{
  out << obj.ToString();
  return out;
}

////////////////////////////////////////////////////////////////////////////////////////
/**
 * Placeholder state that carries no information: every DummyState hashes to 0
 * and compares equal to any other state.
 */
class DummyState: public FFState
{
public:
  DummyState() {
  }

  virtual size_t hash() const {
    return 0;
  }

  virtual bool operator==(const FFState& other) const {
    return true;
  }

  // Without this override the class inherits the pure virtual ToString() and
  // is abstract, so no DummyState object could ever be created.
  virtual std::string ToString() const {
    return "DummyState";
  }

};
48
+
49
+ }
50
+
mosesdecoder/moses2/FF/FeatureFunction.cpp ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * FeatureFunction.cpp
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <string>
8
+ #include <vector>
9
+ #include "FeatureFunction.h"
10
+ #include "../System.h"
11
+ #include "../legacy/Util2.h"
12
+ #include "util/exception.hh"
13
+
14
+ using namespace std;
15
+
16
+ namespace Moses2
17
+ {
18
+
19
+ FeatureFunction::FeatureFunction(size_t startInd, const std::string &line)
20
+ :m_startInd(startInd)
21
+ ,m_numScores(1)
22
+ ,m_PhraseTableInd(NOT_FOUND)
23
+ ,m_tuneable(true)
24
+ {
25
+ ParseLine(line);
26
+ //cerr << GetName() << " " << m_startInd << "-" << (m_startInd + m_numScores - 1) << endl;
27
+ }
28
+
29
+ FeatureFunction::~FeatureFunction()
30
+ {
31
+ // TODO Auto-generated destructor stub
32
+ }
33
+
34
+ void FeatureFunction::ParseLine(const std::string &line)
35
+ {
36
+ vector<string> toks = Tokenize(line);
37
+ UTIL_THROW_IF2(toks.empty(), "Empty line");
38
+
39
+ string nameStub = toks[0];
40
+
41
+ set<string> keys;
42
+
43
+ for (size_t i = 1; i < toks.size(); ++i) {
44
+ vector<string> args = TokenizeFirstOnly(toks[i], "=");
45
+ UTIL_THROW_IF2(args.size() != 2,
46
+ "Incorrect format for feature function arg: " << toks[i]);
47
+
48
+ pair<set<string>::iterator, bool> ret = keys.insert(args[0]);
49
+ UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
50
+
51
+ if (args[0] == "num-features") {
52
+ m_numScores = Scan<size_t>(args[1]);
53
+ } else if (args[0] == "name") {
54
+ m_name = args[1];
55
+ } else {
56
+ m_args.push_back(args);
57
+ }
58
+ }
59
+ }
60
+
61
+ void FeatureFunction::ReadParameters()
62
+ {
63
+ while (!m_args.empty()) {
64
+ const vector<string> &args = m_args[0];
65
+ SetParameter(args[0], args[1]);
66
+
67
+ m_args.erase(m_args.begin());
68
+ }
69
+ }
70
+
71
+ void FeatureFunction::SetParameter(const std::string& key,
72
+ const std::string& value)
73
+ {
74
+ if (key == "tuneable") {
75
+ m_tuneable = Scan<bool>(value);
76
+ } else {
77
+ UTIL_THROW2(GetName() << ": Unknown argument " << key << "=" << value);
78
+ }
79
+ }
80
+
81
+ }
82
+
mosesdecoder/moses2/FF/FeatureFunction.h ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * FeatureFunction.h
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstddef>
11
+ #include <string>
12
+ #include <vector>
13
+ #include "../TypeDef.h"
14
+ #include "../Phrase.h"
15
+
16
+ namespace Moses2
17
+ {
18
+ template<typename WORD>
19
+ class TargetPhrase;
20
+
21
+ class System;
22
+ class PhraseImpl;
23
+ class TargetPhrases;
24
+ class TargetPhraseImpl;
25
+ class Scores;
26
+ class ManagerBase;
27
+ class MemPool;
28
+ class InputType;
29
+
30
+ namespace SCFG
31
+ {
32
+ class TargetPhrase;
33
+ class TargetPhrases;
34
+ class Word;
35
+ }
36
+
37
+ class FeatureFunction
38
+ {
39
+ public:
40
+
41
+ FeatureFunction(size_t startInd, const std::string &line);
42
+ virtual ~FeatureFunction();
43
+ virtual void Load(System &system) {
44
+ }
45
+
46
+ size_t GetStartInd() const {
47
+ return m_startInd;
48
+ }
49
+ size_t GetNumScores() const {
50
+ return m_numScores;
51
+ }
52
+ const std::string &GetName() const {
53
+ return m_name;
54
+ }
55
+ void SetName(const std::string &val) {
56
+ m_name = val;
57
+ }
58
+
59
+ virtual size_t HasPhraseTableInd() const {
60
+ return false;
61
+ }
62
+ void SetPhraseTableInd(size_t ind) {
63
+ m_PhraseTableInd = ind;
64
+ }
65
+ size_t GetPhraseTableInd() const {
66
+ return m_PhraseTableInd;
67
+ }
68
+
69
+ //! if false, then this feature is not displayed in the n-best list.
70
+ // use with care
71
+ virtual bool IsTuneable() const {
72
+ return m_tuneable;
73
+ }
74
+
75
+ virtual void SetParameter(const std::string& key, const std::string& value);
76
+
77
+ // may have more factors than actually need, but not guaranteed.
78
+ virtual void
79
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
80
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
81
+ SCORE &estimatedScore) const = 0;
82
+
83
+ // For SCFG decoding, the source can contain non-terminals, NOT the raw
84
+ // source from the input sentence
85
+ virtual void
86
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
87
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
88
+ SCORE &estimatedScore) const = 0;
89
+
90
+ // used by lexicalised reordering model to add scores to tp data structures
91
+ virtual void EvaluateAfterTablePruning(MemPool &pool,
92
+ const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const {
93
+ }
94
+
95
+ virtual void EvaluateAfterTablePruning(MemPool &pool,
96
+ const SCFG::TargetPhrases &tps, const Phrase<SCFG::Word> &sourcePhrase) const {
97
+ }
98
+
99
+ virtual void InitializeForInput(const ManagerBase &mgr, const InputType &input) { };
100
+
101
+ // clean up temporary memory, called after processing each sentence
102
+ virtual void CleanUpAfterSentenceProcessing(const System &system, const InputType &input) const {
103
+ }
104
+
105
+ protected:
106
+ size_t m_startInd;
107
+ size_t m_numScores;
108
+ size_t m_PhraseTableInd;
109
+ std::string m_name;
110
+ std::vector<std::vector<std::string> > m_args;
111
+ bool m_tuneable;
112
+
113
+ virtual void ReadParameters();
114
+ void ParseLine(const std::string &line);
115
+ };
116
+
117
+ }
118
+
mosesdecoder/moses2/FF/FeatureFunctions.cpp ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * FeatureFunctions.cpp
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #include <boost/foreach.hpp>
9
+ #include "FeatureRegistry.h"
10
+ #include "FeatureFunctions.h"
11
+ #include "StatefulFeatureFunction.h"
12
+ #include "../System.h"
13
+ #include "../Scores.h"
14
+ #include "../MemPool.h"
15
+
16
+ #include "../TranslationModel/PhraseTable.h"
17
+ #include "../TranslationModel/UnknownWordPenalty.h"
18
+ #include "../SCFG/TargetPhraseImpl.h"
19
+ #include "../SCFG/Word.h"
20
+ #include "../PhraseBased/TargetPhraseImpl.h"
21
+ #include "util/exception.hh"
22
+
23
+ using namespace std;
24
+
25
+ namespace Moses2
26
+ {
27
+ FeatureFunctions::FeatureFunctions(System &system) :
28
+ m_system(system), m_ffStartInd(0)
29
+ {
30
+ }
31
+
32
+ FeatureFunctions::~FeatureFunctions()
33
+ {
34
+ RemoveAllInColl(m_featureFunctions);
35
+ }
36
+
37
+ void FeatureFunctions::Load()
38
+ {
39
+ // load, everything but pts
40
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
41
+ FeatureFunction *nonConstFF = const_cast<FeatureFunction*>(ff);
42
+ PhraseTable *pt = dynamic_cast<PhraseTable*>(nonConstFF);
43
+
44
+ if (pt) {
45
+ // do nothing. load pt last
46
+ } else {
47
+ cerr << "Loading " << nonConstFF->GetName() << endl;
48
+ nonConstFF->Load(m_system);
49
+ cerr << "Finished loading " << nonConstFF->GetName() << endl;
50
+ }
51
+ }
52
+
53
+ // load pt
54
+ BOOST_FOREACH(const PhraseTable *pt, phraseTables) {
55
+ PhraseTable *nonConstPT = const_cast<PhraseTable*>(pt);
56
+ cerr << "Loading " << nonConstPT->GetName() << endl;
57
+ nonConstPT->Load(m_system);
58
+ cerr << "Finished loading " << nonConstPT->GetName() << endl;
59
+ }
60
+ }
61
+
62
+ void FeatureFunctions::Create()
63
+ {
64
+ const Parameter &params = m_system.params;
65
+
66
+ const PARAM_VEC *ffParams = params.GetParam("feature");
67
+ UTIL_THROW_IF2(ffParams == NULL, "Must have [feature] section");
68
+
69
+ BOOST_FOREACH(const std::string &line, *ffParams) {
70
+ FeatureFunction *ff = Create(line);
71
+
72
+ m_featureFunctions.push_back(ff);
73
+
74
+ StatefulFeatureFunction *sfff = dynamic_cast<StatefulFeatureFunction*>(ff);
75
+ if (sfff) {
76
+ sfff->SetStatefulInd(m_statefulFeatureFunctions.size());
77
+ m_statefulFeatureFunctions.push_back(sfff);
78
+ }
79
+
80
+ if (ff->HasPhraseTableInd()) {
81
+ ff->SetPhraseTableInd(m_withPhraseTableInd.size());
82
+ m_withPhraseTableInd.push_back(ff);
83
+ }
84
+
85
+ PhraseTable *pt = dynamic_cast<PhraseTable*>(ff);
86
+ if (pt) {
87
+ pt->SetPtInd(phraseTables.size());
88
+ phraseTables.push_back(pt);
89
+ }
90
+
91
+ UnknownWordPenalty *unkWP = dynamic_cast<UnknownWordPenalty *>(pt);
92
+ if (unkWP) {
93
+ m_unkWP = unkWP;
94
+
95
+ // legacy support
96
+ if (m_system.options.unk.drop) {
97
+ unkWP->SetParameter("drop", "true");
98
+ }
99
+ if (m_system.options.unk.mark) {
100
+ unkWP->SetParameter("prefix", m_system.options.unk.prefix);
101
+ unkWP->SetParameter("suffix", m_system.options.unk.suffix);
102
+ }
103
+ }
104
+ }
105
+
106
+ OverrideFeatures();
107
+ }
108
+
109
+ FeatureFunction *FeatureFunctions::Create(const std::string &line)
110
+ {
111
+ vector<string> toks = Tokenize(line);
112
+
113
+ FeatureFunction *ff = FeatureRegistry::Instance().Construct(m_ffStartInd, toks[0], line);
114
+ UTIL_THROW_IF2(ff == NULL, "Feature function not created");
115
+
116
+ // name
117
+ if (ff->GetName() == "") {
118
+ ff->SetName(GetDefaultName(toks[0]));
119
+ }
120
+
121
+ m_ffStartInd += ff->GetNumScores();
122
+
123
+ return ff;
124
+ }
125
+
126
+ std::string FeatureFunctions::GetDefaultName(const std::string &stub)
127
+ {
128
+ size_t ind;
129
+ boost::unordered_map<std::string, size_t>::iterator iter =
130
+ m_defaultNames.find(stub);
131
+ if (iter == m_defaultNames.end()) {
132
+ m_defaultNames[stub] = 0;
133
+ ind = 0;
134
+ } else {
135
+ ind = ++(iter->second);
136
+ }
137
+ return stub + SPrint(ind);
138
+ }
139
+
140
+ const FeatureFunction *FeatureFunctions::FindFeatureFunction(
141
+ const std::string &name) const
142
+ {
143
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
144
+ if (ff->GetName() == name) {
145
+ return ff;
146
+ }
147
+ }
148
+ return NULL;
149
+ }
150
+
151
+ FeatureFunction *FeatureFunctions::FindFeatureFunction(
152
+ const std::string &name)
153
+ {
154
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
155
+ if (ff->GetName() == name) {
156
+ return const_cast<FeatureFunction *>(ff);
157
+ }
158
+ }
159
+ return NULL;
160
+ }
161
+
162
+ const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd)
163
+ {
164
+ // assume only 1 unk wp
165
+ std::vector<const PhraseTable*> tmpVec(phraseTables);
166
+ std::vector<const PhraseTable*>::iterator iter;
167
+ for (iter = tmpVec.begin(); iter != tmpVec.end(); ++iter) {
168
+ const PhraseTable *pt = *iter;
169
+ if (pt == m_unkWP) {
170
+ tmpVec.erase(iter);
171
+ break;
172
+ }
173
+ }
174
+
175
+ const PhraseTable *pt = tmpVec[ptInd];
176
+ return pt;
177
+ }
178
+
179
+ void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system,
180
+ const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const
181
+ {
182
+ SCORE estimatedScore = 0;
183
+
184
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
185
+ Scores& scores = targetPhrase.GetScores();
186
+ ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore);
187
+ }
188
+
189
+ targetPhrase.SetEstimatedScore(estimatedScore);
190
+ }
191
+
192
+ void FeatureFunctions::EvaluateInIsolation(
193
+ MemPool &pool,
194
+ const System &system,
195
+ const Phrase<SCFG::Word> &source,
196
+ SCFG::TargetPhraseImpl &targetPhrase) const
197
+ {
198
+ SCORE estimatedScore = 0;
199
+
200
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
201
+ Scores& scores = targetPhrase.GetScores();
202
+ ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore);
203
+ }
204
+
205
+ targetPhrase.SetEstimatedScore(estimatedScore);
206
+ }
207
+
208
+ void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool,
209
+ const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
210
+ {
211
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
212
+ ff->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
213
+ }
214
+ }
215
+
216
+ void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
217
+ const Phrase<SCFG::Word> &sourcePhrase) const
218
+ {
219
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
220
+ ff->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
221
+ }
222
+ }
223
+
224
+ void FeatureFunctions::EvaluateWhenAppliedBatch(const Batch &batch) const
225
+ {
226
+ BOOST_FOREACH(const StatefulFeatureFunction *ff, m_statefulFeatureFunctions) {
227
+ ff->EvaluateWhenAppliedBatch(m_system, batch);
228
+ }
229
+ }
230
+
231
+ void FeatureFunctions::InitializeForInput(const ManagerBase &mgr, const InputType &input)
232
+ {
233
+ BOOST_FOREACH(FeatureFunction *ff, m_featureFunctions) {
234
+ ff->InitializeForInput(mgr, input);
235
+ }
236
+ }
237
+
238
+ void FeatureFunctions::CleanUpAfterSentenceProcessing(const InputType &input) const
239
+ {
240
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
241
+ ff->CleanUpAfterSentenceProcessing(m_system, input);
242
+ }
243
+ }
244
+
245
+ void FeatureFunctions::ShowWeights(const Weights &allWeights)
246
+ {
247
+ BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
248
+ cout << ff->GetName();
249
+ if (ff->IsTuneable()) {
250
+ cout << "=";
251
+ vector<SCORE> weights = allWeights.GetWeights(*ff);
252
+ for (size_t i = 0; i < weights.size(); ++i) {
253
+ cout << " " << weights[i];
254
+ }
255
+ cout << endl;
256
+ } else {
257
+ cout << " UNTUNEABLE" << endl;
258
+ }
259
+ }
260
+ }
261
+
262
+ void FeatureFunctions::OverrideFeatures()
263
+ {
264
+ const Parameter &parameter = m_system.params;
265
+
266
+ const PARAM_VEC *params = parameter.GetParam("feature-overwrite");
267
+ for (size_t i = 0; params && i < params->size(); ++i) {
268
+ const string &str = params->at(i);
269
+ vector<string> toks = Tokenize(str);
270
+ UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
271
+
272
+ FeatureFunction *ff = FindFeatureFunction(toks[0]);
273
+ UTIL_THROW_IF2(ff == NULL, "Feature function not found: " << toks[0]);
274
+
275
+ for (size_t j = 1; j < toks.size(); ++j) {
276
+ const string &keyValStr = toks[j];
277
+ vector<string> keyVal = Tokenize(keyValStr, "=");
278
+ UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
279
+
280
+ cerr << "Override " << ff->GetName() << " "
281
+ << keyVal[0] << "=" << keyVal[1] << endl;
282
+
283
+ ff->SetParameter(keyVal[0], keyVal[1]);
284
+
285
+ }
286
+ }
287
+
288
+ }
289
+
290
+ }
291
+
mosesdecoder/moses2/FF/FeatureFunctions.h ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * FeatureFunctions.h
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <boost/unordered_map.hpp>
11
+ #include <vector>
12
+ #include <string>
13
+ #include "../legacy/Parameter.h"
14
+ #include "../Phrase.h"
15
+
16
+ namespace Moses2
17
+ {
18
+ template<typename WORD>
19
+ class TargetPhrase;
20
+
21
+ class System;
22
+ class FeatureFunction;
23
+ class StatefulFeatureFunction;
24
+ class PhraseTable;
25
+ class Manager;
26
+ class MemPool;
27
+ class PhraseImpl;
28
+ class TargetPhrases;
29
+ class TargetPhraseImpl;
30
+ class Scores;
31
+ class Hypothesis;
32
+ class UnknownWordPenalty;
33
+ class Weights;
34
+ class InputType;
35
+
36
+ namespace SCFG
37
+ {
38
+ class TargetPhraseImpl;
39
+ class TargetPhrases;
40
+ class Word;
41
+ }
42
+
43
+ class FeatureFunctions
44
+ {
45
+ public:
46
+ std::vector<const PhraseTable*> phraseTables;
47
+
48
+ FeatureFunctions(System &system);
49
+ virtual ~FeatureFunctions();
50
+
51
+ const std::vector<FeatureFunction*> &GetFeatureFunctions() const {
52
+ return m_featureFunctions;
53
+ }
54
+
55
+ const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const {
56
+ return m_statefulFeatureFunctions;
57
+ }
58
+
59
+ const std::vector<const FeatureFunction*> &GetWithPhraseTableInd() const {
60
+ return m_withPhraseTableInd;
61
+ }
62
+
63
+ size_t GetNumScores() const {
64
+ return m_ffStartInd;
65
+ }
66
+
67
+ void Create();
68
+ void Load();
69
+
70
+ const FeatureFunction *FindFeatureFunction(const std::string &name) const;
71
+
72
+ const PhraseTable *GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd);
73
+ const UnknownWordPenalty *GetUnknownWordPenalty() const {
74
+ return m_unkWP;
75
+ }
76
+
77
+ // the pool here must be the system pool if the rule was loaded during load, or the mgr pool if it was loaded on demand
78
+ void EvaluateInIsolation(MemPool &pool, const System &system,
79
+ const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const;
80
+ void EvaluateInIsolation(MemPool &pool, const System &system,
81
+ const Phrase<SCFG::Word> &source, SCFG::TargetPhraseImpl &targetPhrase) const;
82
+
83
+ void EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
84
+ const Phrase<Moses2::Word> &sourcePhrase) const;
85
+ void EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
86
+ const Phrase<SCFG::Word> &sourcePhrase) const;
87
+
88
+ void EvaluateWhenAppliedBatch(const Batch &batch) const;
89
+
90
+ void InitializeForInput(const ManagerBase &mgr, const InputType &input);
91
+ void CleanUpAfterSentenceProcessing(const InputType &input) const;
92
+
93
+ void ShowWeights(const Weights &allWeights);
94
+
95
+ protected:
96
+ std::vector<FeatureFunction*> m_featureFunctions;
97
+ std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
98
+ std::vector<const FeatureFunction*> m_withPhraseTableInd;
99
+ const UnknownWordPenalty *m_unkWP;
100
+
101
+ boost::unordered_map<std::string, size_t> m_defaultNames;
102
+ System &m_system;
103
+ size_t m_ffStartInd;
104
+
105
+ FeatureFunction *Create(const std::string &line);
106
+ std::string GetDefaultName(const std::string &stub);
107
+ void OverrideFeatures();
108
+ FeatureFunction *FindFeatureFunction(const std::string &name);
109
+
110
+ };
111
+
112
+ }
113
+
mosesdecoder/moses2/FF/FeatureRegistry.cpp ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "FeatureRegistry.h"
2
+
3
+ #include "../TranslationModel/Memory/PhraseTableMemory.h"
4
+ #include "../TranslationModel/ProbingPT.h"
5
+ #include "../TranslationModel/UnknownWordPenalty.h"
6
+ #include "../TranslationModel/Transliteration.h"
7
+ #include "../TranslationModel/Dynamic/DynamicPhraseTable.h"
8
+
9
+ #include "../LM/KENLM.h"
10
+ #include "../LM/KENLMBatch.h"
11
+ #include "../LM/LanguageModel.h"
12
+ #include "../LM/GPULM.h"
13
+
14
+ #include "Distortion.h"
15
+ #include "LexicalReordering/LexicalReordering.h"
16
+ #include "PhrasePenalty.h"
17
+ #include "WordPenalty.h"
18
+ #include "OSM/OpSequenceModel.h"
19
+
20
+ #include "ExampleStatefulFF.h"
21
+ #include "ExampleStatelessFF.h"
22
+
23
+ using namespace std;
24
+
25
+
26
+ namespace Moses2
27
+ {
28
+ FeatureRegistry FeatureRegistry::s_instance;
29
+
30
+ template<class F>
31
+ class DefaultFeatureFactory: public FeatureFactory
32
+ {
33
+ public:
34
+ FeatureFunction *Create(size_t startInd, const std::string &line) const {
35
+ return new F(startInd, line);
36
+ }
37
+ };
38
+
39
+ ////////////////////////////////////////////////////////////////////
40
+ class KenFactory: public FeatureFactory
41
+ {
42
+ public:
43
+ FeatureFunction *Create(size_t startInd, const std::string &line) const {
44
+ return ConstructKenLM(startInd, line);
45
+ }
46
+ };
47
+
48
+ ////////////////////////////////////////////////////////////////////
49
+ FeatureRegistry::FeatureRegistry()
50
+ {
51
+ // Feature with same name as class
52
+ #define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
53
+ // Feature with different name than class.
54
+ #define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
55
+
56
+ MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory);
57
+ MOSES_FNAME(ProbingPT);
58
+ MOSES_FNAME2("PhraseDictionaryTransliteration", Transliteration);
59
+ MOSES_FNAME(UnknownWordPenalty);
60
+ MOSES_FNAME(DynamicPhraseTable);
61
+
62
+ Add("KENLM", new KenFactory());
63
+
64
+ MOSES_FNAME(KENLMBatch);
65
+ MOSES_FNAME(GPULM);
66
+
67
+ MOSES_FNAME(LanguageModel);
68
+
69
+ MOSES_FNAME(Distortion);
70
+ MOSES_FNAME(LexicalReordering);
71
+ MOSES_FNAME(PhrasePenalty);
72
+ MOSES_FNAME(WordPenalty);
73
+ MOSES_FNAME(OpSequenceModel);
74
+
75
+ MOSES_FNAME(ExampleStatefulFF);
76
+ MOSES_FNAME(ExampleStatelessFF);
77
+ }
78
+
79
+ FeatureRegistry::~FeatureRegistry()
80
+ {
81
+
82
+ }
83
+
84
+ void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
85
+ {
86
+ std::pair<std::string, boost::shared_ptr<FeatureFactory> > to_ins(name,
87
+ boost::shared_ptr<FeatureFactory>(factory));
88
+ if (!registry_.insert(to_ins).second) {
89
+ cerr << "Duplicate feature name " << name << endl;
90
+ abort();
91
+ }
92
+ }
93
+
94
+ FeatureFunction *FeatureRegistry::Construct(size_t startInd,
95
+ const std::string &name, const std::string &line) const
96
+ {
97
+ Map::const_iterator i = registry_.find(name);
98
+ if (i == registry_.end()) {
99
+ cerr << "Feature name " << name << " is not registered.";
100
+ abort();
101
+ }
102
+ FeatureFactory *fact = i->second.get();
103
+ FeatureFunction *ff = fact->Create(startInd, line);
104
+ return ff;
105
+ }
106
+
107
+ void FeatureRegistry::PrintFF() const
108
+ {
109
+ std::vector<std::string> ffs;
110
+ std::cerr << "Available feature functions:" << std::endl;
111
+ Map::const_iterator iter;
112
+ for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
113
+ const std::string &ffName = iter->first;
114
+ ffs.push_back(ffName);
115
+ }
116
+
117
+ std::vector<std::string>::const_iterator iterVec;
118
+ std::sort(ffs.begin(), ffs.end());
119
+ for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
120
+ const std::string &ffName = *iterVec;
121
+ std::cerr << ffName << " ";
122
+ }
123
+
124
+ std::cerr << std::endl;
125
+ }
126
+
127
+ }
128
+
mosesdecoder/moses2/FF/FeatureRegistry.h ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include <boost/unordered_map.hpp>
3
+ #include <boost/shared_ptr.hpp>
4
+
5
+ namespace Moses2
6
+ {
7
+ class FeatureFunction;
8
+
9
+ ////////////////////////////////////////////////////////////////////
10
+ class FeatureFactory
11
+ {
12
+ public:
13
+ virtual ~FeatureFactory() {
14
+ }
15
+
16
+ virtual FeatureFunction *Create(size_t startInd, const std::string &line) const = 0;
17
+
18
+ protected:
19
+ FeatureFactory() {
20
+ }
21
+ };
22
+
23
+ ////////////////////////////////////////////////////////////////////
24
+ class FeatureRegistry
25
+ {
26
+ public:
27
+ static const FeatureRegistry &Instance() {
28
+ return s_instance;
29
+ }
30
+
31
+ ~FeatureRegistry();
32
+
33
+ FeatureFunction *Construct(size_t startInd, const std::string &name,
34
+ const std::string &line) const;
35
+ void PrintFF() const;
36
+
37
+ private:
38
+ static FeatureRegistry s_instance;
39
+
40
+ typedef boost::unordered_map<std::string, boost::shared_ptr<FeatureFactory> > Map;
41
+ Map registry_;
42
+
43
+ FeatureRegistry();
44
+
45
+ void Add(const std::string &name, FeatureFactory *factory);
46
+
47
+ };
48
+
49
+ ////////////////////////////////////////////////////////////////////
50
+
51
+ }
52
+
mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.cpp ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * BidirectionalReorderingState.cpp
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #include <boost/functional/hash_fwd.hpp>
8
+ #include "BidirectionalReorderingState.h"
9
+ #include "../../legacy/Util2.h"
10
+ #include "../../PhraseBased/Manager.h"
11
+
12
+ using namespace std;
13
+
14
+ namespace Moses2
15
+ {
16
+
17
+ BidirectionalReorderingState::BidirectionalReorderingState(
18
+ const LRModel &config, LRState *bw, LRState *fw, size_t offset) :
19
+ LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward(
20
+ fw)
21
+ {
22
+ }
23
+
24
+ BidirectionalReorderingState::~BidirectionalReorderingState()
25
+ {
26
+ // TODO Auto-generated destructor stub
27
+ }
28
+
29
+ void BidirectionalReorderingState::Init(const LRState *prev,
30
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
31
+ const Bitmap *coverage)
32
+ {
33
+ if (m_backward) {
34
+ m_backward->Init(prev, topt, path, first, coverage);
35
+ }
36
+ if (m_forward) {
37
+ m_forward->Init(prev, topt, path, first, coverage);
38
+ }
39
+ }
40
+
41
+ std::string BidirectionalReorderingState::ToString() const
42
+ {
43
+ return "BidirectionalReorderingState " + SPrint(this) + " "
44
+ + SPrint(m_backward) + " " + SPrint(m_forward);
45
+ }
46
+
47
+ size_t BidirectionalReorderingState::hash() const
48
+ {
49
+ size_t ret = m_backward->hash();
50
+ boost::hash_combine(ret, m_forward->hash());
51
+
52
+ return ret;
53
+ }
54
+
55
+ bool BidirectionalReorderingState::operator==(const FFState& o) const
56
+ {
57
+ if (&o == this) return true;
58
+
59
+ BidirectionalReorderingState const &other =
60
+ static_cast<BidirectionalReorderingState const&>(o);
61
+
62
+ bool ret = (*m_backward == *other.m_backward)
63
+ && (*m_forward == *other.m_forward);
64
+ return ret;
65
+ }
66
+
67
+ void BidirectionalReorderingState::Expand(const ManagerBase &mgr,
68
+ const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
69
+ Scores &scores, FFState &state) const
70
+ {
71
+ BidirectionalReorderingState &stateCast =
72
+ static_cast<BidirectionalReorderingState&>(state);
73
+ m_backward->Expand(mgr, ff, hypo, phraseTableInd, scores,
74
+ *stateCast.m_backward);
75
+ m_forward->Expand(mgr, ff, hypo, phraseTableInd, scores,
76
+ *stateCast.m_forward);
77
+ }
78
+
79
+ } /* namespace Moses2 */
mosesdecoder/moses2/FF/LexicalReordering/BidirectionalReorderingState.h ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * BidirectionalReorderingState.h
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include "LRState.h"
9
+
10
+ namespace Moses2
11
+ {
12
+
13
+ class BidirectionalReorderingState: public LRState
14
+ {
15
+ public:
16
+ BidirectionalReorderingState(const LRModel &config, LRState *bw, LRState *fw,
17
+ size_t offset);
18
+
19
+ virtual ~BidirectionalReorderingState();
20
+
21
+ void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
22
+ const InputPathBase &path, bool first, const Bitmap *coverage);
23
+
24
+ size_t hash() const;
25
+ virtual bool operator==(const FFState& other) const;
26
+
27
+ virtual std::string ToString() const;
28
+
29
+ void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
30
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
31
+ FFState &state) const;
32
+
33
+ protected:
34
+ LRState *m_backward;
35
+ LRState *m_forward;
36
+
37
+ };
38
+
39
+ } /* namespace Moses2 */
40
+
mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.cpp ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HReorderingBackwardState.cpp
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+
8
+ #include "HReorderingBackwardState.h"
9
+ #include "../../PhraseBased/Hypothesis.h"
10
+ #include "../../PhraseBased/Manager.h"
11
+
12
+ namespace Moses2
13
+ {
14
+
15
+ HReorderingBackwardState::HReorderingBackwardState(MemPool &pool,
16
+ const LRModel &config, size_t offset) :
17
+ LRState(config, LRModel::Backward, offset), reoStack(pool)
18
+ {
19
+ // TODO Auto-generated constructor stub
20
+
21
+ }
22
+
23
+ HReorderingBackwardState::~HReorderingBackwardState()
24
+ {
25
+ // TODO Auto-generated destructor stub
26
+ }
27
+
28
+ void HReorderingBackwardState::Init(const LRState *prev,
29
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
30
+ const Bitmap *coverage)
31
+ {
32
+ prevTP = &topt;
33
+ reoStack.Init();
34
+ }
35
+
36
+ size_t HReorderingBackwardState::hash() const
37
+ {
38
+ size_t ret = reoStack.hash();
39
+ return ret;
40
+ }
41
+
42
+ bool HReorderingBackwardState::operator==(const FFState& o) const
43
+ {
44
+ const HReorderingBackwardState& other =
45
+ static_cast<const HReorderingBackwardState&>(o);
46
+ bool ret = reoStack == other.reoStack;
47
+ return ret;
48
+ }
49
+
50
+ std::string HReorderingBackwardState::ToString() const
51
+ {
52
+ return "HReorderingBackwardState " + SPrint(m_offset);
53
+ }
54
+
55
+ void HReorderingBackwardState::Expand(const ManagerBase &mgr,
56
+ const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
57
+ Scores &scores, FFState &state) const
58
+ {
59
+ HReorderingBackwardState &nextState =
60
+ static_cast<HReorderingBackwardState&>(state);
61
+ nextState.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
62
+ NULL);
63
+ nextState.reoStack = reoStack;
64
+
65
+ const Range &swrange = hypo.GetInputPath().range;
66
+ int reoDistance = nextState.reoStack.ShiftReduce(swrange);
67
+ ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
68
+ CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
69
+ }
70
+
71
+ } /* namespace Moses2 */
mosesdecoder/moses2/FF/LexicalReordering/HReorderingBackwardState.h ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HReorderingBackwardState.h
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include "LRState.h"
9
+ #include "ReorderingStack.h"
10
+
11
+ namespace Moses2
12
+ {
13
+
14
+ class HReorderingBackwardState: public LRState
15
+ {
16
+ private:
17
+ ReorderingStack reoStack;
18
+
19
+ public:
20
+ HReorderingBackwardState(MemPool &pool, const LRModel &config, size_t offset);
21
+
22
+ virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
23
+ const InputPathBase &path, bool first, const Bitmap *coverage);
24
+
25
+ virtual ~HReorderingBackwardState();
26
+
27
+ size_t hash() const;
28
+ virtual bool operator==(const FFState& other) const;
29
+ virtual std::string ToString() const;
30
+ void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
31
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
32
+ FFState &state) const;
33
+
34
+ };
35
+
36
+ } /* namespace Moses2 */
37
+
mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.cpp ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HReorderingForwardState.cpp
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+
8
+ #include "HReorderingForwardState.h"
9
+ #include "../../InputPathBase.h"
10
+ #include "../../PhraseBased/Manager.h"
11
+ #include "../../PhraseBased/Hypothesis.h"
12
+
13
+ namespace Moses2
14
+ {
15
+
16
+ HReorderingForwardState::HReorderingForwardState(const LRModel &config,
17
+ size_t offset) :
18
+ LRState(config, LRModel::Forward, offset), m_first(true)
19
+ {
20
+ prevPath = NULL;
21
+ m_coverage = NULL;
22
+ }
23
+
24
+ HReorderingForwardState::~HReorderingForwardState()
25
+ {
26
+ // TODO Auto-generated destructor stub
27
+ }
28
+
29
+ void HReorderingForwardState::Init(const LRState *prev,
30
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
31
+ const Bitmap *coverage)
32
+ {
33
+ prevTP = &topt;
34
+ prevPath = &path;
35
+ m_first = first;
36
+ m_coverage = coverage;
37
+ }
38
+
39
+ size_t HReorderingForwardState::hash() const
40
+ {
41
+ size_t ret;
42
+ ret = hash_value(prevPath->range);
43
+ return ret;
44
+ }
45
+
46
+ bool HReorderingForwardState::operator==(const FFState& o) const
47
+ {
48
+ if (&o == this) return true;
49
+
50
+ HReorderingForwardState const& other =
51
+ static_cast<HReorderingForwardState const&>(o);
52
+
53
+ int compareScores = (
54
+ (prevPath->range == other.prevPath->range) ?
55
+ ComparePrevScores(other.prevTP) :
56
+ (prevPath->range < other.prevPath->range) ? -1 : 1);
57
+ return compareScores == 0;
58
+ }
59
+
60
+ std::string HReorderingForwardState::ToString() const
61
+ {
62
+ return "HReorderingForwardState " + SPrint(m_offset);
63
+ }
64
+
65
+ void HReorderingForwardState::Expand(const ManagerBase &mgr,
66
+ const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
67
+ Scores &scores, FFState &state) const
68
+ {
69
+ const Range &cur = hypo.GetInputPath().range;
70
+ // keep track of the current coverage ourselves so we don't need the hypothesis
71
+ Manager &mgrCast = const_cast<Manager&>(static_cast<const Manager&>(mgr));
72
+ Bitmaps &bms = mgrCast.GetBitmaps();
73
+ const Bitmap &cov = bms.GetBitmap(*m_coverage, cur);
74
+
75
+ if (!m_first) {
76
+ LRModel::ReorderingType reoType;
77
+ reoType = m_configuration.GetOrientation(prevPath->range, cur, cov);
78
+ CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
79
+ }
80
+
81
+ HReorderingForwardState &stateCast =
82
+ static_cast<HReorderingForwardState&>(state);
83
+ stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
84
+ &cov);
85
+ }
86
+
87
+ } /* namespace Moses2 */
mosesdecoder/moses2/FF/LexicalReordering/HReorderingForwardState.h ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HReorderingForwardState.h
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include "LRState.h"
9
+
10
+ namespace Moses2
11
+ {
12
+ class Range;
13
+ class Bitmap;
14
+ class InputPathBase;
15
+
16
+ class HReorderingForwardState: public LRState
17
+ {
18
+ public:
19
+ HReorderingForwardState(const LRModel &config, size_t offset);
20
+ virtual ~HReorderingForwardState();
21
+
22
+ void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
23
+ const InputPathBase &path, bool first, const Bitmap *coverage);
24
+
25
+ size_t hash() const;
26
+ virtual bool operator==(const FFState& other) const;
27
+ virtual std::string ToString() const;
28
+ void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
29
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
30
+ FFState &state) const;
31
+
32
+ protected:
33
+ bool m_first;
34
+ //const Range &m_prevRange;
35
+ const InputPathBase *prevPath;
36
+ const Bitmap *m_coverage;
37
+
38
+ };
39
+
40
+ } /* namespace Moses2 */
41
+
mosesdecoder/moses2/FF/LexicalReordering/LRModel.cpp ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * LRModel.cpp
3
+ *
4
+ * Created on: 23 Mar 2016
5
+ * Author: hieu
6
+ */
7
+
8
+ #include "LRModel.h"
9
+ #include "../../legacy/Util2.h"
10
+ #include "../../legacy/Range.h"
11
+ #include "../../legacy/Bitmap.h"
12
+ #include "../../MemPool.h"
13
+ #include "util/exception.hh"
14
+ #include "PhraseBasedReorderingState.h"
15
+ #include "BidirectionalReorderingState.h"
16
+ #include "HReorderingBackwardState.h"
17
+ #include "HReorderingForwardState.h"
18
+
19
+ using namespace std;
20
+
21
+ namespace Moses2
22
+ {
23
+
24
+ bool IsMonotonicStep(Range const& prev, // words range of last source phrase
25
+ Range const& cur, // words range of current source phrase
26
+ Bitmap const& cov) // coverage bitmap
27
+ {
28
+ size_t e = prev.GetEndPos() + 1;
29
+ size_t s = cur.GetStartPos();
30
+ return (s == e || (s >= e && !cov.GetValue(e)));
31
+ }
32
+
33
+ bool IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
34
+ {
35
+ size_t s = prev.GetStartPos();
36
+ size_t e = cur.GetEndPos();
37
+ return (e + 1 == s || (e < s && !cov.GetValue(s - 1)));
38
+ }
39
+
40
+ LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) :
41
+ m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(
42
+ Backward), m_scoreProducer(&ff)
43
+ {
44
+ std::vector<std::string> config = Tokenize(modelType, "-");
45
+
46
+ for (size_t i = 0; i < config.size(); ++i) {
47
+ if (config[i] == "hier") {
48
+ m_phraseBased = false;
49
+ } else if (config[i] == "phrase") {
50
+ m_phraseBased = true;
51
+ } else if (config[i] == "wbe") {
52
+ m_phraseBased = true;
53
+ }
54
+ // no word-based decoding available, fall-back to phrase-based
55
+ // This is the old lexical reordering model combination of moses
56
+
57
+ else if (config[i] == "msd") {
58
+ m_modelType = MSD;
59
+ } else if (config[i] == "mslr") {
60
+ m_modelType = MSLR;
61
+ } else if (config[i] == "monotonicity") {
62
+ m_modelType = Monotonic;
63
+ } else if (config[i] == "leftright") {
64
+ m_modelType = LeftRight;
65
+ }
66
+
67
+ // unidirectional is deprecated, use backward instead
68
+ else if (config[i] == "unidirectional") {
69
+ m_direction = Backward;
70
+ } else if (config[i] == "backward") {
71
+ m_direction = Backward;
72
+ } else if (config[i] == "forward") {
73
+ m_direction = Forward;
74
+ } else if (config[i] == "bidirectional") {
75
+ m_direction = Bidirectional;
76
+ }
77
+
78
+ else if (config[i] == "f") {
79
+ m_condition = F;
80
+ } else if (config[i] == "fe") {
81
+ m_condition = FE;
82
+ }
83
+
84
+ else if (config[i] == "collapseff") {
85
+ m_collapseScores = true;
86
+ } else if (config[i] == "allff") {
87
+ m_collapseScores = false;
88
+ } else {
89
+ std::cerr
90
+ << "Illegal part in the lexical reordering configuration string: "
91
+ << config[i] << std::endl;
92
+ exit(1);
93
+ }
94
+ }
95
+
96
+ if (m_modelType == None) {
97
+ std::cerr << "You need to specify the type of the reordering model "
98
+ << "(msd, monotonicity,...)" << std::endl;
99
+ exit(1);
100
+ }
101
+
102
+ }
103
+
104
+ LRModel::~LRModel()
105
+ {
106
+ // TODO Auto-generated destructor stub
107
+ }
108
+
109
+ size_t LRModel::GetNumberOfTypes() const
110
+ {
111
+ return ((m_modelType == MSD) ? 3 : (m_modelType == MSLR) ? 4 : 2);
112
+ }
113
+
114
+ /// return orientation for the first phrase
115
+ LRModel::ReorderingType LRModel::GetOrientation(Range const& cur) const
116
+ {
117
+ UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
118
+ return ((m_modelType == LeftRight) ? R : (cur.GetStartPos() == 0) ? M :
119
+ (m_modelType == MSD) ? D : (m_modelType == MSLR) ? DR : NM);
120
+ }
121
+
122
+ LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
123
+ Range const& cur) const
124
+ {
125
+ UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
126
+ return (
127
+ (m_modelType == LeftRight) ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
128
+ : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M :
129
+ (m_modelType == Monotonic) ? NM :
130
+ (prev.GetStartPos() == cur.GetEndPos() + 1) ? S :
131
+ (m_modelType == MSD) ? D :
132
+ (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
133
+ }
134
+
135
+ LRModel::ReorderingType LRModel::GetOrientation(int const reoDistance) const
136
+ {
137
+ // this one is for HierarchicalReorderingBackwardState
138
+ return ((m_modelType == LeftRight) ? (reoDistance >= 1) ? R : L
139
+ : (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM :
140
+ (reoDistance == -1) ? S : (m_modelType == MSD) ? D :
141
+ (reoDistance > 1) ? DR : DL);
142
+ }
143
+
144
+ LRState *LRModel::CreateLRState(MemPool &pool) const
145
+ {
146
+ LRState *bwd = NULL, *fwd = NULL;
147
+ size_t offset = 0;
148
+
149
+ switch (m_direction) {
150
+ case Backward:
151
+ case Bidirectional:
152
+ if (m_phraseBased) {
153
+ bwd =
154
+ new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
155
+ *this, Backward, offset);
156
+ //cerr << "bwd=" << bwd << bwd->ToString() << endl;
157
+ } else {
158
+ bwd =
159
+ new (pool.Allocate<HReorderingBackwardState>()) HReorderingBackwardState(
160
+ pool, *this, offset);
161
+ }
162
+ offset += m_collapseScores ? 1 : GetNumberOfTypes();
163
+ if (m_direction == Backward) return bwd; // else fall through
164
+ case Forward:
165
+ if (m_phraseBased) {
166
+ fwd =
167
+ new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
168
+ *this, Forward, offset);
169
+ //cerr << "fwd=" << fwd << fwd->ToString() << endl;
170
+ } else {
171
+ fwd =
172
+ new (pool.Allocate<HReorderingForwardState>()) HReorderingForwardState(
173
+ *this, offset);
174
+ }
175
+ offset += m_collapseScores ? 1 : GetNumberOfTypes();
176
+ if (m_direction == Forward) return fwd;
177
+ }
178
+
179
+ //cerr << "LRStates:" << *bwd << endl << *fwd << endl;
180
+ BidirectionalReorderingState *ret =
181
+ new (pool.Allocate<BidirectionalReorderingState>()) BidirectionalReorderingState(
182
+ *this, bwd, fwd, 0);
183
+ return ret;
184
+ }
185
+
186
+ LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
187
+ Range const& cur, Bitmap const& cov) const
188
+ {
189
+ return (
190
+ (m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? R : L
191
+ : IsMonotonicStep(prev, cur, cov) ? M : (m_modelType == Monotonic) ? NM :
192
+ IsSwap(prev, cur, cov) ? S : (m_modelType == MSD) ? D :
193
+ cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
194
+ }
195
+
196
+ } /* namespace Moses2 */
mosesdecoder/moses2/FF/LexicalReordering/LRModel.h ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * LRModel.h
3
+ *
4
+ * Created on: 23 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include <string>
9
+
10
+ namespace Moses2
11
+ {
12
+
13
+ class MemPool;
14
+ class Range;
15
+ class Bitmap;
16
+ class LRState;
17
+ class LexicalReordering;
18
+
19
+ class LRModel
20
+ {
21
+ public:
22
+ enum ModelType {
23
+ Monotonic, MSD, MSLR, LeftRight, None
24
+ };
25
+ enum Direction {
26
+ Forward, Backward, Bidirectional
27
+ };
28
+ enum Condition {
29
+ F, E, FE
30
+ };
31
+
32
+ enum ReorderingType {
33
+ M = 0, // monotonic
34
+ NM = 1, // non-monotonic
35
+ S = 1, // swap
36
+ D = 2, // discontinuous
37
+ DL = 2, // discontinuous, left
38
+ DR = 3, // discontinuous, right
39
+ R = 0, // right
40
+ L = 1, // left
41
+ MAX = 3, // largest possible
42
+ NONE = 4 // largest possible
43
+ };
44
+
45
+ LRModel(const std::string &modelType, LexicalReordering &ff);
46
+ virtual ~LRModel();
47
+
48
+ ModelType GetModelType() const {
49
+ return m_modelType;
50
+ }
51
+ Direction GetDirection() const {
52
+ return m_direction;
53
+ }
54
+ Condition GetCondition() const {
55
+ return m_condition;
56
+ }
57
+
58
+ bool IsPhraseBased() const {
59
+ return m_phraseBased;
60
+ }
61
+
62
+ bool CollapseScores() const {
63
+ return m_collapseScores;
64
+ }
65
+
66
+ size_t GetNumberOfTypes() const;
67
+
68
+ LexicalReordering*
69
+ GetScoreProducer() const {
70
+ return m_scoreProducer;
71
+ }
72
+
73
+ LRState *CreateLRState(MemPool &pool) const;
74
+
75
+ ReorderingType // for first phrase in phrase-based
76
+ GetOrientation(Range const& cur) const;
77
+
78
+ ReorderingType // for non-first phrases in phrase-based
79
+ GetOrientation(Range const& prev, Range const& cur) const;
80
+
81
+ ReorderingType // for HReorderingForwardState
82
+ GetOrientation(Range const& prev, Range const& cur, Bitmap const& cov) const;
83
+
84
+ ReorderingType // for HReorderingBackwarddState
85
+ GetOrientation(int const reoDistance) const;
86
+
87
+ protected:
88
+
89
+ ModelType m_modelType;
90
+ bool m_phraseBased;
91
+ bool m_collapseScores;
92
+ Direction m_direction;
93
+ Condition m_condition;
94
+ LexicalReordering *m_scoreProducer;
95
+
96
+ };
97
+
98
+ } /* namespace Moses2 */
99
+
mosesdecoder/moses2/FF/LexicalReordering/LRState.cpp ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * LRState.cpp
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #include "LRState.h"
8
+ #include "LexicalReordering.h"
9
+ #include "../../Scores.h"
10
+ #include "../../TargetPhrase.h"
11
+
12
+ using namespace std;
13
+
14
+ namespace Moses2
15
+ {
16
+
17
+ class InputType;
18
+
19
+ LRState::LRState(const LRModel &config, LRModel::Direction dir, size_t offset) :
20
+ m_configuration(config), m_direction(dir), m_offset(offset)
21
+ {
22
+ }
23
+
24
+ int LRState::ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const
25
+ {
26
+ LexicalReordering* producer = m_configuration.GetScoreProducer();
27
+ size_t phraseTableInd = producer->GetPhraseTableInd();
28
+ const SCORE *myScores = (const SCORE*) prevTP->ffData[phraseTableInd]; //producer->
29
+ const SCORE *yrScores = (const SCORE*) other->ffData[phraseTableInd]; //producer->
30
+
31
+ if (myScores == yrScores) return 0;
32
+
33
+ // The pointers are NULL if a phrase pair isn't found in the reordering table.
34
+ if (yrScores == NULL) return -1;
35
+ if (myScores == NULL) return 1;
36
+
37
+ size_t stop = m_offset + m_configuration.GetNumberOfTypes();
38
+ for (size_t i = m_offset; i < stop; i++) {
39
+ if ((myScores)[i] < (yrScores)[i]) return -1;
40
+ if ((myScores)[i] > (yrScores)[i]) return 1;
41
+ }
42
+ return 0;
43
+ }
44
+
45
+ void LRState::CopyScores(const System &system, Scores &accum,
46
+ const TargetPhrase<Moses2::Word> &topt, ReorderingType reoType) const
47
+ {
48
+ // don't call this on a bidirectional object
49
+ UTIL_THROW_IF2(
50
+ m_direction != LRModel::Backward && m_direction != LRModel::Forward,
51
+ "Unknown direction: " << m_direction);
52
+
53
+ TargetPhrase<Moses2::Word> const* relevantOpt = (
54
+ (m_direction == LRModel::Backward) ? &topt : prevTP);
55
+
56
+ LexicalReordering* producer = m_configuration.GetScoreProducer();
57
+ size_t phraseTableInd = producer->GetPhraseTableInd();
58
+ const SCORE *cached = (const SCORE*) relevantOpt->ffData[phraseTableInd]; //producer->
59
+
60
+ if (cached == NULL) {
61
+ return;
62
+ }
63
+
64
+ size_t off_remote = m_offset + reoType;
65
+ size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
66
+
67
+ UTIL_THROW_IF2(off_local >= producer->GetNumScores(),
68
+ "offset out of vector bounds!");
69
+
70
+ // look up applicable score from vector of scores
71
+ //UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
72
+ //Scores scores(producer->GetNumScoreComponents(),0);
73
+ SCORE score = cached[off_remote];
74
+ accum.PlusEquals(system, *producer, score, off_local);
75
+
76
+ // else: use default scores (if specified)
77
+ /*
78
+ else if (producer->GetHaveDefaultScores()) {
79
+ Scores scores(producer->GetNumScoreComponents(),0);
80
+ scores[off_local] = producer->GetDefaultScore(off_remote);
81
+ accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
82
+ }
83
+ */
84
+ // note: if no default score, no cost
85
+ /*
86
+ const SparseReordering* sparse = m_configuration.GetSparseReordering();
87
+ if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
88
+ m_direction, accum);
89
+ */
90
+ }
91
+
92
+ }
93
+
mosesdecoder/moses2/FF/LexicalReordering/LRState.h ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include "../FFState.h"
3
+ #include "LRModel.h"
4
+
5
+ namespace Moses2
6
+ {
7
+ template<typename WORD>
8
+ class TargetPhrase;
9
+
10
+ class LexicalReordering;
11
+ class Hypothesis;
12
+ class System;
13
+ class Scores;
14
+ class Bitmap;
15
+ class ManagerBase;
16
+ class InputType;
17
+ class InputPathBase;
18
+ class Word;
19
+
20
+ class LRState: public FFState
21
+ {
22
+ public:
23
+ typedef LRModel::ReorderingType ReorderingType;
24
+ const TargetPhrase<Moses2::Word> *prevTP;
25
+
26
+ LRState(const LRModel &config, LRModel::Direction dir, size_t offset);
27
+
28
+ virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
29
+ const InputPathBase &path, bool first, const Bitmap *coverage) = 0;
30
+
31
+ virtual void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
32
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
33
+ FFState &state) const = 0;
34
+
35
+ void CopyScores(const System &system, Scores &accum, const TargetPhrase<Moses2::Word> &topt,
36
+ ReorderingType reoType) const;
37
+
38
+ protected:
39
+ const LRModel& m_configuration;
40
+ LRModel::Direction m_direction;
41
+ size_t m_offset;
42
+
43
+ int
44
+ ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const;
45
+
46
+ };
47
+
48
+ }
mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.cpp ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * LexicalReordering.cpp
3
+ *
4
+ * Created on: 15 Dec 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #include <boost/foreach.hpp>
9
+ #include "util/exception.hh"
10
+ #include "LexicalReordering.h"
11
+ #include "LRModel.h"
12
+ #include "PhraseBasedReorderingState.h"
13
+ #include "BidirectionalReorderingState.h"
14
+ #include "../../TranslationModel/PhraseTable.h"
15
+ #include "../../System.h"
16
+ #include "../../PhraseBased/PhraseImpl.h"
17
+ #include "../../PhraseBased/Manager.h"
18
+ #include "../../PhraseBased/Hypothesis.h"
19
+ #include "../../PhraseBased/TargetPhrases.h"
20
+ #include "../../PhraseBased/TargetPhraseImpl.h"
21
+ #include "../../legacy/InputFileStream.h"
22
+ #include "../../legacy/Util2.h"
23
+
24
+ #ifdef HAVE_CMPH
25
+ #include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
26
+ #endif
27
+
28
+
29
+ using namespace std;
30
+
31
+ namespace Moses2
32
+ {
33
+
34
+ ///////////////////////////////////////////////////////////////////////
35
+
36
+ LexicalReordering::LexicalReordering(size_t startInd, const std::string &line)
37
+ : StatefulFeatureFunction(startInd, line)
38
+ , m_blank(NULL)
39
+ , m_propertyInd(-1)
40
+ , m_coll(NULL)
41
+ , m_configuration(NULL)
42
+ #ifdef HAVE_CMPH
43
+ , m_compactModel(NULL)
44
+ #endif
45
+ {
46
+ ReadParameters();
47
+ assert(m_configuration);
48
+ //assert(m_numScores == 6);
49
+ }
50
+
51
+ LexicalReordering::~LexicalReordering()
52
+ {
53
+ delete m_coll;
54
+ delete m_configuration;
55
+ #ifdef HAVE_CMPH
56
+ delete m_compactModel;
57
+ #endif
58
+ }
59
+
60
+ void LexicalReordering::Load(System &system)
61
+ {
62
+ MemPool &pool = system.GetSystemPool();
63
+
64
+ if (m_propertyInd >= 0) {
65
+ // Using integrate Lex RO. No loading needed
66
+ #ifdef HAVE_CMPH
67
+ } else if (FileExists(m_path + ".minlexr")) {
68
+ m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr",
69
+ m_FactorsF, m_FactorsE, m_FactorsC);
70
+ m_blank = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, 0);
71
+ #endif
72
+ } else {
73
+ m_coll = new Coll();
74
+ InputFileStream file(m_path);
75
+ string line;
76
+ size_t lineNum = 0;
77
+
78
+ while (getline(file, line)) {
79
+ if (++lineNum % 1000000 == 0) {
80
+ cerr << lineNum << " ";
81
+ }
82
+
83
+ std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
84
+ assert(toks.size() == 3);
85
+ PhraseImpl *source = PhraseImpl::CreateFromString(pool, system.GetVocab(),
86
+ system, toks[0]);
87
+ PhraseImpl *target = PhraseImpl::CreateFromString(pool, system.GetVocab(),
88
+ system, toks[1]);
89
+ std::vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
90
+ std::transform(scores.begin(), scores.end(), scores.begin(),
91
+ TransformScore);
92
+ std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
93
+
94
+ Key key(source, target);
95
+ (*m_coll)[key] = scores;
96
+ }
97
+ }
98
+ }
99
+
100
+ void LexicalReordering::SetParameter(const std::string& key,
101
+ const std::string& value)
102
+ {
103
+ if (key == "path") {
104
+ m_path = value;
105
+ } else if (key == "type") {
106
+ m_configuration = new LRModel(value, *this);
107
+ } else if (key == "input-factor") {
108
+ m_FactorsF = Tokenize<FactorType>(value);
109
+ } else if (key == "output-factor") {
110
+ m_FactorsE = Tokenize<FactorType>(value);
111
+ } else if (key == "property-index") {
112
+ m_propertyInd = Scan<int>(value);
113
+ } else {
114
+ StatefulFeatureFunction::SetParameter(key, value);
115
+ }
116
+ }
117
+
118
+ FFState* LexicalReordering::BlankState(MemPool &pool, const System &sys) const
119
+ {
120
+ FFState *ret = m_configuration->CreateLRState(pool);
121
+ return ret;
122
+ }
123
+
124
+ void LexicalReordering::EmptyHypothesisState(FFState &state,
125
+ const ManagerBase &mgr, const InputType &input,
126
+ const Hypothesis &hypo) const
127
+ {
128
+ BidirectionalReorderingState &stateCast =
129
+ static_cast<BidirectionalReorderingState&>(state);
130
+ stateCast.Init(NULL, hypo.GetTargetPhrase(), hypo.GetInputPath(), true,
131
+ &hypo.GetBitmap());
132
+ }
133
+
134
+ void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
135
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
136
+ SCORE &estimatedScore) const
137
+ {
138
+ }
139
+
140
+ void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
141
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
142
+ SCORE &estimatedScore) const
143
+ {
144
+ UTIL_THROW2("Don't use with SCFG models");
145
+ }
146
+
147
+
148
+ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
149
+ const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
150
+ {
151
+ BOOST_FOREACH(const TargetPhraseImpl *tp, tps) {
152
+ EvaluateAfterTablePruning(pool, *tp, sourcePhrase);
153
+ }
154
+ }
155
+
156
+ void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
157
+ const TargetPhraseImpl &targetPhrase, const Phrase<Moses2::Word> &sourcePhrase) const
158
+ {
159
+ if (m_propertyInd >= 0) {
160
+ SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd);
161
+ targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
162
+ #ifdef HAVE_CMPH
163
+ } else if (m_compactModel) {
164
+ // using external compact binary model
165
+ const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase,
166
+ *m_blank);
167
+ if (values.size()) {
168
+ assert(values.size() == m_numScores);
169
+
170
+ SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
171
+ for (size_t i = 0; i < m_numScores; ++i) {
172
+ scoreArr[i] = values[i];
173
+ }
174
+ targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
175
+ } else {
176
+ targetPhrase.ffData[m_PhraseTableInd] = NULL;
177
+ }
178
+ #endif
179
+ } else if (m_coll) {
180
+ // using external memory model
181
+
182
+ // cache data in target phrase
183
+ const Values *values = GetValues(sourcePhrase, targetPhrase);
184
+ assert(values->size() == m_numScores);
185
+
186
+ if (values) {
187
+ SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
188
+ for (size_t i = 0; i < m_numScores; ++i) {
189
+ scoreArr[i] = (*values)[i];
190
+ }
191
+ targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
192
+ } else {
193
+ targetPhrase.ffData[m_PhraseTableInd] = NULL;
194
+ }
195
+ }
196
+ }
197
+
198
+ void LexicalReordering::EvaluateWhenApplied(const ManagerBase &mgr,
199
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
200
+ FFState &state) const
201
+ {
202
+ const LRState &prevStateCast = static_cast<const LRState&>(prevState);
203
+ prevStateCast.Expand(mgr, *this, hypo, m_PhraseTableInd, scores, state);
204
+ }
205
+
206
+ const LexicalReordering::Values *LexicalReordering::GetValues(
207
+ const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const
208
+ {
209
+ Key key(&source, &target);
210
+ Coll::const_iterator iter;
211
+ iter = m_coll->find(key);
212
+ if (iter == m_coll->end()) {
213
+ return NULL;
214
+ } else {
215
+ return &iter->second;
216
+ }
217
+ }
218
+
219
+ void LexicalReordering::EvaluateWhenApplied(const SCFG::Manager &mgr,
220
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
221
+ FFState &state) const
222
+ {
223
+ UTIL_THROW2("Not implemented");
224
+ }
225
+
226
+ } /* namespace Moses2 */
mosesdecoder/moses2/FF/LexicalReordering/LexicalReordering.h ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * LexicalReordering.h
3
+ *
4
+ * Created on: 15 Dec 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+ #include <vector>
10
+ #include <boost/unordered_map.hpp>
11
+ #include "../StatefulFeatureFunction.h"
12
+ #include "../../TypeDef.h"
13
+ #include "../../Phrase.h"
14
+ #include "../../legacy/Range.h"
15
+
16
+ namespace Moses2
17
+ {
18
+
19
+ class LexicalReorderingTableCompact;
20
+ class LRModel;
21
+ class TargetPhraseImpl;
22
+
23
+ class LexicalReordering: public StatefulFeatureFunction
24
+ {
25
+ public:
26
+ LexicalReordering(size_t startInd, const std::string &line);
27
+ virtual ~LexicalReordering();
28
+
29
+ virtual void Load(System &system);
30
+
31
+ virtual void SetParameter(const std::string& key, const std::string& value);
32
+
33
+ virtual size_t HasPhraseTableInd() const {
34
+ return true;
35
+ }
36
+
37
+ virtual FFState* BlankState(MemPool &pool, const System &sys) const;
38
+ virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
39
+ const InputType &input, const Hypothesis &hypo) const;
40
+
41
+ virtual void
42
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
43
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
44
+ SCORE &estimatedScore) const;
45
+
46
+ virtual void
47
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
48
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
49
+ SCORE &estimatedScore) const;
50
+
51
+ virtual void
52
+ EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
53
+ const Phrase<Moses2::Word> &sourcePhrase) const;
54
+
55
+ virtual void EvaluateWhenApplied(const ManagerBase &mgr,
56
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
57
+ FFState &state) const;
58
+
59
+ virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
60
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
61
+ FFState &state) const;
62
+
63
+ protected:
64
+ std::string m_path;
65
+ FactorList m_FactorsF;
66
+ FactorList m_FactorsE;
67
+ FactorList m_FactorsC;
68
+
69
+ LRModel *m_configuration;
70
+
71
+ virtual void
72
+ EvaluateAfterTablePruning(MemPool &pool, const TargetPhraseImpl &targetPhrase,
73
+ const Phrase<Moses2::Word> &sourcePhrase) const;
74
+
75
+ // PROPERTY IN PT
76
+ int m_propertyInd;
77
+
78
+ // COMPACT MODEL
79
+ #ifdef HAVE_CMPH
80
+ LexicalReorderingTableCompact *m_compactModel;
81
+ #endif
82
+
83
+ Phrase<Moses2::Word> *m_blank;
84
+
85
+ // MEMORY MODEL
86
+ typedef std::pair<const Phrase<Moses2::Word>*, const Phrase<Moses2::Word>* > Key;
87
+ typedef std::vector<SCORE> Values;
88
+
89
+ struct KeyComparer {
90
+ size_t operator()(const Key &obj) const {
91
+ size_t seed = obj.first->hash();
92
+ boost::hash_combine(seed, obj.second->hash());
93
+ return seed;
94
+ }
95
+
96
+ bool operator()(const Key& a, const Key& b) const {
97
+ if ((*a.first) != (*b.first)) {
98
+ return false;
99
+ }
100
+ if ((*a.second) != (*b.second)) {
101
+ return false;
102
+ }
103
+ return true;
104
+ }
105
+
106
+ };
107
+
108
+ typedef boost::unordered_map<Key, Values, KeyComparer, KeyComparer> Coll;
109
+ Coll *m_coll;
110
+
111
+ const Values *GetValues(const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const;
112
+ };
113
+
114
+ } /* namespace Moses2 */
115
+
mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.cpp ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * PhraseBasedReorderingState.cpp
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+
8
+ #include "PhraseBasedReorderingState.h"
9
+ #include "LexicalReordering.h"
10
+ #include "../../PhraseBased/Hypothesis.h"
11
+ #include "../../InputPathBase.h"
12
+ #include "../../PhraseBased/Manager.h"
13
+
14
+ using namespace std;
15
+
16
+ namespace Moses2
17
+ {
18
+
19
+ PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config,
20
+ LRModel::Direction dir, size_t offset) :
21
+ LRState(config, dir, offset)
22
+ {
23
+ // uninitialised
24
+ prevPath = NULL;
25
+ prevTP = NULL;
26
+ }
27
+
28
+ void PhraseBasedReorderingState::Init(const LRState *prev,
29
+ const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
30
+ const Bitmap *coverage)
31
+ {
32
+ prevTP = &topt;
33
+ prevPath = &path;
34
+ m_first = first;
35
+ }
36
+
37
+ size_t PhraseBasedReorderingState::hash() const
38
+ {
39
+ size_t ret;
40
+ ret = (size_t) &prevPath->range;
41
+ boost::hash_combine(ret, m_direction);
42
+
43
+ return ret;
44
+ }
45
+
46
+ bool PhraseBasedReorderingState::operator==(const FFState& o) const
47
+ {
48
+ if (&o == this) return true;
49
+
50
+ const PhraseBasedReorderingState &other =
51
+ static_cast<const PhraseBasedReorderingState&>(o);
52
+ if (&prevPath->range == &other.prevPath->range) {
53
+ if (m_direction == LRModel::Forward) {
54
+ int compareScore = ComparePrevScores(other.prevTP);
55
+ return compareScore == 0;
56
+ } else {
57
+ return true;
58
+ }
59
+ } else {
60
+ return false;
61
+ }
62
+ }
63
+
64
+ void PhraseBasedReorderingState::Expand(const ManagerBase &mgr,
65
+ const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
66
+ Scores &scores, FFState &state) const
67
+ {
68
+ if ((m_direction != LRModel::Forward) || !m_first) {
69
+ LRModel const& lrmodel = m_configuration;
70
+ Range const &cur = hypo.GetInputPath().range;
71
+ LRModel::ReorderingType reoType = (
72
+ m_first ?
73
+ lrmodel.GetOrientation(cur) :
74
+ lrmodel.GetOrientation(prevPath->range, cur));
75
+ CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
76
+ }
77
+
78
+ PhraseBasedReorderingState &stateCast =
79
+ static_cast<PhraseBasedReorderingState&>(state);
80
+ stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
81
+ NULL);
82
+ }
83
+
84
+ } /* namespace Moses2 */
mosesdecoder/moses2/FF/LexicalReordering/PhraseBasedReorderingState.h ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * PhraseBasedReorderingState.h
3
+ *
4
+ * Created on: 22 Mar 2016
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+ #include "LRState.h"
10
+
11
+ namespace Moses2
12
+ {
13
+
14
+ class InputPathBase;
15
+
16
+ class PhraseBasedReorderingState: public LRState
17
+ {
18
+ public:
19
+ const InputPathBase *prevPath;
20
+ bool m_first;
21
+
22
+ PhraseBasedReorderingState(const LRModel &config, LRModel::Direction dir,
23
+ size_t offset);
24
+
25
+ void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
26
+ const InputPathBase &path, bool first, const Bitmap *coverage);
27
+
28
+ size_t hash() const;
29
+ virtual bool operator==(const FFState& other) const;
30
+
31
+ virtual std::string ToString() const {
32
+ return "PhraseBasedReorderingState";
33
+ }
34
+
35
+ void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
36
+ const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
37
+ FFState &state) const;
38
+
39
+ protected:
40
+
41
+ };
42
+
43
+ } /* namespace Moses2 */
44
+
mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.cpp ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ReorderingStack.cpp
3
+ ** Author: Ankit K. Srivastava
4
+ ** Date: Jan 26, 2010
5
+ */
6
+
7
+ #include <vector>
8
+ #include "ReorderingStack.h"
9
+ #include "../../MemPool.h"
10
+
11
+ namespace Moses2
12
+ {
13
+ ReorderingStack::ReorderingStack(MemPool &pool) :
14
+ m_stack(pool)
15
+ {
16
+
17
+ }
18
+
19
+ void ReorderingStack::Init()
20
+ {
21
+ m_stack.clear();
22
+ }
23
+
24
+ size_t ReorderingStack::hash() const
25
+ {
26
+ std::size_t ret = boost::hash_range(m_stack.begin(), m_stack.end());
27
+ return ret;
28
+ }
29
+
30
+ bool ReorderingStack::operator==(const ReorderingStack& o) const
31
+ {
32
+ const ReorderingStack& other = static_cast<const ReorderingStack&>(o);
33
+ return m_stack == other.m_stack;
34
+ }
35
+
36
+ // Method to push (shift element into the stack and reduce if reqd)
37
+ int ReorderingStack::ShiftReduce(const Range &input_span)
38
+ {
39
+ int distance; // value to return: the initial distance between this and previous span
40
+
41
+ // stack is empty
42
+ if (m_stack.empty()) {
43
+ m_stack.push_back(input_span);
44
+ return input_span.GetStartPos() + 1; // - (-1)
45
+ }
46
+
47
+ // stack is non-empty
48
+ Range prev_span = m_stack.back(); //access last element added
49
+
50
+ //calculate the distance we are returning
51
+ if (input_span.GetStartPos() > prev_span.GetStartPos()) {
52
+ distance = input_span.GetStartPos() - prev_span.GetEndPos();
53
+ } else {
54
+ distance = input_span.GetEndPos() - prev_span.GetStartPos();
55
+ }
56
+
57
+ if (distance == 1) { //monotone
58
+ m_stack.pop_back();
59
+ Range new_span(prev_span.GetStartPos(), input_span.GetEndPos());
60
+ Reduce(new_span);
61
+ } else if (distance == -1) { //swap
62
+ m_stack.pop_back();
63
+ Range new_span(input_span.GetStartPos(), prev_span.GetEndPos());
64
+ Reduce(new_span);
65
+ } else { // discontinuous
66
+ m_stack.push_back(input_span);
67
+ }
68
+
69
+ return distance;
70
+ }
71
+
72
+ // Method to reduce, if possible the spans
73
+ void ReorderingStack::Reduce(Range current)
74
+ {
75
+ bool cont_loop = true;
76
+
77
+ while (cont_loop && m_stack.size() > 0) {
78
+
79
+ Range previous = m_stack.back();
80
+
81
+ if (current.GetStartPos() - previous.GetEndPos() == 1) { //mono&merge
82
+ m_stack.pop_back();
83
+ Range t(previous.GetStartPos(), current.GetEndPos());
84
+ current = t;
85
+ } else if (previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge
86
+ m_stack.pop_back();
87
+ Range t(current.GetStartPos(), previous.GetEndPos());
88
+ current = t;
89
+ } else { // discontinuous, no more merging
90
+ cont_loop = false;
91
+ }
92
+ } // finished reducing, exit
93
+
94
+ // add to stack
95
+ m_stack.push_back(current);
96
+ }
97
+
98
+ }
99
+
mosesdecoder/moses2/FF/LexicalReordering/ReorderingStack.h ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ReorderingStack.h
3
+ ** Author: Ankit K. Srivastava
4
+ ** Date: Jan 26, 2010
5
+ */
6
+
7
+ #pragma once
8
+
9
+ //#include <string>
10
+ #include <vector>
11
+ //#include "Factor.h"
12
+ //#include "Phrase.h"
13
+ //#include "TypeDef.h"
14
+ //#include "Util.h"
15
+ #include "../../legacy/Range.h"
16
+ #include "../../Vector.h"
17
+
18
+ namespace Moses2
19
+ {
20
+ class MemPool;
21
+
22
+ class ReorderingStack
23
+ {
24
+ private:
25
+
26
+ Vector<Range> m_stack;
27
+
28
+ public:
29
+ ReorderingStack(MemPool &pool);
30
+
31
+ size_t hash() const;
32
+ bool operator==(const ReorderingStack& other) const;
33
+
34
+ void Init();
35
+ int ShiftReduce(const Range &input_span);
36
+
37
+ private:
38
+ void Reduce(Range input_span);
39
+ };
40
+
41
+ }
mosesdecoder/moses2/FF/OSM/KenOSM.cpp ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "KenOSM.h"
2
+
3
+ namespace Moses2
4
+ {
5
+
6
+ OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
7
+ {
8
+ lm::ngram::ModelType model_type;
9
+ lm::ngram::Config config;
10
+ config.load_method = load_method;
11
+ if (lm::ngram::RecognizeBinary(file, model_type)) {
12
+ switch(model_type) {
13
+ case lm::ngram::PROBING:
14
+ return new KenOSM<lm::ngram::ProbingModel>(file, config);
15
+ case lm::ngram::REST_PROBING:
16
+ return new KenOSM<lm::ngram::RestProbingModel>(file, config);
17
+ case lm::ngram::TRIE:
18
+ return new KenOSM<lm::ngram::TrieModel>(file, config);
19
+ case lm::ngram::QUANT_TRIE:
20
+ return new KenOSM<lm::ngram::QuantTrieModel>(file, config);
21
+ case lm::ngram::ARRAY_TRIE:
22
+ return new KenOSM<lm::ngram::ArrayTrieModel>(file, config);
23
+ case lm::ngram::QUANT_ARRAY_TRIE:
24
+ return new KenOSM<lm::ngram::QuantArrayTrieModel>(file, config);
25
+ default:
26
+ UTIL_THROW2("Unrecognized kenlm model type " << model_type);
27
+ }
28
+ } else {
29
+ return new KenOSM<lm::ngram::ProbingModel>(file, config);
30
+ }
31
+ }
32
+
33
+ } // namespace
mosesdecoder/moses2/FF/OSM/KenOSM.h ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "lm/model.hh"
5
+
6
+ namespace Moses2
7
+ {
8
+
9
+ class KenOSMBase
10
+ {
11
+ public:
12
+ virtual ~KenOSMBase() {}
13
+
14
+ virtual float Score(const lm::ngram::State&, StringPiece,
15
+ lm::ngram::State&) const = 0;
16
+
17
+ virtual const lm::ngram::State &BeginSentenceState() const = 0;
18
+
19
+ virtual const lm::ngram::State &NullContextState() const = 0;
20
+ };
21
+
22
+ template <class KenModel>
23
+ class KenOSM : public KenOSMBase
24
+ {
25
+ public:
26
+ KenOSM(const char *file, const lm::ngram::Config &config)
27
+ : m_kenlm(file, config) {}
28
+
29
+ float Score(const lm::ngram::State &in_state,
30
+ StringPiece word,
31
+ lm::ngram::State &out_state) const {
32
+ return m_kenlm.Score(in_state, m_kenlm.GetVocabulary().Index(word),
33
+ out_state);
34
+ }
35
+
36
+ const lm::ngram::State &BeginSentenceState() const {
37
+ return m_kenlm.BeginSentenceState();
38
+ }
39
+
40
+ const lm::ngram::State &NullContextState() const {
41
+ return m_kenlm.NullContextState();
42
+ }
43
+
44
+ private:
45
+ KenModel m_kenlm;
46
+ };
47
+
48
+ typedef KenOSMBase OSMLM;
49
+
50
+ OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
51
+
52
+
53
+ } // namespace
mosesdecoder/moses2/FF/OSM/OpSequenceModel.cpp ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <sstream>
2
+ #include "OpSequenceModel.h"
3
+ #include "osmHyp.h"
4
+ #include "lm/state.hh"
5
+ #include "../../PhraseBased/Manager.h"
6
+ #include "../../PhraseBased/Hypothesis.h"
7
+ #include "../../PhraseBased/TargetPhraseImpl.h"
8
+ #include "../../PhraseBased/Sentence.h"
9
+ #include "../../TranslationModel/UnknownWordPenalty.h"
10
+ #include "../../System.h"
11
+
12
+ using namespace std;
13
+
14
+ namespace Moses2
15
+ {
16
+
17
+ ////////////////////////////////////////////////////////////////////////////////////////
18
+
19
+ OpSequenceModel::OpSequenceModel(size_t startInd, const std::string &line) :
20
+ StatefulFeatureFunction(startInd, line)
21
+ {
22
+ sFactor = 0;
23
+ tFactor = 0;
24
+ numFeatures = 5;
25
+ load_method = util::READ;
26
+
27
+ ReadParameters();
28
+ }
29
+
30
+ OpSequenceModel::~OpSequenceModel()
31
+ {
32
+ // TODO Auto-generated destructor stub
33
+ }
34
+
35
+ void OpSequenceModel::Load(System &system)
36
+ {
37
+ readLanguageModel(m_lmPath.c_str());
38
+ }
39
+
40
+ FFState* OpSequenceModel::BlankState(MemPool &pool, const System &sys) const
41
+ {
42
+ return new (pool.Allocate<osmState>()) osmState();
43
+ }
44
+
45
+ void OpSequenceModel::EmptyHypothesisState(FFState &state,
46
+ const ManagerBase &mgr, const InputType &input,
47
+ const Hypothesis &hypo) const
48
+ {
49
+ lm::ngram::State startState = OSM->BeginSentenceState();
50
+
51
+ osmState &stateCast = static_cast<osmState&>(state);
52
+ stateCast.setState(startState);
53
+ }
54
+
55
+ void OpSequenceModel::EvaluateInIsolation(MemPool &pool,
56
+ const System &system, const Phrase<Moses2::Word> &source,
57
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
58
+ SCORE &estimatedScore) const
59
+ {
60
+ osmHypothesis obj;
61
+ obj.setState(OSM->NullContextState());
62
+
63
+ Bitmap myBitmap (pool, source.GetSize());
64
+ myBitmap.Init(std::vector<bool>());
65
+
66
+ vector <string> mySourcePhrase;
67
+ vector <string> myTargetPhrase;
68
+ vector<float> scoresVec;
69
+ vector <int> alignments;
70
+ int startIndex = 0;
71
+ int endIndex = source.GetSize();
72
+
73
+ const AlignmentInfo &align = targetPhrase.GetAlignTerm();
74
+ AlignmentInfo::const_iterator iter;
75
+
76
+ for (iter = align.begin(); iter != align.end(); ++iter) {
77
+ alignments.push_back(iter->first);
78
+ alignments.push_back(iter->second);
79
+ }
80
+
81
+ for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
82
+ if (&targetPhrase.pt == system.featureFunctions.GetUnknownWordPenalty() && sFactor == 0 && tFactor == 0)
83
+ myTargetPhrase.push_back("_TRANS_SLF_");
84
+ else
85
+ myTargetPhrase.push_back(targetPhrase[i][tFactor]->GetString().as_string());
86
+ }
87
+
88
+ for (size_t i = 0; i < source.GetSize(); i++) {
89
+ mySourcePhrase.push_back(source[i][sFactor]->GetString().as_string());
90
+ }
91
+
92
+ obj.setPhrases(mySourcePhrase , myTargetPhrase);
93
+ obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
94
+ obj.computeOSMFeature(startIndex,myBitmap);
95
+ obj.calculateOSMProb(*OSM);
96
+ obj.populateScores(scoresVec,numFeatures);
97
+
98
+ SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
99
+ scoresVec.data());
100
+ estimatedScore += weightedScore;
101
+
102
+ }
103
+
104
+ void OpSequenceModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
105
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
106
+ SCORE &estimatedScore) const
107
+ {
108
+ UTIL_THROW2("Not implemented");
109
+ }
110
+
111
+ void OpSequenceModel::EvaluateWhenApplied(const ManagerBase &mgr,
112
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
113
+ FFState &state) const
114
+ {
115
+ const TargetPhrase<Moses2::Word> &target = hypo.GetTargetPhrase();
116
+ const Bitmap &bitmap = hypo.GetBitmap();
117
+ Bitmap myBitmap(bitmap);
118
+ const ManagerBase &manager = hypo.GetManager();
119
+ const InputType &source = manager.GetInput();
120
+ const Sentence &sourceSentence = static_cast<const Sentence&>(source);
121
+
122
+ osmHypothesis obj;
123
+ vector <string> mySourcePhrase;
124
+ vector <string> myTargetPhrase;
125
+ vector<float> scoresVec;
126
+
127
+
128
+ //target.GetWord(0)
129
+
130
+ //cerr << target <<" --- "<<target.GetSourcePhrase()<< endl; // English ...
131
+
132
+ //cerr << align << endl; // Alignments ...
133
+ //cerr << cur_hypo.GetCurrSourceWordsRange() << endl;
134
+
135
+ //cerr << source <<endl;
136
+
137
+ // int a = sourceRange.GetStartPos();
138
+ // cerr << source.GetWord(a);
139
+ //cerr <<a<<endl;
140
+
141
+ //const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());
142
+
143
+
144
+ const Range & sourceRange = hypo.GetInputPath().range;
145
+ int startIndex = sourceRange.GetStartPos();
146
+ int endIndex = sourceRange.GetEndPos();
147
+ const AlignmentInfo &align = hypo.GetTargetPhrase().GetAlignTerm();
148
+ // osmState * statePtr;
149
+
150
+ vector <int> alignments;
151
+
152
+
153
+
154
+ AlignmentInfo::const_iterator iter;
155
+
156
+ for (iter = align.begin(); iter != align.end(); ++iter) {
157
+ //cerr << iter->first << "----" << iter->second << " ";
158
+ alignments.push_back(iter->first);
159
+ alignments.push_back(iter->second);
160
+ }
161
+
162
+
163
+ //cerr<<bitmap<<endl;
164
+ //cerr<<startIndex<<" "<<endIndex<<endl;
165
+
166
+
167
+ for (int i = startIndex; i <= endIndex; i++) {
168
+ myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
169
+ mySourcePhrase.push_back(sourceSentence[i][sFactor]->GetString().as_string());
170
+ // cerr<<mySourcePhrase[i]<<endl;
171
+ }
172
+
173
+ for (size_t i = 0; i < target.GetSize(); i++) {
174
+ if (&target.pt == mgr.system.featureFunctions.GetUnknownWordPenalty() && sFactor == 0 && tFactor == 0)
175
+ myTargetPhrase.push_back("_TRANS_SLF_");
176
+ else
177
+ myTargetPhrase.push_back(target[i][tFactor]->GetString().as_string());
178
+
179
+ }
180
+
181
+
182
+ //cerr<<myBitmap<<endl;
183
+
184
+ obj.setState(&prevState);
185
+ obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
186
+ obj.setPhrases(mySourcePhrase , myTargetPhrase);
187
+ obj.computeOSMFeature(startIndex,myBitmap);
188
+ obj.calculateOSMProb(*OSM);
189
+ obj.populateScores(scoresVec,numFeatures);
190
+ //obj.print();
191
+
192
+ scores.PlusEquals(mgr.system, *this, scoresVec);
193
+
194
+ osmState &stateCast = static_cast<osmState&>(state);
195
+ obj.saveState(stateCast);
196
+ }
197
+
198
+ void OpSequenceModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
199
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
200
+ FFState &state) const
201
+ {
202
+ UTIL_THROW2("Not implemented");
203
+ }
204
+
205
+ void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
206
+ {
207
+
208
+ if (key == "path") {
209
+ m_lmPath = value;
210
+ } else if (key == "support-features") {
211
+ if(value == "no")
212
+ numFeatures = 1;
213
+ else
214
+ numFeatures = 5;
215
+ } else if (key == "input-factor") {
216
+ sFactor = Scan<int>(value);
217
+ } else if (key == "output-factor") {
218
+ tFactor = Scan<int>(value);
219
+ } else if (key == "load") {
220
+ if (value == "lazy") {
221
+ load_method = util::LAZY;
222
+ } else if (value == "populate_or_lazy") {
223
+ load_method = util::POPULATE_OR_LAZY;
224
+ } else if (value == "populate_or_read" || value == "populate") {
225
+ load_method = util::POPULATE_OR_READ;
226
+ } else if (value == "read") {
227
+ load_method = util::READ;
228
+ } else if (value == "parallel_read") {
229
+ load_method = util::PARALLEL_READ;
230
+ } else {
231
+ UTIL_THROW2("Unknown KenLM load method " << value);
232
+ }
233
+ } else {
234
+ StatefulFeatureFunction::SetParameter(key, value);
235
+ }
236
+ }
237
+
238
+ void OpSequenceModel :: readLanguageModel(const char *lmFile)
239
+ {
240
+ string unkOp = "_TRANS_SLF_";
241
+ OSM = ConstructOSMLM(m_lmPath.c_str(), load_method);
242
+
243
+ lm::ngram::State startState = OSM->NullContextState();
244
+ lm::ngram::State endState;
245
+ unkOpProb = OSM->Score(startState,unkOp,endState);
246
+ }
247
+
248
+ }
mosesdecoder/moses2/FF/OSM/OpSequenceModel.h ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "../StatefulFeatureFunction.h"
2
+ #include "util/mmap.hh"
3
+ #include "KenOSM.h"
4
+
5
+ namespace Moses2
6
+ {
7
+
8
+
9
+ class OpSequenceModel : public StatefulFeatureFunction
10
+ {
11
+ public:
12
+ OSMLM* OSM;
13
+ float unkOpProb;
14
+ int numFeatures; // Number of features used ...
15
+ int sFactor; // Source Factor ...
16
+ int tFactor; // Target Factor ...
17
+ util::LoadMethod load_method; // method to load model
18
+
19
+ OpSequenceModel(size_t startInd, const std::string &line);
20
+ virtual ~OpSequenceModel();
21
+
22
+ virtual void Load(System &system);
23
+
24
+ virtual FFState* BlankState(MemPool &pool, const System &sys) const;
25
+ virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
26
+ const InputType &input, const Hypothesis &hypo) const;
27
+
28
+ virtual void
29
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
30
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
31
+ SCORE &estimatedScore) const;
32
+
33
+ virtual void
34
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
35
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
36
+ SCORE &estimatedScore) const;
37
+
38
+ virtual void EvaluateWhenApplied(const ManagerBase &mgr,
39
+ const Hypothesis &hypo, const FFState &prevState, Scores &scores,
40
+ FFState &state) const;
41
+
42
+ virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
43
+ const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
44
+ FFState &state) const;
45
+
46
+ void SetParameter(const std::string& key, const std::string& value);
47
+
48
+ protected:
49
+ std::string m_lmPath;
50
+
51
+ void readLanguageModel(const char *);
52
+
53
+ };
54
+
55
+ }
56
+
57
+
mosesdecoder/moses2/FF/OSM/osmHyp.cpp ADDED
@@ -0,0 +1,601 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "osmHyp.h"
2
+ #include <sstream>
3
+
4
+ using namespace std;
5
+ using namespace lm::ngram;
6
+
7
+ namespace Moses2
8
+ {
9
+ void osmState::setState(const lm::ngram::State & val)
10
+ {
11
+ j = 0;
12
+ E = 0;
13
+ lmState = val;
14
+ }
15
+
16
+ void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
17
+ {
18
+ gap.clear();
19
+ gap = gapVal;
20
+ j = jVal;
21
+ E = eVal;
22
+ }
23
+
24
+ size_t osmState::hash() const
25
+ {
26
+ size_t ret = j;
27
+
28
+ boost::hash_combine(ret, E);
29
+ boost::hash_combine(ret, gap);
30
+ boost::hash_combine(ret, lmState.length);
31
+
32
+ return ret;
33
+ }
34
+
35
+ bool osmState::operator==(const FFState& otherBase) const
36
+ {
37
+ const osmState &other = static_cast<const osmState&>(otherBase);
38
+ if (j != other.j)
39
+ return false;
40
+ if (E != other.E)
41
+ return false;
42
+ if (gap != other.gap)
43
+ return false;
44
+ if (lmState.length != other.lmState.length)
45
+ return false;
46
+
47
+ return true;
48
+ }
49
+
50
+ std::string osmState :: getName() const
51
+ {
52
+
53
+ return "done";
54
+ }
55
+
56
+ //////////////////////////////////////////////////
57
+
58
+ osmHypothesis :: osmHypothesis()
59
+ {
60
+ opProb = 0;
61
+ gapWidth = 0;
62
+ gapCount = 0;
63
+ openGapCount = 0;
64
+ deletionCount = 0;
65
+ gapCount = 0;
66
+ j = 0;
67
+ E = 0;
68
+ gap.clear();
69
+ }
70
+
71
+ void osmHypothesis :: setState(const FFState* prev_state)
72
+ {
73
+
74
+ if(prev_state != NULL) {
75
+
76
+ j = static_cast <const osmState *> (prev_state)->getJ();
77
+ E = static_cast <const osmState *> (prev_state)->getE();
78
+ gap = static_cast <const osmState *> (prev_state)->getGap();
79
+ lmState = static_cast <const osmState *> (prev_state)->getLMState();
80
+ }
81
+ }
82
+
83
+ void osmHypothesis :: saveState(osmState &state)
84
+ {
85
+ state.setState(lmState);
86
+ state.saveState(j,E,gap);
87
+ }
88
+
89
+ int osmHypothesis :: isTranslationOperation(int x)
90
+ {
91
+ if (operations[x].find("_JMP_BCK_") != -1)
92
+ return 0;
93
+
94
+ if (operations[x].find("_JMP_FWD_") != -1)
95
+ return 0;
96
+
97
+ if (operations[x].find("_CONT_CEPT_") != -1)
98
+ return 0;
99
+
100
+ if (operations[x].find("_INS_GAP_") != -1)
101
+ return 0;
102
+
103
+ return 1;
104
+
105
+ }
106
+
107
+ void osmHypothesis :: removeReorderingOperations()
108
+ {
109
+ gapCount = 0;
110
+ deletionCount = 0;
111
+ openGapCount = 0;
112
+ gapWidth = 0;
113
+
114
+ std::vector <std::string> tupleSequence;
115
+
116
+ for (int x = 0; x < operations.size(); x++) {
117
+ // cout<<operations[x]<<endl;
118
+
119
+ if(isTranslationOperation(x) == 1) {
120
+ tupleSequence.push_back(operations[x]);
121
+ }
122
+
123
+ }
124
+
125
+ operations.clear();
126
+ operations = tupleSequence;
127
+ }
128
+
129
+ void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
130
+ {
131
+
132
+ opProb = 0;
133
+ State currState = lmState;
134
+ State temp;
135
+
136
+ for (size_t i = 0; i<operations.size(); i++) {
137
+ temp = currState;
138
+ opProb += ptrOp.Score(temp,operations[i],currState);
139
+ }
140
+
141
+ lmState = currState;
142
+
143
+ //print();
144
+ }
145
+
146
+
147
+ int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
148
+ {
149
+
150
+ int firstOG =-1;
151
+
152
+ for(int nd = 0; nd < coverageVector.size(); nd++) {
153
+ if(coverageVector[nd]==0) {
154
+ firstOG = nd;
155
+ return firstOG;
156
+ }
157
+ }
158
+
159
+ return firstOG;
160
+
161
+ }
162
+
163
+ string osmHypothesis :: intToString(int num)
164
+ {
165
+ return SPrint(num);
166
+
167
+ }
168
+
169
+ void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
170
+ {
171
+
172
+ int gFlag = 0;
173
+ int gp = 0;
174
+ int ans;
175
+
176
+
177
+ if ( j < j1) { // j1 is the index of the source word we are about to generate ...
178
+ //if(coverageVector[j]==0) // if source word at j is not generated yet ...
179
+ if(coverageVector.GetValue(j)==0) { // if source word at j is not generated yet ...
180
+ operations.push_back("_INS_GAP_");
181
+ gFlag++;
182
+ gap[j]="Unfilled";
183
+ }
184
+ if (j == E) {
185
+ j = j1;
186
+ } else {
187
+ operations.push_back("_JMP_FWD_");
188
+ j=E;
189
+ }
190
+ }
191
+
192
+ if (j1 < j) {
193
+ // if(j < E && coverageVector[j]==0)
194
+ if(j < E && coverageVector.GetValue(j)==0) {
195
+ operations.push_back("_INS_GAP_");
196
+ gFlag++;
197
+ gap[j]="Unfilled";
198
+ }
199
+
200
+ j=closestGap(gap,j1,gp);
201
+ operations.push_back("_JMP_BCK_"+ intToString(gp));
202
+
203
+ //cout<<"I am j "<<j<<endl;
204
+ //cout<<"I am j1 "<<j1<<endl;
205
+
206
+ if(j==j1)
207
+ gap[j]="Filled";
208
+ }
209
+
210
+ if (j < j1) {
211
+ operations.push_back("_INS_GAP_");
212
+ gap[j] = "Unfilled";
213
+ gFlag++;
214
+ j=j1;
215
+ }
216
+
217
+ if(contFlag == 0) { // First words of the multi-word cept ...
218
+
219
+ if(english == "_TRANS_SLF_") { // Unknown word ...
220
+ operations.push_back("_TRANS_SLF_");
221
+ } else {
222
+ operations.push_back("_TRANS_" + english + "_TO_" + german);
223
+ }
224
+
225
+ //ans = firstOpenGap(coverageVector);
226
+ ans = coverageVector.GetFirstGapPos();
227
+
228
+ if (ans != -1)
229
+ gapWidth += j - ans;
230
+
231
+ } else if (contFlag == 2) {
232
+
233
+ operations.push_back("_INS_" + german);
234
+ ans = coverageVector.GetFirstGapPos();
235
+
236
+ if (ans != -1)
237
+ gapWidth += j - ans;
238
+ deletionCount++;
239
+ } else {
240
+ operations.push_back("_CONT_CEPT_");
241
+ }
242
+
243
+ //coverageVector[j]=1;
244
+ coverageVector.SetValue(j,1);
245
+ j+=1;
246
+
247
+ if(E<j)
248
+ E=j;
249
+
250
+ if (gFlag > 0)
251
+ gapCount++;
252
+
253
+ openGapCount += getOpenGaps();
254
+
255
+ //if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
256
+ if (j < coverageVector.GetSize()) {
257
+ if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
258
+ j1 = j;
259
+ german = currF[j1-startIndex];
260
+ english = "_INS_";
261
+ generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
262
+ }
263
+ }
264
+
265
+ }
266
+
267
+ void osmHypothesis :: print()
268
+ {
269
+ for (int i = 0; i< operations.size(); i++) {
270
+ cerr<<operations[i]<<" ";
271
+
272
+ }
273
+
274
+ cerr<<endl<<endl;
275
+
276
+ cerr<<"Operation Probability "<<opProb<<endl;
277
+ cerr<<"Gap Count "<<gapCount<<endl;
278
+ cerr<<"Open Gap Count "<<openGapCount<<endl;
279
+ cerr<<"Gap Width "<<gapWidth<<endl;
280
+ cerr<<"Deletion Count "<<deletionCount<<endl;
281
+
282
+ cerr<<"_______________"<<endl;
283
+ }
284
+
285
+ int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
286
+ {
287
+
288
+ int dist=1172;
289
+ int value=-1;
290
+ int temp=0;
291
+ gp=0;
292
+ int opGap=0;
293
+
294
+ map <int,string> :: iterator iter;
295
+
296
+ iter=gap.end();
297
+
298
+ do {
299
+ iter--;
300
+ //cout<<"Trapped "<<iter->first<<endl;
301
+
302
+ if(iter->first==j1 && iter->second== "Unfilled") {
303
+ opGap++;
304
+ gp = opGap;
305
+ return j1;
306
+
307
+ }
308
+
309
+ if(iter->second =="Unfilled") {
310
+ opGap++;
311
+ temp = iter->first - j1;
312
+
313
+ if(temp<0)
314
+ temp=temp * -1;
315
+
316
+ if(dist>temp && iter->first < j1) {
317
+ dist=temp;
318
+ value=iter->first;
319
+ gp=opGap;
320
+ }
321
+ }
322
+
323
+
324
+ } while(iter!=gap.begin());
325
+
326
+ return value;
327
+ }
328
+
329
+
330
+
331
+ int osmHypothesis :: getOpenGaps()
332
+ {
333
+ map <int,string> :: iterator iter;
334
+
335
+ int nd = 0;
336
+ for (iter = gap.begin(); iter!=gap.end(); iter++) {
337
+ if(iter->second == "Unfilled")
338
+ nd++;
339
+ }
340
+
341
+ return nd;
342
+
343
+ }
344
+
345
+ void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
346
+ {
347
+
348
+ operations.push_back("_DEL_" + english);
349
+ currTargetIndex++;
350
+
351
+ while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
352
+ currTargetIndex++;
353
+ }
354
+
355
+ if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end()) {
356
+ english = currE[currTargetIndex];
357
+ generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
358
+ }
359
+
360
+ }
361
+
362
+ void osmHypothesis :: computeOSMFeature(int startIndex , Bitmap & coverageVector)
363
+ {
364
+
365
+ set <int> doneTargetIndexes;
366
+ set <int> eSide;
367
+ set <int> fSide;
368
+ set <int> :: iterator iter;
369
+ string english;
370
+ string source;
371
+ int j1;
372
+ int targetIndex = 0;
373
+ doneTargetIndexes.clear();
374
+
375
+
376
+ if (targetNullWords.size() != 0) { // Source words to be deleted in the start of this phrase ...
377
+ iter = targetNullWords.begin();
378
+
379
+ if (*iter == startIndex) {
380
+
381
+ j1 = startIndex;
382
+ source = currF[j1-startIndex];
383
+ english = "_INS_";
384
+ generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
385
+ }
386
+ }
387
+
388
+ if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) { // first word has to be deleted ...
389
+ english = currE[targetIndex];
390
+ generateDeleteOperations(english,targetIndex, doneTargetIndexes);
391
+ }
392
+
393
+
394
+ for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
395
+ source = "";
396
+ english = "";
397
+
398
+ fSide = ceptsInPhrase[i].first;
399
+ eSide = ceptsInPhrase[i].second;
400
+
401
+ iter = eSide.begin();
402
+ targetIndex = *iter;
403
+ english += currE[*iter];
404
+ iter++;
405
+
406
+ for (; iter != eSide.end(); iter++) {
407
+ if(*iter == targetIndex+1)
408
+ targetIndex++;
409
+ else
410
+ doneTargetIndexes.insert(*iter);
411
+
412
+ english += "^_^";
413
+ english += currE[*iter];
414
+ }
415
+
416
+ iter = fSide.begin();
417
+ source += currF[*iter];
418
+ iter++;
419
+
420
+ for (; iter != fSide.end(); iter++) {
421
+ source += "^_^";
422
+ source += currF[*iter];
423
+ }
424
+
425
+ iter = fSide.begin();
426
+ j1 = *iter + startIndex;
427
+ iter++;
428
+
429
+ generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
430
+
431
+
432
+ for (; iter != fSide.end(); iter++) {
433
+ j1 = *iter + startIndex;
434
+ generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
435
+ }
436
+
437
+ targetIndex++; // Check whether the next target word is unaligned ...
438
+
439
+ while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
440
+ targetIndex++;
441
+ }
442
+
443
+ if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
444
+ english = currE[targetIndex];
445
+ generateDeleteOperations(english,targetIndex, doneTargetIndexes);
446
+ }
447
+ }
448
+
449
+ //removeReorderingOperations();
450
+
451
+ //print();
452
+
453
+ }
454
+
455
+ void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
456
+ {
457
+ set <int> :: iterator iter;
458
+
459
+ int sz = eSide.size();
460
+ vector <int> t;
461
+
462
+ for (iter = eSide.begin(); iter != eSide.end(); iter++) {
463
+ t = tS[*iter];
464
+
465
+ for (size_t i = 0; i < t.size(); i++) {
466
+ fSide.insert(t[i]);
467
+ }
468
+
469
+ }
470
+
471
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
472
+
473
+ t = sT[*iter];
474
+
475
+ for (size_t i = 0 ; i<t.size(); i++) {
476
+ eSide.insert(t[i]);
477
+ }
478
+
479
+ }
480
+
481
+ if (eSide.size () > sz) {
482
+ getMeCepts(eSide,fSide,tS,sT);
483
+ }
484
+
485
+ }
486
+
487
+ void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
488
+ {
489
+
490
+ std::map <int , vector <int> > sT;
491
+ std::map <int , vector <int> > tS;
492
+ std::set <int> eSide;
493
+ std::set <int> fSide;
494
+ std::set <int> :: iterator iter;
495
+ std :: map <int , vector <int> > :: iterator iter2;
496
+ std :: pair < set <int> , set <int> > cept;
497
+ int src;
498
+ int tgt;
499
+
500
+
501
+ for (size_t i = 0; i < align.size(); i+=2) {
502
+ src = align[i];
503
+ tgt = align[i+1];
504
+ tS[tgt].push_back(src);
505
+ sT[src].push_back(tgt);
506
+ }
507
+
508
+ for (int i = startIndex; i<= endIndex; i++) { // What are unaligned source words in this phrase ...
509
+ if (sT.find(i-startIndex) == sT.end()) {
510
+ targetNullWords.insert(i);
511
+ }
512
+ }
513
+
514
+ for (int i = 0; i < targetPhraseLength; i++) { // What are unaligned target words in this phrase ...
515
+ if (tS.find(i) == tS.end()) {
516
+ sourceNullWords.insert(i);
517
+ }
518
+ }
519
+
520
+
521
+ while (tS.size() != 0 && sT.size() != 0) {
522
+
523
+ iter2 = tS.begin();
524
+
525
+ eSide.clear();
526
+ fSide.clear();
527
+ eSide.insert (iter2->first);
528
+
529
+ getMeCepts(eSide, fSide, tS , sT);
530
+
531
+ for (iter = eSide.begin(); iter != eSide.end(); iter++) {
532
+ iter2 = tS.find(*iter);
533
+ tS.erase(iter2);
534
+ }
535
+
536
+ for (iter = fSide.begin(); iter != fSide.end(); iter++) {
537
+ iter2 = sT.find(*iter);
538
+ sT.erase(iter2);
539
+ }
540
+
541
+ cept = make_pair (fSide , eSide);
542
+ ceptsInPhrase.push_back(cept);
543
+ }
544
+
545
+
546
+
547
+ /*
548
+
549
+ cerr<<"Extracted Cepts "<<endl;
550
+ for (int i = 0; i < ceptsInPhrase.size(); i++)
551
+ {
552
+
553
+ fSide = ceptsInPhrase[i].first;
554
+ eSide = ceptsInPhrase[i].second;
555
+
556
+ for (iter = eSide.begin(); iter != eSide.end(); iter++)
557
+ {
558
+ cerr<<*iter<<" ";
559
+ }
560
+ cerr<<"<---> ";
561
+
562
+ for (iter = fSide.begin(); iter != fSide.end(); iter++)
563
+ {
564
+ cerr<<*iter<<" ";
565
+ }
566
+
567
+ cerr<<endl;
568
+ }
569
+ cerr<<endl;
570
+
571
+ cerr<<"Unaligned Target Words"<<endl;
572
+
573
+ for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
574
+ cerr<<*iter<<"<--->"<<endl;
575
+
576
+ cerr<<"Unaligned Source Words"<<endl;
577
+
578
+ for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
579
+ cerr<<*iter<<"<--->"<<endl;
580
+
581
+ */
582
+
583
+ }
584
+
585
+ void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
586
+ {
587
+ scores.clear();
588
+ scores.push_back(opProb);
589
+
590
+ if (numFeatures == 1)
591
+ return;
592
+
593
+ scores.push_back(gapWidth);
594
+ scores.push_back(gapCount);
595
+ scores.push_back(openGapCount);
596
+ scores.push_back(deletionCount);
597
+ }
598
+
599
+
600
+ } // namespace
601
+
mosesdecoder/moses2/FF/OSM/osmHyp.h ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ # include <set>
4
+ # include <map>
5
+ # include <string>
6
+ # include <vector>
7
+ #include "KenOSM.h"
8
+ # include "../FFState.h"
9
+ # include "../../legacy/Bitmap.h"
10
+
11
+ namespace Moses2
12
+ {
13
+
14
+ class osmState : public FFState
15
+ {
16
+ public:
17
+ osmState()
18
+ {}
19
+
20
+ void setState(const lm::ngram::State & val);
21
+
22
+ virtual size_t hash() const;
23
+ virtual bool operator==(const FFState& other) const;
24
+
25
+ virtual std::string ToString() const {
26
+ return "osmState";
27
+ }
28
+
29
+ void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
30
+ int getJ()const {
31
+ return j;
32
+ }
33
+ int getE()const {
34
+ return E;
35
+ }
36
+ std::map <int , std::string> getGap() const {
37
+ return gap;
38
+ }
39
+
40
+ lm::ngram::State getLMState() const {
41
+ return lmState;
42
+ }
43
+
44
+ void print() const;
45
+ std::string getName() const;
46
+
47
+ protected:
48
+ int j, E;
49
+ std::map <int,std::string> gap;
50
+ lm::ngram::State lmState;
51
+ };
52
+
53
+ class osmHypothesis
54
+ {
55
+
56
+ private:
57
+
58
+
59
+ std::vector <std::string> operations; // List of operations required to generated this hyp ...
60
+ std::map <int,std::string> gap; // Maintains gap history ...
61
+ int j; // Position after the last source word generated ...
62
+ int E; // Position after the right most source word so far generated ...
63
+ lm::ngram::State lmState; // KenLM's Model State ...
64
+
65
+ int gapCount; // Number of gaps inserted ...
66
+ int deletionCount;
67
+ int openGapCount;
68
+ int gapWidth;
69
+ double opProb;
70
+
71
+ std::vector <std::string> currE;
72
+ std::vector <std::string> currF;
73
+ std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
74
+ std::set <int> targetNullWords;
75
+ std::set <int> sourceNullWords;
76
+
77
+ int closestGap(std::map <int,std::string> gap,int j1, int & gp);
78
+ int firstOpenGap(std::vector <int> & coverageVector);
79
+ std::string intToString(int);
80
+ int getOpenGaps();
81
+ int isTranslationOperation(int j);
82
+ void removeReorderingOperations();
83
+
84
+ void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
85
+
86
+ public:
87
+
88
+ osmHypothesis();
89
+ ~osmHypothesis() {};
90
+ void generateOperations(int & startIndex, int j1 , int contFlag , Bitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
91
+ void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
92
+ void calculateOSMProb(OSMLM& ptrOp);
93
+ void computeOSMFeature(int startIndex , Bitmap & coverageVector);
94
+ void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
95
+ void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
96
+ currF = val1;
97
+ currE = val2;
98
+ }
99
+ void setState(const FFState* prev_state);
100
+ void saveState(osmState &state);
101
+ void print();
102
+ void populateScores(std::vector <float> & scores , const int numFeatures);
103
+ void setState(const lm::ngram::State & val) {
104
+ lmState = val;
105
+ }
106
+
107
+ };
108
+
109
+ } // namespace
110
+
111
+
112
+
mosesdecoder/moses2/FF/PhrasePenalty.cpp ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * PhrasePenalty.cpp
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #include "PhrasePenalty.h"
9
+ #include "../Scores.h"
10
+
11
+ namespace Moses2
12
+ {
13
+
14
+ PhrasePenalty::PhrasePenalty(size_t startInd, const std::string &line) :
15
+ StatelessFeatureFunction(startInd, line)
16
+ {
17
+ ReadParameters();
18
+ }
19
+
20
+ PhrasePenalty::~PhrasePenalty()
21
+ {
22
+ // TODO Auto-generated destructor stub
23
+ }
24
+
25
+ void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system,
26
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
27
+ SCORE &estimatedScore) const
28
+ {
29
+ scores.PlusEquals(system, *this, 1);
30
+ }
31
+
32
+ void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
33
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
34
+ SCORE &estimatedScore) const
35
+ {
36
+ scores.PlusEquals(system, *this, 1);
37
+ }
38
+
39
+ }
40
+
mosesdecoder/moses2/FF/PhrasePenalty.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * PhrasePenalty.h
3
+ *
4
+ * Created on: 27 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include "StatelessFeatureFunction.h"
11
+
12
+ namespace Moses2
13
+ {
14
+
15
+ class PhrasePenalty: public StatelessFeatureFunction
16
+ {
17
+ public:
18
+ PhrasePenalty(size_t startInd, const std::string &line);
19
+ virtual ~PhrasePenalty();
20
+
21
+ virtual void
22
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
23
+ const TargetPhraseImpl &targetPhrase, Scores &scores,
24
+ SCORE &estimatedScore) const;
25
+
26
+ virtual void
27
+ EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
28
+ const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
29
+ SCORE &estimatedScore) const;
30
+
31
+ };
32
+
33
+ }
34
+
mosesdecoder/moses2/FF/PointerState.cpp ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #include "PointerState.h"
2
+
3
+ namespace Moses2
4
+ {
5
+
6
+ }