suricodes committed on
Commit
ebb7bf2
·
verified ·
1 Parent(s): e8bc68a

Upload 575 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. mosesdecoder/moses/AlignmentInfo.cpp +170 -0
  3. mosesdecoder/moses/AlignmentInfo.h +143 -0
  4. mosesdecoder/moses/AlignmentInfoCollection.cpp +60 -0
  5. mosesdecoder/moses/AlignmentInfoCollection.h +81 -0
  6. mosesdecoder/moses/AlignmentInfoTest.cpp +71 -0
  7. mosesdecoder/moses/BaseManager.cpp +160 -0
  8. mosesdecoder/moses/BaseManager.h +78 -0
  9. mosesdecoder/moses/Bitmap.cpp +96 -0
  10. mosesdecoder/moses/Bitmap.h +249 -0
  11. mosesdecoder/moses/BitmapContainer.cpp +498 -0
  12. mosesdecoder/moses/BitmapContainer.h +275 -0
  13. mosesdecoder/moses/Bitmaps.cpp +58 -0
  14. mosesdecoder/moses/Bitmaps.h +32 -0
  15. mosesdecoder/moses/CMakeLists.txt +13 -0
  16. mosesdecoder/moses/ChartCell.cpp +233 -0
  17. mosesdecoder/moses/ChartCell.h +128 -0
  18. mosesdecoder/moses/ChartCellCollection.cpp +58 -0
  19. mosesdecoder/moses/ChartCellCollection.h +102 -0
  20. mosesdecoder/moses/ChartCellLabel.h +89 -0
  21. mosesdecoder/moses/ChartCellLabelSet.h +147 -0
  22. mosesdecoder/moses/ChartHypothesis.cpp +360 -0
  23. mosesdecoder/moses/ChartHypothesis.h +204 -0
  24. mosesdecoder/moses/ChartHypothesisCollection.cpp +301 -0
  25. mosesdecoder/moses/ChartHypothesisCollection.h +108 -0
  26. mosesdecoder/moses/ChartKBestExtractor.cpp +332 -0
  27. mosesdecoder/moses/ChartKBestExtractor.h +132 -0
  28. mosesdecoder/moses/ChartManager.cpp +867 -0
  29. mosesdecoder/moses/ChartManager.h +162 -0
  30. mosesdecoder/moses/ChartParser.cpp +313 -0
  31. mosesdecoder/moses/ChartParser.h +99 -0
  32. mosesdecoder/moses/ChartParserCallback.h +35 -0
  33. mosesdecoder/moses/ChartRuleLookupManager.cpp +9 -0
  34. mosesdecoder/moses/ChartRuleLookupManager.h +84 -0
  35. mosesdecoder/moses/ChartTranslationOption.cpp +33 -0
  36. mosesdecoder/moses/ChartTranslationOption.h +54 -0
  37. mosesdecoder/moses/ChartTranslationOptionList.cpp +219 -0
  38. mosesdecoder/moses/ChartTranslationOptionList.h +90 -0
  39. mosesdecoder/moses/ChartTranslationOptions.cpp +168 -0
  40. mosesdecoder/moses/ChartTranslationOptions.h +104 -0
  41. mosesdecoder/moses/ConfusionNet.cpp +294 -0
  42. mosesdecoder/moses/ConfusionNet.h +92 -0
  43. mosesdecoder/moses/ContextScope.h +124 -0
  44. mosesdecoder/moses/DecodeGraph.cpp +43 -0
  45. mosesdecoder/moses/DecodeGraph.h +101 -0
  46. mosesdecoder/moses/DecodeStep.cpp +90 -0
  47. mosesdecoder/moses/DecodeStep.h +118 -0
  48. mosesdecoder/moses/DecodeStepGeneration.cpp +169 -0
  49. mosesdecoder/moses/DecodeStepGeneration.h +54 -0
  50. mosesdecoder/moses/DecodeStepTranslation.cpp +280 -0
.gitattributes CHANGED
@@ -95,3 +95,6 @@ mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/processPhraseTab
95
  mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/prunePhraseTable filter=lfs diff=lfs merge=lfs -text
96
  mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryLexicalTable filter=lfs diff=lfs merge=lfs -text
97
  mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryPhraseTableMin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
95
  mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/prunePhraseTable filter=lfs diff=lfs merge=lfs -text
96
  mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryLexicalTable filter=lfs diff=lfs merge=lfs -text
97
  mosesdecoder/misc/bin/gcc-9/release/link-static/threading-multi/queryPhraseTableMin filter=lfs diff=lfs merge=lfs -text
98
+ mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/libmoses.a filter=lfs diff=lfs merge=lfs -text
99
+ mosesdecoder/moses/bin/gcc-9/release/link-static/threading-multi/moses_test filter=lfs diff=lfs merge=lfs -text
100
+ mosesdecoder/moses/LM/bin/BackwardTest.test/gcc-9/release/link-static/threading-multi/BackwardTest filter=lfs diff=lfs merge=lfs -text
mosesdecoder/moses/AlignmentInfo.cpp ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+ #include <algorithm>
20
+ #include <set>
21
+ #include "AlignmentInfo.h"
22
+ #include "TypeDef.h"
23
+ #include "StaticData.h"
24
+ #include "Util.h"
25
+ #include "util/exception.hh"
26
+
27
+ namespace Moses
28
+ {
29
+
30
+ AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
31
+ : m_collection(pairs)
32
+ {
33
+ BuildNonTermIndexMaps();
34
+ }
35
+
36
+ AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
37
+ {
38
+ assert(aln.size()%2==0);
39
+ for (size_t i = 0; i < aln.size(); i+= 2)
40
+ m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1])));
41
+ BuildNonTermIndexMaps();
42
+ }
43
+
44
+ AlignmentInfo::AlignmentInfo(const std::string &str)
45
+ {
46
+ std::vector<std::string> points = Tokenize(str, " ");
47
+ std::vector<std::string>::const_iterator iter;
48
+ for (iter = points.begin(); iter != points.end(); iter++) {
49
+ std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
50
+ UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
51
+ Add(point[0], point[1]);
52
+ }
53
+ }
54
+
55
+ void AlignmentInfo::BuildNonTermIndexMaps()
56
+ {
57
+ if (m_collection.empty()) {
58
+ return;
59
+ }
60
+ const_iterator p = begin();
61
+ size_t maxIndex = p->second;
62
+ for (++p; p != end(); ++p) {
63
+ if (p->second > maxIndex) {
64
+ maxIndex = p->second;
65
+ }
66
+ }
67
+ m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
68
+ m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
69
+ size_t i = 0;
70
+ for (p = begin(); p != end(); ++p) {
71
+ if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
72
+ // 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
73
+ m_nonTermIndexMap.clear();
74
+ m_nonTermIndexMap2.clear();
75
+ return;
76
+ }
77
+ m_nonTermIndexMap[p->second] = i++;
78
+ m_nonTermIndexMap2[p->second] = p->first;
79
+ }
80
+ }
81
+
82
+ std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
83
+ {
84
+ std::set<size_t> ret;
85
+ CollType::const_iterator iter;
86
+ for (iter = begin(); iter != end(); ++iter) {
87
+ // const std::pair<size_t,size_t> &align = *iter;
88
+ if (iter->first == sourcePos) {
89
+ ret.insert(iter->second);
90
+ }
91
+ }
92
+ return ret;
93
+ }
94
+
95
+ std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
96
+ {
97
+ std::set<size_t> ret;
98
+ CollType::const_iterator iter;
99
+ for (iter = begin(); iter != end(); ++iter) {
100
+ // const std::pair<size_t,size_t> &align = *iter;
101
+ if (iter->second == targetPos) {
102
+ ret.insert(iter->first);
103
+ }
104
+ }
105
+ return ret;
106
+ }
107
+
108
+
109
/** Strict ordering of alignment points by target index, ties broken by
 *  source index. Takes pointers so it can be used directly to sort a vector
 *  of pointers to pairs.
 *  @return true if *a sorts before *b
 */
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if (a->second != b->second) {
    return a->second < b->second;
  }
  return a->first < b->first;
}
117
+
118
+
119
+ std::vector< const std::pair<size_t,size_t>* >
120
+ AlignmentInfo::
121
+ GetSortedAlignments(WordAlignmentSort SortOrder) const
122
+ {
123
+ std::vector< const std::pair<size_t,size_t>* > ret;
124
+
125
+ CollType::const_iterator iter;
126
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
127
+ const std::pair<size_t,size_t> &alignPair = *iter;
128
+ ret.push_back(&alignPair);
129
+ }
130
+
131
+ switch (SortOrder) {
132
+ case NoSort:
133
+ break;
134
+
135
+ case TargetOrder:
136
+ std::sort(ret.begin(), ret.end(), compare_target);
137
+ break;
138
+
139
+ default:
140
+ UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
141
+ << SortOrder);
142
+ }
143
+
144
+ return ret;
145
+
146
+ }
147
+
148
+ std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
149
+ {
150
+ std::set<size_t> sourcePoses;
151
+
152
+ CollType::const_iterator iter;
153
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
154
+ size_t sourcePos = iter->first;
155
+ sourcePoses.insert(sourcePos);
156
+ }
157
+ std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
158
+ return ret;
159
+ }
160
+
161
+ std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
162
+ {
163
+ AlignmentInfo::const_iterator iter;
164
+ for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter) {
165
+ out << iter->first << "-" << iter->second << " ";
166
+ }
167
+ return out;
168
+ }
169
+
170
+ }
mosesdecoder/moses/AlignmentInfo.h ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include <iostream>
23
+ #include <ostream>
24
+ #include <set>
25
+ #include <vector>
26
+ #include <cstdlib>
27
+
28
+ #include <boost/functional/hash.hpp>
29
+ #include "TypeDef.h"
30
+ namespace Moses
31
+ {
32
+
33
+ class AlignmentInfoCollection;
34
+
35
+ /** Collection of non-terminal alignment pairs, ordered by source index.
36
+ * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
37
+ */
38
+ class AlignmentInfo
39
+ {
40
+ friend std::ostream& operator<<(std::ostream &, const AlignmentInfo &);
41
+ friend struct AlignmentInfoOrderer;
42
+ friend struct AlignmentInfoHasher;
43
+ friend class AlignmentInfoCollection;
44
+ friend class VW;
45
+
46
+ public:
47
+ typedef std::set<std::pair<size_t,size_t> > CollType;
48
+ typedef std::vector<size_t> NonTermIndexMap;
49
+ typedef CollType::const_iterator const_iterator;
50
+
51
+ const_iterator begin() const {
52
+ return m_collection.begin();
53
+ }
54
+ const_iterator end() const {
55
+ return m_collection.end();
56
+ }
57
+
58
+ void Add(size_t sourcePos, size_t targetPos) {
59
+ m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
60
+ }
61
+ /** Provides a map from target-side to source-side non-terminal indices.
62
+ * The target-side index should be the rule symbol index (COUNTING terminals).
63
+ * The index returned is the rule non-terminal index (IGNORING terminals).
64
+ */
65
+ const NonTermIndexMap &GetNonTermIndexMap() const {
66
+ return m_nonTermIndexMap;
67
+ }
68
+
69
+ /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
70
+ * the index counting both terminals and non-terminals) */
71
+ const NonTermIndexMap &GetNonTermIndexMap2() const {
72
+ return m_nonTermIndexMap2;
73
+ }
74
+
75
+ const CollType &GetAlignments() const {
76
+ return m_collection;
77
+ }
78
+
79
+ std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
80
+ std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
81
+
82
+ size_t GetSize() const {
83
+ return m_collection.size();
84
+ }
85
+
86
+ std::vector< const std::pair<size_t,size_t>* >
87
+ GetSortedAlignments(WordAlignmentSort SortOrder) const;
88
+
89
+ std::vector<size_t> GetSourceIndex2PosMap() const;
90
+
91
+ bool operator==(const AlignmentInfo& rhs) const {
92
+ return m_collection == rhs.m_collection &&
93
+ m_nonTermIndexMap == rhs.m_nonTermIndexMap;
94
+ }
95
+
96
+ private:
97
+ //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
98
+ explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
99
+ explicit AlignmentInfo(const std::vector<unsigned char> &aln);
100
+
101
+ // used only by VW to load word alignment between sentences
102
+ explicit AlignmentInfo(const std::string &str);
103
+
104
+ void BuildNonTermIndexMaps();
105
+
106
+ CollType m_collection;
107
+ NonTermIndexMap m_nonTermIndexMap;
108
+ NonTermIndexMap m_nonTermIndexMap2;
109
+ };
110
+
111
+ /** Define an arbitrary strict weak ordering between AlignmentInfo objects
112
+ * for use by AlignmentInfoCollection.
113
+ */
114
+ struct AlignmentInfoOrderer {
115
+ bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
116
+ if (a.m_collection == b.m_collection) {
117
+ return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
118
+ } else {
119
+ return a.m_collection < b.m_collection;
120
+ }
121
+ }
122
+ };
123
+
124
+ /**
125
+ * Hashing functoid
126
+ **/
127
+ struct AlignmentInfoHasher {
128
+ size_t operator()(const AlignmentInfo& a) const {
129
+ size_t seed = 0;
130
+ boost::hash_combine(seed,a.m_collection);
131
+ boost::hash_combine(seed,a.m_nonTermIndexMap);
132
+ return seed;
133
+ }
134
+
135
+ };
136
+
137
+ inline size_t hash_value(const AlignmentInfo& a)
138
+ {
139
+ static AlignmentInfoHasher hasher;
140
+ return hasher(a);
141
+ }
142
+
143
+ }
mosesdecoder/moses/AlignmentInfoCollection.cpp ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include "AlignmentInfoCollection.h"
21
+
22
+ namespace Moses
23
+ {
24
+
25
+ AlignmentInfoCollection AlignmentInfoCollection::s_instance;
26
+
27
+ AlignmentInfoCollection::AlignmentInfoCollection()
28
+ {
29
+ std::set<std::pair<size_t,size_t> > pairs;
30
+ m_emptyAlignmentInfo = Add(pairs);
31
+ }
32
+
33
+ AlignmentInfoCollection::~AlignmentInfoCollection()
34
+ {}
35
+
36
+ const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
37
+ {
38
+ return *m_emptyAlignmentInfo;
39
+ }
40
+
41
+ AlignmentInfo const *
42
+ AlignmentInfoCollection::
43
+ Add(AlignmentInfo const& ainfo)
44
+ {
45
+ #ifdef WITH_THREADS
46
+ {
47
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
48
+ AlignmentInfoSet::const_iterator i = m_collection.find(ainfo);
49
+ if (i != m_collection.end())
50
+ return &*i;
51
+ }
52
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
53
+ #endif
54
+ std::pair<AlignmentInfoSet::iterator, bool> ret = m_collection.insert(ainfo);
55
+ return &(*ret.first);
56
+ }
57
+
58
+
59
+
60
+ }
mosesdecoder/moses/AlignmentInfoCollection.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "AlignmentInfo.h"
23
+
24
+ #include <set>
25
+
26
+ #ifdef WITH_THREADS
27
+ #include <boost/thread/shared_mutex.hpp>
28
+ #include <boost/thread/locks.hpp>
29
+ #endif
30
+
31
+ namespace Moses
32
+ {
33
+
34
+ /** Singleton collection of all AlignmentInfo objects.
35
+ * Used as a cache of all alignment info to save space.
36
+ */
37
+ class AlignmentInfoCollection
38
+ {
39
+ public:
40
+ static AlignmentInfoCollection &Instance() {
41
+ return s_instance;
42
+ }
43
+
44
+ /** Returns a pointer to an AlignmentInfo object with the same source-target
45
+ * alignment pairs as given in the argument. If the collection already
46
+ * contains such an object then returns a pointer to it; otherwise a new
47
+ * one is inserted.
48
+ */
49
+ private:
50
+ const AlignmentInfo* Add(AlignmentInfo const& ainfo);
51
+
52
+ public:
53
+ template<typename ALNREP>
54
+ AlignmentInfo const *
55
+ Add(ALNREP const & aln) {
56
+ return this->Add(AlignmentInfo(aln));
57
+ }
58
+
59
+ //! Returns a pointer to an empty AlignmentInfo object.
60
+ const AlignmentInfo &GetEmptyAlignmentInfo() const;
61
+
62
+ private:
63
+ typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;
64
+
65
+
66
+ //! Only a single static variable should be created.
67
+ AlignmentInfoCollection();
68
+ ~AlignmentInfoCollection();
69
+
70
+ static AlignmentInfoCollection s_instance;
71
+
72
+ #ifdef WITH_THREADS
73
+ //reader-writer lock
74
+ mutable boost::shared_mutex m_accessLock;
75
+ #endif
76
+
77
+ AlignmentInfoSet m_collection;
78
+ const AlignmentInfo *m_emptyAlignmentInfo;
79
+ };
80
+
81
+ }
mosesdecoder/moses/AlignmentInfoTest.cpp ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - factored phrase-based language decoder
3
+ Copyright (C) 2010- University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include <boost/test/unit_test.hpp>
21
+
22
+ #include "AlignmentInfo.h"
23
+ #include "AlignmentInfoCollection.h"
24
+
25
+ using namespace Moses;
26
+ using namespace std;
27
+
28
+ BOOST_AUTO_TEST_SUITE(alignment_info)
29
+
30
+ typedef pair<size_t,size_t> IndexPair;
31
+ typedef set<pair<size_t,size_t> > IndexSet;
32
+
33
+ struct AlignmentInfoFixture {
34
+ const AlignmentInfo* ai1;
35
+ const AlignmentInfo* ai2;
36
+ const AlignmentInfo* ai3;
37
+
38
+ AlignmentInfoFixture() {
39
+ AlignmentInfoCollection& collection = AlignmentInfoCollection::Instance();
40
+ IndexSet aligns1,aligns2,aligns3;
41
+ aligns1.insert(IndexPair(1,1));
42
+ aligns1.insert(IndexPair(2,1));
43
+ aligns2.insert(IndexPair(1,1));
44
+ aligns2.insert(IndexPair(2,1));
45
+ aligns3.insert(IndexPair(1,2));
46
+ aligns3.insert(IndexPair(2,1));
47
+ ai1 = collection.Add(aligns1);
48
+ ai2 = collection.Add(aligns2);
49
+ ai3 = collection.Add(aligns3);
50
+ }
51
+
52
+ };
53
+
54
+ BOOST_FIXTURE_TEST_CASE(comparator, AlignmentInfoFixture)
55
+ {
56
+ BOOST_CHECK(*ai1 == *ai2);
57
+ BOOST_CHECK(*ai1 == *ai1);
58
+ BOOST_CHECK(*ai2 == *ai2);
59
+ BOOST_CHECK(*ai3 == *ai3);
60
+ BOOST_CHECK(!(*ai2 == *ai3));
61
+ BOOST_CHECK(!(*ai1 == *ai3));
62
+ }
63
+
64
+ BOOST_FIXTURE_TEST_CASE(hasher, AlignmentInfoFixture)
65
+ {
66
+ //simple test that same objects give same hash
67
+ AlignmentInfoHasher hash;
68
+ BOOST_CHECK_EQUAL(hash(*ai1), hash(*ai2));
69
+ }
70
+
71
+ BOOST_AUTO_TEST_SUITE_END()
mosesdecoder/moses/BaseManager.cpp ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "BaseManager.h"
2
+ #include "StaticData.h"
3
+ #include "moses/FF/StatelessFeatureFunction.h"
4
+ #include "moses/FF/StatefulFeatureFunction.h"
5
+ #include "moses/TranslationTask.h"
6
+
7
+ #include <vector>
8
+ #include <boost/algorithm/string/predicate.hpp>
9
+ #include <boost/iostreams/device/file.hpp>
10
+ #include <boost/iostreams/filter/bzip2.hpp>
11
+ #include <boost/iostreams/filter/gzip.hpp>
12
+ #include <boost/iostreams/filtering_stream.hpp>
13
+ #include <boost/filesystem.hpp>
14
+
15
+ using namespace std;
16
+
17
+ namespace Moses
18
+ {
19
+
20
+ BaseManager::BaseManager(ttasksptr const& ttask)
21
+ : m_ttask(ttask), m_source(*(ttask->GetSource().get()))
22
+ { }
23
+
24
+ const InputType&
25
+ BaseManager::GetSource() const
26
+ {
27
+ return m_source;
28
+ }
29
+
30
+ const ttasksptr
31
+ BaseManager::GetTtask() const
32
+ {
33
+ return m_ttask.lock();
34
+ }
35
+
36
+ void
37
+ BaseManager::
38
+ OutputSearchGraphAsHypergraph(std::ostream& out) const
39
+ {
40
+ // This virtual function that may not be implemented everywhere, but it should for
41
+ // derived classes that use it
42
+ UTIL_THROW2("Not implemented.");
43
+ }
44
+
45
+ void
46
+ BaseManager::
47
+ OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision) const
48
+ {
49
+ std::string odir = boost::filesystem::path(fname).parent_path().string();
50
+ if (! boost::filesystem::exists(odir))
51
+ boost::filesystem::create_directory(odir);
52
+ UTIL_THROW_IF2(!boost::filesystem::is_directory(odir),
53
+ "Cannot output hypergraphs to " << odir
54
+ << " because that path exists but is not a directory.");
55
+
56
+ // not clear why we need to output the weights every time we dump a search
57
+ // graph into a file again, but that's what the old code did.
58
+
59
+ string weightsFile = odir + "/weights";
60
+ TRACE_ERR("The weights file is " << weightsFile << "\n");
61
+ ofstream weightsOut;
62
+ weightsOut.open(weightsFile.c_str());
63
+ weightsOut.setf(std::ios::fixed);
64
+ weightsOut.precision(6);
65
+ // just temporarily, till we've implemented weight scoring in the manager
66
+ // (or the translation task)
67
+ StaticData::Instance().GetAllWeights().Save(weightsOut);
68
+ weightsOut.close();
69
+
70
+ boost::iostreams::filtering_ostream file;
71
+ if (boost::ends_with(fname, ".gz"))
72
+ file.push(boost::iostreams::gzip_compressor());
73
+ else if (boost::ends_with(fname, ".bz2"))
74
+ file.push( boost::iostreams::bzip2_compressor() );
75
+ file.push( boost::iostreams::file_sink(fname, ios_base::out) );
76
+ if (file.is_complete() && file.good()) {
77
+ file.setf(std::ios::fixed);
78
+ file.precision(precision);
79
+ this->OutputSearchGraphAsHypergraph(file);
80
+ file.flush();
81
+ } else {
82
+ TRACE_ERR("Cannot output hypergraph for line "
83
+ << this->GetSource().GetTranslationId()
84
+ << " because the output file " << fname
85
+ << " is not open or not ready for writing"
86
+ << std::endl);
87
+ }
88
+ file.pop();
89
+ }
90
+
91
+
92
+
93
+
94
+ /***
95
+ * print surface factor only for the given phrase
96
+ */
97
+ void
98
+ BaseManager::
99
+ OutputSurface(std::ostream &out, Phrase const& phrase) const
100
+ {
101
+ std::vector<FactorType> const& factor_order = options()->output.factor_order;
102
+
103
+ bool markUnknown = options()->unk.mark;
104
+ std::string const& fd = options()->output.factor_delimiter;
105
+
106
+ size_t size = phrase.GetSize();
107
+ for (size_t pos = 0 ; pos < size ; pos++) {
108
+ const Factor *factor = phrase.GetFactor(pos, factor_order[0]);
109
+ UTIL_THROW_IF2(factor == NULL, "Empty factor 0 at position " << pos);
110
+
111
+ const Word &word = phrase.GetWord(pos);
112
+ if(markUnknown && word.IsOOV()) {
113
+ out << options()->unk.prefix;
114
+ }
115
+
116
+ out << *factor;
117
+
118
+ for (size_t i = 1 ; i < factor_order.size() ; i++) {
119
+ const Factor *factor = phrase.GetFactor(pos, factor_order[i]);
120
+ UTIL_THROW_IF2(!factor, "Empty factor " << i << " at position " << pos);
121
+ out << fd << *factor;
122
+ }
123
+
124
+ if(markUnknown && word.IsOOV()) {
125
+ out << options()->unk.suffix;
126
+ }
127
+
128
+ out << " ";
129
+ }
130
+ }
131
+
132
+ // Emulates the old operator<<(ostream &, const DottedRule &) function. The
133
+ // output format is a bit odd (reverse order and double spacing between symbols)
134
+ // but there are scripts and tools that expect the output of -T to look like
135
+ // that.
136
+ void BaseManager::WriteApplicationContext(std::ostream &out,
137
+ const ApplicationContext &context) const
138
+ {
139
+ assert(!context.empty());
140
+ ApplicationContext::const_reverse_iterator p = context.rbegin();
141
+ while (true) {
142
+ out << p->second << "=" << p->first << " ";
143
+ if (++p == context.rend()) {
144
+ break;
145
+ }
146
+ out << " ";
147
+ }
148
+ }
149
+
150
+ AllOptions::ptr const&
151
+ BaseManager::
152
+ options() const
153
+ {
154
+ return GetTtask()->options();
155
+ }
156
+
157
+
158
+ } // namespace
159
+
160
+
mosesdecoder/moses/BaseManager.h ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ #pragma once
3
+
4
+ #include <iostream>
5
+ #include <string>
6
+ #include "ScoreComponentCollection.h"
7
+ #include "InputType.h"
8
+ #include "moses/parameters/AllOptions.h"
9
+ namespace Moses
10
+ {
11
+ class ScoreComponentCollection;
12
+ class FeatureFunction;
13
+ class OutputCollector;
14
+
15
+ class BaseManager
16
+ {
17
+ protected:
18
+ // const InputType &m_source; /**< source sentence to be translated */
19
+ ttaskwptr m_ttask;
20
+ InputType const& m_source;
21
+
22
+ BaseManager(ttasksptr const& ttask);
23
+
24
+ // output
25
+ typedef std::vector<std::pair<Moses::Word, Moses::Range> > ApplicationContext;
26
+ typedef std::set< std::pair<size_t, size_t> > Alignments;
27
+
28
+ void OutputSurface(std::ostream &out, Phrase const& phrase) const;
29
+
30
+ void WriteApplicationContext(std::ostream &out,
31
+ const ApplicationContext &context) const;
32
+
33
+ template <class T>
34
+ void ShiftOffsets(std::vector<T> &offsets, T shift) const {
35
+ T currPos = shift;
36
+ for (size_t i = 0; i < offsets.size(); ++i) {
37
+ if (offsets[i] == 0) {
38
+ offsets[i] = currPos;
39
+ ++currPos;
40
+ } else {
41
+ currPos += offsets[i];
42
+ }
43
+ }
44
+ }
45
+
46
+ public:
47
+ virtual ~BaseManager() { }
48
+
49
+ //! the input sentence being decoded
50
+ const InputType& GetSource() const;
51
+ const ttasksptr GetTtask() const;
52
+ AllOptions::ptr const& options() const;
53
+
54
+ virtual void Decode() = 0;
55
+ // outputs
56
+ virtual void OutputBest(OutputCollector *collector) const = 0;
57
+ virtual void OutputNBest(OutputCollector *collector) const = 0;
58
+ virtual void OutputLatticeSamples(OutputCollector *collector) const = 0;
59
+ virtual void OutputAlignment(OutputCollector *collector) const = 0;
60
+ virtual void OutputDetailedTranslationReport(OutputCollector *collector) const = 0;
61
+ virtual void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const = 0;
62
+ virtual void OutputWordGraph(OutputCollector *collector) const = 0;
63
+ virtual void OutputSearchGraph(OutputCollector *collector) const = 0;
64
+ virtual void OutputUnknowns(OutputCollector *collector) const = 0;
65
+ virtual void OutputSearchGraphSLF() const = 0;
66
+ // virtual void OutputSearchGraphHypergraph() const = 0;
67
+
68
+ virtual void OutputSearchGraphAsHypergraph(std::ostream& out) const;
69
+ virtual void OutputSearchGraphAsHypergraph(std::string const& fname,
70
+ size_t const precision) const;
71
+ /***
72
+ * to be called after processing a sentence
73
+ */
74
+ virtual void CalcDecoderStatistics() const = 0;
75
+
76
+ };
77
+
78
+ }
mosesdecoder/moses/Bitmap.cpp ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <boost/functional/hash.hpp>
23
+ #include "Bitmap.h"
24
+
25
+ namespace Moses
26
+ {
27
+
28
+ TO_STRING_BODY(Bitmap);
29
+
30
+ Bitmap::Bitmap(size_t size, const std::vector<bool>& initializer)
31
+ :m_bitmap(initializer.begin(), initializer.end())
32
+ {
33
+
34
+ // The initializer may not be of the same length. Change to the desired
35
+ // length. If we need to add any elements, initialize them to false.
36
+ m_bitmap.resize(size, false);
37
+
38
+ m_numWordsCovered = std::count(m_bitmap.begin(), m_bitmap.end(), true);
39
+
40
+ // Find the first gap, and cache it.
41
+ std::vector<char>::const_iterator first_gap = std::find(
42
+ m_bitmap.begin(), m_bitmap.end(), false);
43
+ m_firstGap = (
44
+ (first_gap == m_bitmap.end()) ?
45
+ NOT_FOUND : first_gap - m_bitmap.begin());
46
+ }
47
+
48
+ //! Create Bitmap of length size and initialise.
49
+ Bitmap::Bitmap(size_t size)
50
+ :m_bitmap(size, false)
51
+ ,m_firstGap(0)
52
+ ,m_numWordsCovered(0)
53
+
54
+ {
55
+ }
56
+
57
+ //! Deep copy.
58
+ Bitmap::Bitmap(const Bitmap &copy)
59
+ :m_bitmap(copy.m_bitmap)
60
+ ,m_firstGap(copy.m_firstGap)
61
+ ,m_numWordsCovered(copy.m_numWordsCovered)
62
+ {
63
+ }
64
+
65
+ Bitmap::Bitmap(const Bitmap &copy, const Range &range)
66
+ :m_bitmap(copy.m_bitmap)
67
+ ,m_firstGap(copy.m_firstGap)
68
+ ,m_numWordsCovered(copy.m_numWordsCovered)
69
+ {
70
+ SetValueNonOverlap(range);
71
+ }
72
+
73
+ // for unordered_set in stack
74
+ size_t Bitmap::hash() const
75
+ {
76
+ size_t ret = boost::hash_value(m_bitmap);
77
+ return ret;
78
+ }
79
+
80
+ bool Bitmap::operator==(const Bitmap& other) const
81
+ {
82
+ return m_bitmap == other.m_bitmap;
83
+ }
84
+
85
+ // friend
86
+ std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap)
87
+ {
88
+ for (size_t i = 0 ; i < bitmap.m_bitmap.size() ; i++) {
89
+ out << int(bitmap.GetValue(i));
90
+ }
91
+ return out;
92
+ }
93
+
94
+ } // namespace
95
+
96
+
mosesdecoder/moses/Bitmap.h ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_WordsBitmap_h
23
+ #define moses_WordsBitmap_h
24
+
25
+ #include <algorithm>
26
+ #include <limits>
27
+ #include <vector>
28
+ #include <iostream>
29
+ #include <cstring>
30
+ #include <cmath>
31
+ #include <cstdlib>
32
+ #include "TypeDef.h"
33
+ #include "Range.h"
34
+
35
+ namespace Moses
36
+ {
37
+ typedef unsigned long WordsBitmapID;
38
+
39
+ /** Vector of boolean to represent whether a word has been translated or not.
40
+ *
41
+ * Implemented using a vector of char, which is usually the same representation
42
+ * for the elements that a C array of bool would use. A vector of bool, or a
43
+ * Boost dynamic_bitset, could be much more efficient in theory. Unfortunately
44
+ * algorithms like std::find() are not optimized for vector<bool> on gcc or
45
+ * clang, and dynamic_bitset lacks all the optimized search operations we want.
46
+ * Only benchmarking will tell what works best. Perhaps dynamic_bitset could
47
+ * still be a dramatic improvement, if we flip the meaning of the bits around
48
+ * so we can use its find_first() and find_next() for the most common searches.
49
+ */
50
+ class Bitmap
51
+ {
52
+ friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
53
+ private:
54
+ std::vector<char> m_bitmap; //! Ticks of words in sentence that have been done.
55
+ size_t m_firstGap; //! Cached position of first gap, or NOT_FOUND.
56
+ size_t m_numWordsCovered;
57
+
58
+ Bitmap(); // not implemented
59
+ Bitmap& operator= (const Bitmap& other);
60
+
61
+ /** Update the first gap, when bits are flipped */
62
+ void UpdateFirstGap(size_t startPos, size_t endPos, bool value) {
63
+ if (value) {
64
+ //may remove gap
65
+ if (startPos <= m_firstGap && m_firstGap <= endPos) {
66
+ m_firstGap = NOT_FOUND;
67
+ for (size_t i = endPos + 1 ; i < m_bitmap.size(); ++i) {
68
+ if (!m_bitmap[i]) {
69
+ m_firstGap = i;
70
+ break;
71
+ }
72
+ }
73
+ }
74
+
75
+ } else {
76
+ //setting positions to false, may add new gap
77
+ if (startPos < m_firstGap) {
78
+ m_firstGap = startPos;
79
+ }
80
+ }
81
+ }
82
+
83
+ //! set value between 2 positions, inclusive
84
+ void
85
+ SetValueNonOverlap(Range const& range) {
86
+ size_t startPos = range.GetStartPos();
87
+ size_t endPos = range.GetEndPos();
88
+
89
+ for(size_t pos = startPos ; pos <= endPos ; pos++) {
90
+ m_bitmap[pos] = true;
91
+ }
92
+
93
+ m_numWordsCovered += range.GetNumWordsCovered();
94
+ UpdateFirstGap(startPos, endPos, true);
95
+ }
96
+
97
+ public:
98
+ //! Create Bitmap of length size, and initialise with vector.
99
+ explicit Bitmap(size_t size, const std::vector<bool>& initializer);
100
+
101
+ //! Create Bitmap of length size and initialise.
102
+ explicit Bitmap(size_t size);
103
+
104
+ //! Deep copy.
105
+ explicit Bitmap(const Bitmap &copy);
106
+
107
+ explicit Bitmap(const Bitmap &copy, const Range &range);
108
+
109
+ //! Count of words translated.
110
+ size_t GetNumWordsCovered() const {
111
+ return m_numWordsCovered;
112
+ }
113
+
114
+ //! position of 1st word not yet translated, or NOT_FOUND if everything already translated
115
+ size_t GetFirstGapPos() const {
116
+ return m_firstGap;
117
+ }
118
+
119
+
120
+ //! position of last word not yet translated, or NOT_FOUND if everything already translated
121
+ size_t GetLastGapPos() const {
122
+ for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
123
+ if (!m_bitmap[pos]) {
124
+ return pos;
125
+ }
126
+ }
127
+ // no starting pos
128
+ return NOT_FOUND;
129
+ }
130
+
131
+
132
+ //! position of last translated word
133
+ size_t GetLastPos() const {
134
+ for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
135
+ if (m_bitmap[pos]) {
136
+ return pos;
137
+ }
138
+ }
139
+ // no starting pos
140
+ return NOT_FOUND;
141
+ }
142
+
143
+ //! whether a word has been translated at a particular position
144
+ bool GetValue(size_t pos) const {
145
+ return bool(m_bitmap[pos]);
146
+ }
147
+ //! set value at a particular position
148
+ void SetValue( size_t pos, bool value ) {
149
+ bool origValue = m_bitmap[pos];
150
+ if (origValue == value) {
151
+ // do nothing
152
+ } else {
153
+ m_bitmap[pos] = value;
154
+ UpdateFirstGap(pos, pos, value);
155
+ if (value) {
156
+ ++m_numWordsCovered;
157
+ } else {
158
+ --m_numWordsCovered;
159
+ }
160
+ }
161
+ }
162
+
163
+ //! whether every word has been translated
164
+ bool IsComplete() const {
165
+ return GetSize() == GetNumWordsCovered();
166
+ }
167
+ //! whether the wordrange overlaps with any translated word in this bitmap
168
+ bool Overlap(const Range &compare) const {
169
+ for (size_t pos = compare.GetStartPos() ; pos <= compare.GetEndPos() ; pos++) {
170
+ if (m_bitmap[pos])
171
+ return true;
172
+ }
173
+ return false;
174
+ }
175
+ //! number of elements
176
+ size_t GetSize() const {
177
+ return m_bitmap.size();
178
+ }
179
+
180
+ inline size_t GetEdgeToTheLeftOf(size_t l) const {
181
+ if (l == 0) return l;
182
+ while (l && !m_bitmap[l-1]) {
183
+ --l;
184
+ }
185
+ return l;
186
+ }
187
+
188
+ inline size_t GetEdgeToTheRightOf(size_t r) const {
189
+ if (r+1 == m_bitmap.size()) return r;
190
+ return (
191
+ std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
192
+ m_bitmap.begin()
193
+ ) - 1;
194
+ }
195
+
196
+
197
+ //! converts bitmap into an integer ID: it consists of two parts: the first 16 bit are the pattern between the first gap and the last word-1, the second 16 bit are the number of filled positions. enforces a sentence length limit of 65535 and a max distortion of 16
198
+ WordsBitmapID GetID() const {
199
+ assert(m_bitmap.size() < (1<<16));
200
+
201
+ size_t start = GetFirstGapPos();
202
+ if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
203
+
204
+ size_t end = GetLastPos();
205
+ if (end == NOT_FOUND) end = 0; // nothing translated yet
206
+
207
+ assert(end < start || end-start <= 16);
208
+ WordsBitmapID id = 0;
209
+ for(size_t pos = end; pos > start; pos--) {
210
+ id = id*2 + (int) GetValue(pos);
211
+ }
212
+ return id + (1<<16) * start;
213
+ }
214
+
215
+ //! converts bitmap into an integer ID, with an additional span covered
216
+ WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
217
+ assert(m_bitmap.size() < (1<<16));
218
+
219
+ size_t start = GetFirstGapPos();
220
+ if (start == NOT_FOUND) start = m_bitmap.size(); // nothing left
221
+
222
+ size_t end = GetLastPos();
223
+ if (end == NOT_FOUND) end = 0; // nothing translated yet
224
+
225
+ if (start == startPos) start = endPos+1;
226
+ if (end < endPos) end = endPos;
227
+
228
+ assert(end < start || end-start <= 16);
229
+ WordsBitmapID id = 0;
230
+ for(size_t pos = end; pos > start; pos--) {
231
+ id = id*2;
232
+ if (GetValue(pos) || (startPos<=pos && pos<=endPos))
233
+ id++;
234
+ }
235
+ return id + (1<<16) * start;
236
+ }
237
+
238
+ // for unordered_set in stack
239
+ size_t hash() const;
240
+ bool operator==(const Bitmap& other) const;
241
+ bool operator!=(const Bitmap& other) const {
242
+ return !(*this == other);
243
+ }
244
+
245
+ TO_STRING();
246
+ };
247
+
248
+ }
249
+ #endif
mosesdecoder/moses/BitmapContainer.cpp ADDED
@@ -0,0 +1,498 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <algorithm>
23
+ #include <limits>
24
+ #include <utility>
25
+
26
+ #include "BitmapContainer.h"
27
+ #include "HypothesisStackCubePruning.h"
28
+ #include "moses/FF/DistortionScoreProducer.h"
29
+ #include "TranslationOptionList.h"
30
+ #include "Manager.h"
31
+
32
+ namespace Moses
33
+ {
34
+
35
+ class HypothesisScoreOrdererNoDistortion
36
+ {
37
+ public:
38
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
39
+ const float scoreA = hypoA->GetScore();
40
+ const float scoreB = hypoB->GetScore();
41
+
42
+ if (scoreA > scoreB) {
43
+ return true;
44
+ } else if (scoreA < scoreB) {
45
+ return false;
46
+ } else {
47
+ return hypoA < hypoB;
48
+ }
49
+ }
50
+ };
51
+
52
+ class HypothesisScoreOrdererWithDistortion
53
+ {
54
+ private:
55
+ bool m_deterministic;
56
+
57
+ public:
58
+ HypothesisScoreOrdererWithDistortion(const Range* transOptRange,
59
+ const bool deterministic = false)
60
+ : m_deterministic(deterministic)
61
+ , m_transOptRange(transOptRange) {
62
+ m_totalWeightDistortion = 0;
63
+ const StaticData &staticData = StaticData::Instance();
64
+
65
+ const std::vector<const DistortionScoreProducer*> &ffs = DistortionScoreProducer::GetDistortionFeatureFunctions();
66
+ std::vector<const DistortionScoreProducer*>::const_iterator iter;
67
+ for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
68
+ const DistortionScoreProducer *ff = *iter;
69
+
70
+ float weight =staticData.GetAllWeights().GetScoreForProducer(ff);
71
+ m_totalWeightDistortion += weight;
72
+ }
73
+ }
74
+
75
+ const Range* m_transOptRange;
76
+ float m_totalWeightDistortion;
77
+
78
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
79
+ UTIL_THROW_IF2(m_transOptRange == NULL, "Words range not set");
80
+
81
+
82
+ const float distortionScoreA = DistortionScoreProducer::CalculateDistortionScore(
83
+ *hypoA,
84
+ hypoA->GetCurrSourceWordsRange(),
85
+ *m_transOptRange,
86
+ hypoA->GetWordsBitmap().GetFirstGapPos()
87
+ );
88
+ const float distortionScoreB = DistortionScoreProducer::CalculateDistortionScore(
89
+ *hypoB,
90
+ hypoB->GetCurrSourceWordsRange(),
91
+ *m_transOptRange,
92
+ hypoB->GetWordsBitmap().GetFirstGapPos()
93
+ );
94
+
95
+
96
+ const float scoreA = hypoA->GetScore() + distortionScoreA * m_totalWeightDistortion;
97
+ const float scoreB = hypoB->GetScore() + distortionScoreB * m_totalWeightDistortion;
98
+
99
+
100
+ if (scoreA > scoreB) {
101
+ return true;
102
+ } else if (scoreA < scoreB) {
103
+ return false;
104
+ } else {
105
+ if (m_deterministic) {
106
+ // Equal scores: break ties by comparing target phrases
107
+ return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
108
+ }
109
+ // Fallback: non-deterministic sort
110
+ return hypoA < hypoB;
111
+ }
112
+ }
113
+
114
+ };
115
+
116
+ ////////////////////////////////////////////////////////////////////////////////
117
+ // BackwardsEdge Code
118
+ ////////////////////////////////////////////////////////////////////////////////
119
+
120
+ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
121
+ , BitmapContainer &parent
122
+ , const TranslationOptionList &translations
123
+ , const SquareMatrix &estimatedScores,
124
+ const InputType& itype,
125
+ const bool deterministic)
126
+ : m_initialized(false)
127
+ , m_prevBitmapContainer(prevBitmapContainer)
128
+ , m_parent(parent)
129
+ , m_translations(translations)
130
+ , m_estimatedScores(estimatedScores)
131
+ , m_deterministic(deterministic)
132
+ , m_seenPosition()
133
+ {
134
+
135
+ // If either dimension is empty, we haven't got anything to do.
136
+ if(m_prevBitmapContainer.GetHypotheses().size() == 0 || m_translations.size() == 0) {
137
+ VERBOSE(3, "Empty cube on BackwardsEdge" << std::endl);
138
+ return;
139
+ }
140
+
141
+ // Fetch the things we need for distortion cost computation.
142
+ // int maxDistortion = StaticData::Instance().GetMaxDistortion();
143
+ int maxDistortion = itype.options()->reordering.max_distortion;
144
+
145
+ if (maxDistortion == -1) {
146
+ for (HypothesisSet::const_iterator iter = m_prevBitmapContainer.GetHypotheses().begin(); iter != m_prevBitmapContainer.GetHypotheses().end(); ++iter) {
147
+ m_hypotheses.push_back(*iter);
148
+ }
149
+ return;
150
+ }
151
+
152
+ const Range &transOptRange = translations.Get(0)->GetSourceWordsRange();
153
+
154
+ HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
155
+ HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
156
+
157
+ while (iterHypo != iterEnd) {
158
+ const Hypothesis &hypo = **iterHypo;
159
+ // Special case: If this is the first hypothesis used to seed the search,
160
+ // it doesn't have a valid range, and we create the hypothesis, if the
161
+ // initial position is not further into the sentence than the distortion limit.
162
+ if (hypo.GetWordsBitmap().GetNumWordsCovered() == 0) {
163
+ if ((int)transOptRange.GetStartPos() <= maxDistortion)
164
+ m_hypotheses.push_back(&hypo);
165
+ } else {
166
+ int distortionDistance = itype.ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
167
+ , transOptRange);
168
+
169
+ if (distortionDistance <= maxDistortion)
170
+ m_hypotheses.push_back(&hypo);
171
+ }
172
+
173
+ ++iterHypo;
174
+ }
175
+
176
+ if (m_translations.size() > 1) {
177
+ UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
178
+ "Non-monotonic future score: "
179
+ << m_translations.Get(0)->GetFutureScore() << " vs. "
180
+ << m_translations.Get(1)->GetFutureScore());
181
+ }
182
+
183
+ if (m_hypotheses.size() > 1) {
184
+ UTIL_THROW_IF2(m_hypotheses[0]->GetFutureScore() < m_hypotheses[1]->GetFutureScore(),
185
+ "Non-monotonic total score"
186
+ << m_hypotheses[0]->GetFutureScore() << " vs. "
187
+ << m_hypotheses[1]->GetFutureScore());
188
+ }
189
+
190
+ HypothesisScoreOrdererWithDistortion orderer (&transOptRange, m_deterministic);
191
+ std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);
192
+
193
+ // std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
194
+ }
195
+
196
+ BackwardsEdge::~BackwardsEdge()
197
+ {
198
+ m_seenPosition.clear();
199
+ m_hypotheses.clear();
200
+ }
201
+
202
+
203
+ void
204
+ BackwardsEdge::Initialize()
205
+ {
206
+ if(m_hypotheses.size() == 0 || m_translations.size() == 0) {
207
+ m_initialized = true;
208
+ return;
209
+ }
210
+
211
+ const Bitmap &bm = m_hypotheses[0]->GetWordsBitmap();
212
+ const Range &newRange = m_translations.Get(0)->GetSourceWordsRange();
213
+ m_estimatedScore = m_estimatedScores.CalcEstimatedScore(bm, newRange.GetStartPos(), newRange.GetEndPos());
214
+
215
+ Hypothesis *expanded = CreateHypothesis(*m_hypotheses[0], *m_translations.Get(0));
216
+ m_parent.Enqueue(0, 0, expanded, this);
217
+ SetSeenPosition(0, 0);
218
+ m_initialized = true;
219
+ }
220
+
221
+ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt)
222
+ {
223
+ // create hypothesis and calculate all its scores
224
+ IFVERBOSE(2) {
225
+ hypothesis.GetManager().GetSentenceStats().StartTimeBuildHyp();
226
+ }
227
+ const Bitmap &bitmap = m_parent.GetWordsBitmap();
228
+ Hypothesis *newHypo = new Hypothesis(hypothesis, transOpt, bitmap, hypothesis.GetManager().GetNextHypoId());
229
+ IFVERBOSE(2) {
230
+ hypothesis.GetManager().GetSentenceStats().StopTimeBuildHyp();
231
+ }
232
+ newHypo->EvaluateWhenApplied(m_estimatedScore);
233
+
234
+ return newHypo;
235
+ }
236
+
237
+ bool
238
+ BackwardsEdge::SeenPosition(const size_t x, const size_t y)
239
+ {
240
+ boost::unordered_set< int >::iterator iter = m_seenPosition.find((x<<16) + y);
241
+ return (iter != m_seenPosition.end());
242
+ }
243
+
244
+ void
245
+ BackwardsEdge::SetSeenPosition(const size_t x, const size_t y)
246
+ {
247
+ UTIL_THROW_IF2(x >= (1<<17), "Error");
248
+ UTIL_THROW_IF2(y >= (1<<17), "Error");
249
+
250
+ m_seenPosition.insert((x<<16) + y);
251
+ }
252
+
253
+
254
+ bool
255
+ BackwardsEdge::GetInitialized()
256
+ {
257
+ return m_initialized;
258
+ }
259
+
260
+ const BitmapContainer&
261
+ BackwardsEdge::GetBitmapContainer() const
262
+ {
263
+ return m_prevBitmapContainer;
264
+ }
265
+
266
+ void
267
+ BackwardsEdge::PushSuccessors(const size_t x, const size_t y)
268
+ {
269
+ Hypothesis *newHypo;
270
+
271
+ if(y + 1 < m_translations.size() && !SeenPosition(x, y + 1)) {
272
+ SetSeenPosition(x, y + 1);
273
+ newHypo = CreateHypothesis(*m_hypotheses[x], *m_translations.Get(y + 1));
274
+ if(newHypo != NULL) {
275
+ m_parent.Enqueue(x, y + 1, newHypo, (BackwardsEdge*)this);
276
+ }
277
+ }
278
+
279
+ if(x + 1 < m_hypotheses.size() && !SeenPosition(x + 1, y)) {
280
+ SetSeenPosition(x + 1, y);
281
+ newHypo = CreateHypothesis(*m_hypotheses[x + 1], *m_translations.Get(y));
282
+ if(newHypo != NULL) {
283
+ m_parent.Enqueue(x + 1, y, newHypo, (BackwardsEdge*)this);
284
+ }
285
+ }
286
+ }
287
+
288
+
289
+ ////////////////////////////////////////////////////////////////////////////////
290
+ // BitmapContainer Code
291
+ ////////////////////////////////////////////////////////////////////////////////
292
+
293
+ BitmapContainer::BitmapContainer(const Bitmap &bitmap
294
+ , HypothesisStackCubePruning &stack
295
+ , bool deterministic)
296
+ : m_bitmap(bitmap)
297
+ , m_stack(stack)
298
+ , m_numStackInsertions(0)
299
+ , m_deterministic(deterministic)
300
+ {
301
+ m_hypotheses = HypothesisSet();
302
+ m_edges = BackwardsEdgeSet();
303
+ m_queue = HypothesisQueue();
304
+ }
305
+
306
+ BitmapContainer::~BitmapContainer()
307
+ {
308
+ // As we have created the square position objects we clean up now.
309
+
310
+ while (!m_queue.empty()) {
311
+ HypothesisQueueItem *item = m_queue.top();
312
+ m_queue.pop();
313
+
314
+ delete item->GetHypothesis();
315
+ delete item;
316
+ }
317
+
318
+ // Delete all edges.
319
+ RemoveAllInColl(m_edges);
320
+
321
+ m_hypotheses.clear();
322
+ m_edges.clear();
323
+ }
324
+
325
+
326
+ void
327
+ BitmapContainer::Enqueue(int hypothesis_pos
328
+ , int translation_pos
329
+ , Hypothesis *hypothesis
330
+ , BackwardsEdge *edge)
331
+ {
332
+ // Only supply target phrase if running deterministic search mode
333
+ const TargetPhrase *target_phrase = m_deterministic ? &(hypothesis->GetCurrTargetPhrase()) : NULL;
334
+ HypothesisQueueItem *item = new HypothesisQueueItem(hypothesis_pos
335
+ , translation_pos
336
+ , hypothesis
337
+ , edge
338
+ , target_phrase);
339
+ IFVERBOSE(2) {
340
+ item->GetHypothesis()->GetManager().GetSentenceStats().StartTimeManageCubes();
341
+ }
342
+ m_queue.push(item);
343
+ IFVERBOSE(2) {
344
+ item->GetHypothesis()->GetManager().GetSentenceStats().StopTimeManageCubes();
345
+ }
346
+ }
347
+
348
+ HypothesisQueueItem*
349
+ BitmapContainer::Dequeue(bool keepValue)
350
+ {
351
+ if (!m_queue.empty()) {
352
+ HypothesisQueueItem *item = m_queue.top();
353
+
354
+ if (!keepValue) {
355
+ m_queue.pop();
356
+ }
357
+
358
+ return item;
359
+ }
360
+
361
+ return NULL;
362
+ }
363
+
364
+ HypothesisQueueItem*
365
+ BitmapContainer::Top() const
366
+ {
367
+ return m_queue.top();
368
+ }
369
+
370
+ size_t
371
+ BitmapContainer::Size()
372
+ {
373
+ return m_queue.size();
374
+ }
375
+
376
+ bool
377
+ BitmapContainer::Empty() const
378
+ {
379
+ return m_queue.empty();
380
+ }
381
+
382
+ const HypothesisSet&
383
+ BitmapContainer::GetHypotheses() const
384
+ {
385
+ return m_hypotheses;
386
+ }
387
+
388
+ size_t
389
+ BitmapContainer::GetHypothesesSize() const
390
+ {
391
+ return m_hypotheses.size();
392
+ }
393
+
394
+ const BackwardsEdgeSet&
395
+ BitmapContainer::GetBackwardsEdges()
396
+ {
397
+ return m_edges;
398
+ }
399
+
400
+ void
401
+ BitmapContainer::AddHypothesis(Hypothesis *hypothesis)
402
+ {
403
+ bool itemExists = false;
404
+ HypothesisSet::const_iterator iter = m_hypotheses.begin();
405
+ HypothesisSet::const_iterator iterEnd = m_hypotheses.end();
406
+
407
+ // cfedermann: do we actually need this check?
408
+ while (iter != iterEnd) {
409
+ if (*iter == hypothesis) {
410
+ itemExists = true;
411
+ break;
412
+ }
413
+
414
+ ++iter;
415
+ }
416
+ UTIL_THROW_IF2(itemExists, "Duplicate hypotheses");
417
+ m_hypotheses.push_back(hypothesis);
418
+ }
419
+
420
+ void
421
+ BitmapContainer::AddBackwardsEdge(BackwardsEdge *edge)
422
+ {
423
+ m_edges.insert(edge);
424
+ }
425
+
426
+ void
427
+ BitmapContainer::InitializeEdges()
428
+ {
429
+ BackwardsEdgeSet::iterator iter = m_edges.begin();
430
+ BackwardsEdgeSet::iterator iterEnd = m_edges.end();
431
+
432
+ while (iter != iterEnd) {
433
+ BackwardsEdge *edge = *iter;
434
+ edge->Initialize();
435
+
436
+ ++iter;
437
+ }
438
+ }
439
+
440
+ void
441
+ BitmapContainer::EnsureMinStackHyps(const size_t minNumHyps)
442
+ {
443
+ while ((!Empty()) && m_numStackInsertions < minNumHyps) {
444
+ ProcessBestHypothesis();
445
+ }
446
+ }
447
+
448
+ void
449
+ BitmapContainer::ProcessBestHypothesis()
450
+ {
451
+ if (m_queue.empty()) {
452
+ return;
453
+ }
454
+
455
+ // Get the currently best hypothesis from the queue.
456
+ HypothesisQueueItem *item = Dequeue();
457
+
458
+ // If the priority queue is exhausted, we are done and should have exited
459
+ UTIL_THROW_IF2(item == NULL, "Null object");
460
+
461
+ // check we are pulling things off of priority queue in right order
462
+ if (!Empty()) {
463
+ HypothesisQueueItem *check = Dequeue(true);
464
+ UTIL_THROW_IF2(item->GetHypothesis()->GetFutureScore() < check->GetHypothesis()->GetFutureScore(),
465
+ "Non-monotonic total score: "
466
+ << item->GetHypothesis()->GetFutureScore() << " vs. "
467
+ << check->GetHypothesis()->GetFutureScore());
468
+ }
469
+
470
+ // Logging for the criminally insane
471
+ IFVERBOSE(3) {
472
+ item->GetHypothesis()->PrintHypothesis();
473
+ }
474
+
475
+ // Add best hypothesis to hypothesis stack.
476
+ const bool newstackentry = m_stack.AddPrune(item->GetHypothesis());
477
+ if (newstackentry)
478
+ m_numStackInsertions++;
479
+
480
+ IFVERBOSE(3) {
481
+ TRACE_ERR("new stack entry flag is " << newstackentry << std::endl);
482
+ }
483
+
484
+ // Create new hypotheses for the two successors of the hypothesis just added.
485
+ item->GetBackwardsEdge()->PushSuccessors(item->GetHypothesisPos(), item->GetTranslationPos());
486
+
487
+ // We are done with the queue item, we delete it.
488
+ delete item;
489
+ }
490
+
491
+ void
492
+ BitmapContainer::SortHypotheses()
493
+ {
494
+ std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrderer(m_deterministic));
495
+ }
496
+
497
+ }
498
+
mosesdecoder/moses/BitmapContainer.h ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_BitmapContainer_h
23
+ #define moses_BitmapContainer_h
24
+
25
+ #include <queue>
26
+ #include <set>
27
+ #include <vector>
28
+
29
+ #include "Hypothesis.h"
30
+ #include "HypothesisStackCubePruning.h"
31
+ #include "SquareMatrix.h"
32
+ #include "TranslationOption.h"
33
+ #include "TypeDef.h"
34
+ #include "Bitmap.h"
35
+
36
+ #include <boost/unordered_set.hpp>
37
+
38
+ namespace Moses
39
+ {
40
+
41
+ class BitmapContainer;
42
+ class BackwardsEdge;
43
+ class Hypothesis;
44
+ class HypothesisStackCubePruning;
45
+ class HypothesisQueueItem;
46
+ class QueueItemOrderer;
47
+ class TranslationOptionList;
48
+
49
+ typedef std::vector< Hypothesis* > HypothesisSet;
50
+ typedef std::set< BackwardsEdge* > BackwardsEdgeSet;
51
+ typedef std::priority_queue< HypothesisQueueItem*, std::vector< HypothesisQueueItem* >, QueueItemOrderer> HypothesisQueue;
52
+
53
+ ////////////////////////////////////////////////////////////////////////////////
54
+ // Hypothesis Priority Queue Code
55
+ ////////////////////////////////////////////////////////////////////////////////
56
+
57
+ //! 1 item in the priority queue for stack decoding (phrase-based)
58
+ class HypothesisQueueItem
59
+ {
60
+ private:
61
+ size_t m_hypothesis_pos, m_translation_pos;
62
+ Hypothesis *m_hypothesis;
63
+ BackwardsEdge *m_edge;
64
+ boost::shared_ptr<TargetPhrase> m_target_phrase;
65
+
66
+ HypothesisQueueItem();
67
+
68
+ public:
69
+ HypothesisQueueItem(const size_t hypothesis_pos
70
+ , const size_t translation_pos
71
+ , Hypothesis *hypothesis
72
+ , BackwardsEdge *edge
73
+ , const TargetPhrase *target_phrase = NULL)
74
+ : m_hypothesis_pos(hypothesis_pos)
75
+ , m_translation_pos(translation_pos)
76
+ , m_hypothesis(hypothesis)
77
+ , m_edge(edge) {
78
+ if (target_phrase != NULL) {
79
+ m_target_phrase.reset(new TargetPhrase(*target_phrase));
80
+ }
81
+ }
82
+
83
+ ~HypothesisQueueItem() {
84
+ }
85
+
86
+ int GetHypothesisPos() {
87
+ return m_hypothesis_pos;
88
+ }
89
+
90
+ int GetTranslationPos() {
91
+ return m_translation_pos;
92
+ }
93
+
94
+ Hypothesis *GetHypothesis() {
95
+ return m_hypothesis;
96
+ }
97
+
98
+ BackwardsEdge *GetBackwardsEdge() {
99
+ return m_edge;
100
+ }
101
+
102
+ boost::shared_ptr<TargetPhrase> GetTargetPhrase() {
103
+ return m_target_phrase;
104
+ }
105
+ };
106
+
107
+ //! Allows comparison of two HypothesisQueueItem objects by the corresponding scores.
108
+ class QueueItemOrderer
109
+ {
110
+ public:
111
+ bool operator()(HypothesisQueueItem* itemA, HypothesisQueueItem* itemB) const {
112
+ float scoreA = itemA->GetHypothesis()->GetFutureScore();
113
+ float scoreB = itemB->GetHypothesis()->GetFutureScore();
114
+
115
+ if (scoreA < scoreB) {
116
+ return true;
117
+ } else if (scoreA > scoreB) {
118
+ return false;
119
+ } else {
120
+ // Equal scores: break ties by comparing target phrases (if they exist)
121
+ // *Important*: these are pointers to copies of the target phrases from the
122
+ // hypotheses. This class is used to keep priority queues ordered in the
123
+ // background, so comparisons made as those data structures are cleaned up
124
+ // may occur *after* the target phrases in hypotheses have been cleaned up,
125
+ // leading to segfaults if relying on hypotheses to provide target phrases.
126
+ boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase();
127
+ boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase();
128
+ if (!phrA || !phrB) {
129
+ // Fallback: scoreA < scoreB == false, non-deterministic sort
130
+ return false;
131
+ }
132
+ return (phrA->Compare(*phrB) > 0);
133
+ }
134
+ }
135
+ };
136
+
137
+ ////////////////////////////////////////////////////////////////////////////////
138
+ // Hypothesis Orderer Code
139
+ ////////////////////////////////////////////////////////////////////////////////
140
+ // Allows to compare two Hypothesis objects by the corresponding scores.
141
+ ////////////////////////////////////////////////////////////////////////////////
142
+
143
+ class HypothesisScoreOrderer
144
+ {
145
+ private:
146
+ bool m_deterministic;
147
+
148
+ public:
149
+ HypothesisScoreOrderer(const bool deterministic = false)
150
+ : m_deterministic(deterministic) {}
151
+
152
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
153
+
154
+ float scoreA = hypoA->GetFutureScore();
155
+ float scoreB = hypoB->GetFutureScore();
156
+
157
+ if (scoreA > scoreB) {
158
+ return true;
159
+ } else if (scoreA < scoreB) {
160
+ return false;
161
+ } else {
162
+ if (m_deterministic) {
163
+ // Equal scores: break ties by comparing target phrases
164
+ return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
165
+ }
166
+ // Fallback: scoreA > scoreB == false, non-deterministic sort
167
+ return false;
168
+ }
169
+ }
170
+ };
171
+
172
+ ////////////////////////////////////////////////////////////////////////////////
173
+ // Backwards Edge Code
174
+ ////////////////////////////////////////////////////////////////////////////////
175
+ // Encodes an edge pointing to a BitmapContainer.
176
+ ////////////////////////////////////////////////////////////////////////////////
177
+
178
+ class BackwardsEdge
179
+ {
180
+ private:
181
+ friend class BitmapContainer;
182
+ bool m_initialized;
183
+
184
+ const BitmapContainer &m_prevBitmapContainer;
185
+ BitmapContainer &m_parent;
186
+ const TranslationOptionList &m_translations;
187
+ const SquareMatrix &m_estimatedScores;
188
+ float m_estimatedScore;
189
+
190
+ bool m_deterministic;
191
+
192
+ std::vector< const Hypothesis* > m_hypotheses;
193
+ boost::unordered_set< int > m_seenPosition;
194
+
195
+ // We don't want to instantiate "empty" objects.
196
+ BackwardsEdge();
197
+
198
+ Hypothesis *CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt);
199
+ bool SeenPosition(const size_t x, const size_t y);
200
+ void SetSeenPosition(const size_t x, const size_t y);
201
+
202
+ protected:
203
+ void Initialize();
204
+
205
+ public:
206
+ BackwardsEdge(const BitmapContainer &prevBitmapContainer
207
+ , BitmapContainer &parent
208
+ , const TranslationOptionList &translations
209
+ , const SquareMatrix &estimatedScores
210
+ , const InputType& source
211
+ , const bool deterministic = false);
212
+ ~BackwardsEdge();
213
+
214
+ bool GetInitialized();
215
+ const BitmapContainer &GetBitmapContainer() const;
216
+ int GetDistortionPenalty();
217
+ void PushSuccessors(const size_t x, const size_t y);
218
+ };
219
+
220
+ ////////////////////////////////////////////////////////////////////////////////
221
+ // Bitmap Container Code
222
+ ////////////////////////////////////////////////////////////////////////////////
223
+ // A BitmapContainer encodes an ordered set of hypotheses and a set of edges
224
+ // pointing to the "generating" BitmapContainers. It also stores a priority
225
+ // queue that contains expanded hypotheses from the connected edges.
226
+ ////////////////////////////////////////////////////////////////////////////////
227
+
228
+ class BitmapContainer
229
+ {
230
+ private:
231
+ const Bitmap &m_bitmap;
232
+ HypothesisStackCubePruning &m_stack;
233
+ HypothesisSet m_hypotheses;
234
+ BackwardsEdgeSet m_edges;
235
+ HypothesisQueue m_queue;
236
+ size_t m_numStackInsertions;
237
+ bool m_deterministic;
238
+
239
+ // We always require a corresponding bitmap to be supplied.
240
+ BitmapContainer();
241
+ BitmapContainer(const BitmapContainer &);
242
+ public:
243
+ BitmapContainer(const Bitmap &bitmap
244
+ , HypothesisStackCubePruning &stack
245
+ , bool deterministic = false);
246
+
247
+ // The destructor will also delete all the edges that are
248
+ // connected to this BitmapContainer.
249
+ ~BitmapContainer();
250
+
251
+ void Enqueue(int hypothesis_pos, int translation_pos, Hypothesis *hypothesis, BackwardsEdge *edge);
252
+ HypothesisQueueItem *Dequeue(bool keepValue=false);
253
+ HypothesisQueueItem *Top() const;
254
+ size_t Size();
255
+ bool Empty() const;
256
+
257
+ const Bitmap &GetWordsBitmap() const {
258
+ return m_bitmap;
259
+ }
260
+
261
+ const HypothesisSet &GetHypotheses() const;
262
+ size_t GetHypothesesSize() const;
263
+ const BackwardsEdgeSet &GetBackwardsEdges();
264
+
265
+ void InitializeEdges();
266
+ void ProcessBestHypothesis();
267
+ void EnsureMinStackHyps(const size_t minNumHyps);
268
+ void AddHypothesis(Hypothesis *hypothesis);
269
+ void AddBackwardsEdge(BackwardsEdge *edge);
270
+ void SortHypotheses();
271
+ };
272
+
273
+ }
274
+
275
+ #endif
mosesdecoder/moses/Bitmaps.cpp ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <boost/foreach.hpp>
2
+ #include "Bitmaps.h"
3
+ #include "Util.h"
4
+
5
+ using namespace std;
6
+
7
+ namespace Moses
8
+ {
9
+ Bitmaps::Bitmaps(size_t inputSize, const std::vector<bool> &initSourceCompleted)
10
+ {
11
+ m_initBitmap = new Bitmap(inputSize, initSourceCompleted);
12
+ m_coll[m_initBitmap];
13
+ }
14
+
15
+ Bitmaps::~Bitmaps()
16
+ {
17
+ BOOST_FOREACH (const Coll::value_type& myPair, m_coll) {
18
+ const Bitmap *bm = myPair.first;
19
+ delete bm;
20
+ }
21
+ }
22
+
23
+ const Bitmap &Bitmaps::GetNextBitmap(const Bitmap &bm, const Range &range)
24
+ {
25
+ Bitmap *newBM = new Bitmap(bm, range);
26
+
27
+ Coll::const_iterator iter = m_coll.find(newBM);
28
+ if (iter == m_coll.end()) {
29
+ m_coll[newBM] = NextBitmaps();
30
+ return *newBM;
31
+ } else {
32
+ delete newBM;
33
+ return *iter->first;
34
+ }
35
+ }
36
+
37
+ const Bitmap &Bitmaps::GetBitmap(const Bitmap &bm, const Range &range)
38
+ {
39
+ Coll::iterator iter = m_coll.find(&bm);
40
+ assert(iter != m_coll.end());
41
+
42
+ const Bitmap *newBM;
43
+ NextBitmaps &next = iter->second;
44
+ NextBitmaps::const_iterator iterNext = next.find(range);
45
+ if (iterNext == next.end()) {
46
+ // not seen the link yet.
47
+ newBM = &GetNextBitmap(bm, range);
48
+ next[range] = newBM;
49
+ } else {
50
+ // link exist
51
+ //std::cerr << "link exists" << endl;
52
+ newBM = iterNext->second;
53
+ }
54
+ return *newBM;
55
+ }
56
+
57
+ }
58
+
mosesdecoder/moses/Bitmaps.h ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <boost/unordered_set.hpp>
4
+ #include <boost/unordered_map.hpp>
5
+ #include <set>
6
+ #include "Bitmap.h"
7
+ #include "Util.h"
8
+
9
+ namespace Moses
10
+ {
11
+
12
+ class Bitmaps
13
+ {
14
+ typedef boost::unordered_map<Range, const Bitmap*> NextBitmaps;
15
+ typedef boost::unordered_map<const Bitmap*, NextBitmaps, UnorderedComparer<Bitmap>, UnorderedComparer<Bitmap> > Coll;
16
+ //typedef std::set<const Bitmap*, OrderedComparer<Bitmap> > Coll;
17
+ Coll m_coll;
18
+ const Bitmap *m_initBitmap;
19
+
20
+ const Bitmap &GetNextBitmap(const Bitmap &bm, const Range &range);
21
+ public:
22
+ Bitmaps(size_t inputSize, const std::vector<bool> &initSourceCompleted);
23
+ virtual ~Bitmaps();
24
+
25
+ const Bitmap &GetInitialBitmap() const {
26
+ return *m_initBitmap;
27
+ }
28
+ const Bitmap &GetBitmap(const Bitmap &bm, const Range &range);
29
+
30
+ };
31
+
32
+ }
mosesdecoder/moses/CMakeLists.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
project(moses)

# Collect the sources of this directory and its sub-components.
FILE(GLOB source_moses *.cpp)
FILE(GLOB source_moses_ff FF/*.cpp)
FILE(GLOB source_moses_ff_lexicalReordering FF/LexicalReordering/*.cpp)
FILE(GLOB source_moses_ff_osm FF/OSM-Feature/*.cpp)
FILE(GLOB source_moses_lm LM/*.cpp)
FILE(GLOB source_moses_tm TranslationModel/*.cpp)
FILE(GLOB source_moses_tm_compactPT TranslationModel/CompactPT/*.cpp)
FILE(GLOB source_moses_tm_cky TranslationModel/CKYPlusParser/*.cpp)

# Build the library from the lists globbed above. The previous line referenced
# ${biconcor_source}, which is never set in this file, and left every list
# above unused.
# NOTE(review): the target was previously named "biconcor"; confirm no parent
# CMakeLists depends on that name.
add_library(moses
  ${source_moses}
  ${source_moses_ff}
  ${source_moses_ff_lexicalReordering}
  ${source_moses_ff_osm}
  ${source_moses_lm}
  ${source_moses_tm}
  ${source_moses_tm_compactPT}
  ${source_moses_tm_cky})
mosesdecoder/moses/ChartCell.cpp ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <algorithm>
23
+ #include "ChartCell.h"
24
+ #include "ChartCellCollection.h"
25
+ #include "HypergraphOutput.h"
26
+ #include "RuleCubeQueue.h"
27
+ #include "RuleCube.h"
28
+ #include "Range.h"
29
+ #include "Util.h"
30
+ #include "ChartTranslationOptions.h"
31
+ #include "ChartTranslationOptionList.h"
32
+ #include "ChartManager.h"
33
+ #include "util/exception.hh"
34
+
35
+ using namespace std;
36
+
37
+ namespace Moses
38
+ {
39
+
40
+ ChartCellBase::ChartCellBase(size_t startPos, size_t endPos) :
41
+ m_coverage(startPos, endPos),
42
+ m_targetLabelSet(m_coverage) {}
43
+
44
+ ChartCellBase::~ChartCellBase() {}
45
+
46
+ /** Constructor
47
+ * \param startPos endPos range of this cell
48
+ * \param manager pointer back to the manager
49
+ */
50
+ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
51
+ ChartCellBase(startPos, endPos), m_manager(manager)
52
+ {
53
+ m_nBestIsEnabled = manager.options()->nbest.enabled;
54
+ }
55
+
56
+ ChartCell::~ChartCell() {}
57
+
58
+ /** Add the given hypothesis to the cell.
59
+ * Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
60
+ * This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
61
+ * \param hypo Hypothesis to be added
62
+ */
63
+ bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
64
+ {
65
+ const Word &targetLHS = hypo->GetTargetLHS();
66
+ MapType::iterator m = m_hypoColl.find(targetLHS);
67
+ if (m == m_hypoColl.end()) {
68
+ std::pair<Word, ChartHypothesisCollection>
69
+ e(targetLHS, ChartHypothesisCollection(*m_manager.options()));
70
+ m = m_hypoColl.insert(e).first;
71
+ }
72
+ return m->second.AddHypothesis(hypo, m_manager);
73
+ }
74
+
75
+ /** Prune each collection in this cell to a particular size */
76
+ void ChartCell::PruneToSize()
77
+ {
78
+ MapType::iterator iter;
79
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
80
+ ChartHypothesisCollection &coll = iter->second;
81
+ coll.PruneToSize(m_manager);
82
+ }
83
+ }
84
+
85
+ /** Decoding at span level: fill chart cell with hypotheses
86
+ * (implementation of cube pruning)
87
+ * \param transOptList list of applicable rules to create hypotheses for the cell
88
+ * \param allChartCells entire chart - needed to look up underlying hypotheses
89
+ */
90
+ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
91
+ , const ChartCellCollection &allChartCells)
92
+ {
93
+ // priority queue for applicable rules with selected hypotheses
94
+ RuleCubeQueue queue(m_manager);
95
+
96
+ // add all trans opt into queue. using only 1st child node.
97
+ for (size_t i = 0; i < transOptList.GetSize(); ++i) {
98
+ const ChartTranslationOptions &transOpt = transOptList.Get(i);
99
+ RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
100
+ queue.Add(ruleCube);
101
+ }
102
+
103
+ // pluck things out of queue and add to hypo collection
104
+ const size_t popLimit = m_manager.options()->cube.pop_limit;
105
+ for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
106
+ ChartHypothesis *hypo = queue.Pop();
107
+ AddHypothesis(hypo);
108
+ }
109
+ }
110
+
111
+ //! call SortHypotheses() in each hypo collection in this cell
112
+ void ChartCell::SortHypotheses()
113
+ {
114
+ UTIL_THROW_IF2(!m_targetLabelSet.Empty(), "Already sorted");
115
+
116
+ MapType::iterator iter;
117
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
118
+ ChartHypothesisCollection &coll = iter->second;
119
+
120
+ if (coll.GetSize()) {
121
+ coll.SortHypotheses();
122
+ m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
123
+ }
124
+ }
125
+ }
126
+
127
+ /** Return the highest scoring hypothesis out of all the hypo collection in this cell */
128
+ const ChartHypothesis *ChartCell::GetBestHypothesis() const
129
+ {
130
+ const ChartHypothesis *ret = NULL;
131
+ float bestScore = -std::numeric_limits<float>::infinity();
132
+
133
+ MapType::const_iterator iter;
134
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
135
+ const HypoList &sortedList = iter->second.GetSortedHypotheses();
136
+ if (sortedList.size() > 0) {
137
+ const ChartHypothesis *hypo = sortedList[0];
138
+ if (hypo->GetFutureScore() > bestScore) {
139
+ bestScore = hypo->GetFutureScore();
140
+ ret = hypo;
141
+ }
142
+ }
143
+ }
144
+
145
+ return ret;
146
+ }
147
+
148
+ //! call CleanupArcList() in each hypo collection in this cell
149
+ void ChartCell::CleanupArcList()
150
+ {
151
+ // only necessary if n-best calculations are enabled
152
+ if (!m_nBestIsEnabled) return;
153
+
154
+ MapType::iterator iter;
155
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
156
+ ChartHypothesisCollection &coll = iter->second;
157
+ coll.CleanupArcList();
158
+ }
159
+ }
160
+
161
+ //! debug info - size of each hypo collection in this cell
162
+ void ChartCell::OutputSizes(std::ostream &out) const
163
+ {
164
+ MapType::const_iterator iter;
165
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
166
+ const Word &targetLHS = iter->first;
167
+ const ChartHypothesisCollection &coll = iter->second;
168
+
169
+ out << targetLHS << "=" << coll.GetSize() << " ";
170
+ }
171
+ }
172
+
173
+ //! debug info - total number of hypos in all hypo collection in this cell
174
+ size_t ChartCell::GetSize() const
175
+ {
176
+ size_t ret = 0;
177
+ MapType::const_iterator iter;
178
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
179
+ const ChartHypothesisCollection &coll = iter->second;
180
+
181
+ ret += coll.GetSize();
182
+ }
183
+
184
+ return ret;
185
+ }
186
+
187
+ const HypoList *ChartCell::GetAllSortedHypotheses() const
188
+ {
189
+ HypoList *ret = new HypoList();
190
+
191
+ MapType::const_iterator iter;
192
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
193
+ const ChartHypothesisCollection &coll = iter->second;
194
+ const HypoList &list = coll.GetSortedHypotheses();
195
+ std::copy(list.begin(), list.end(), std::inserter(*ret, ret->end()));
196
+ }
197
+ return ret;
198
+ }
199
+
200
+ //! call WriteSearchGraph() for each hypo collection
201
+ void ChartCell::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
202
+ {
203
+ MapType::const_iterator iterOutside;
204
+ for (iterOutside = m_hypoColl.begin(); iterOutside != m_hypoColl.end(); ++iterOutside) {
205
+ const ChartHypothesisCollection &coll = iterOutside->second;
206
+ coll.WriteSearchGraph(writer, reachable);
207
+ }
208
+ }
209
+
210
+ std::ostream& operator<<(std::ostream &out, const ChartCell &cell)
211
+ {
212
+ ChartCell::MapType::const_iterator iterOutside;
213
+ for (iterOutside = cell.m_hypoColl.begin(); iterOutside != cell.m_hypoColl.end(); ++iterOutside) {
214
+ const Word &targetLHS = iterOutside->first;
215
+ cerr << targetLHS << ":" << endl;
216
+
217
+ const ChartHypothesisCollection &coll = iterOutside->second;
218
+ cerr << coll;
219
+ }
220
+
221
+ /*
222
+ ChartCell::HCType::const_iterator iter;
223
+ for (iter = cell.m_hypos.begin(); iter != cell.m_hypos.end(); ++iter)
224
+ {
225
+ const ChartHypothesis &hypo = **iter;
226
+ out << hypo << endl;
227
+ }
228
+ */
229
+
230
+ return out;
231
+ }
232
+
233
+ } // namespace
mosesdecoder/moses/ChartCell.h ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #pragma once
23
+
24
+ #include <iostream>
25
+ #include <queue>
26
+ #include <map>
27
+ #include <vector>
28
+ #include "Word.h"
29
+ #include "Range.h"
30
+ #include "NonTerminal.h"
31
+ #include "ChartHypothesis.h"
32
+ #include "ChartHypothesisCollection.h"
33
+ #include "RuleCube.h"
34
+ #include "ChartCellLabelSet.h"
35
+
36
+ #include <boost/scoped_ptr.hpp>
37
+ #include <boost/functional/hash.hpp>
38
+ #include <boost/unordered_map.hpp>
39
+ #include <boost/version.hpp>
40
+
41
+ namespace Moses
42
+ {
43
+ class ChartSearchGraphWriter;
44
+ class ChartTranslationOptionList;
45
+ class ChartCellCollection;
46
+ class ChartManager;
47
+
48
+ class ChartCellBase
49
+ {
50
+ public:
51
+ ChartCellBase(size_t startPos, size_t endPos);
52
+
53
+ virtual ~ChartCellBase();
54
+
55
+ const ChartCellLabelSet &GetTargetLabelSet() const {
56
+ return m_targetLabelSet;
57
+ }
58
+
59
+ ChartCellLabelSet &MutableTargetLabelSet() {
60
+ return m_targetLabelSet;
61
+ }
62
+
63
+ const Range &GetCoverage() const {
64
+ return m_coverage;
65
+ }
66
+
67
+ protected:
68
+ const Range m_coverage;
69
+ ChartCellLabelSet m_targetLabelSet;
70
+ };
71
+
72
+ /** 1 cell in chart decoder.
73
+ * Doesn't directly hold hypotheses. Each cell contain a map of ChartHypothesisCollection that have different constituent labels
74
+ */
75
+ class ChartCell : public ChartCellBase
76
+ {
77
+ friend std::ostream& operator<<(std::ostream&, const ChartCell&);
78
+ public:
79
+ #if defined(BOOST_VERSION) && (BOOST_VERSION >= 104200)
80
+ typedef boost::unordered_map<Word,
81
+ ChartHypothesisCollection,
82
+ NonTerminalHasher,
83
+ NonTerminalEqualityPred
84
+ > MapType;
85
+ #else
86
+ typedef std::map<Word, ChartHypothesisCollection> MapType;
87
+ #endif
88
+
89
+ protected:
90
+ MapType m_hypoColl;
91
+
92
+ bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
93
+ ChartManager &m_manager;
94
+
95
+ public:
96
+ ChartCell(size_t startPos, size_t endPos, ChartManager &manager);
97
+ ~ChartCell();
98
+
99
+ void Decode(const ChartTranslationOptionList &transOptList
100
+ ,const ChartCellCollection &allChartCells);
101
+
102
+ //! Get all hypotheses in the cell that have the specified constituent label
103
+ const HypoList *GetSortedHypotheses(const Word &constituentLabel) const {
104
+ MapType::const_iterator p = m_hypoColl.find(constituentLabel);
105
+ return (p == m_hypoColl.end()) ? NULL : &(p->second.GetSortedHypotheses());
106
+ }
107
+
108
+ //! for n-best list
109
+ const HypoList *GetAllSortedHypotheses() const;
110
+
111
+ bool AddHypothesis(ChartHypothesis *hypo);
112
+
113
+ void SortHypotheses();
114
+ void PruneToSize();
115
+
116
+ const ChartHypothesis *GetBestHypothesis() const;
117
+
118
+ void CleanupArcList();
119
+
120
+ void OutputSizes(std::ostream &out) const;
121
+ size_t GetSize() const;
122
+
123
+ void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
124
+
125
+ };
126
+
127
+ }
128
+
mosesdecoder/moses/ChartCellCollection.cpp ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "ChartCellCollection.h"
23
+ #include "InputType.h"
24
+ #include "Range.h"
25
+ #include "ChartManager.h"
26
+
27
+ namespace Moses
28
+ {
29
+
30
+ ChartCellCollectionBase::~ChartCellCollectionBase()
31
+ {
32
+ m_source.clear();
33
+ for (std::vector<std::vector<ChartCellBase*> >::iterator i = m_cells.begin(); i != m_cells.end(); ++i)
34
+ RemoveAllInColl(*i);
35
+ }
36
+
37
+ class CubeCellFactory
38
+ {
39
+ public:
40
+ explicit CubeCellFactory(ChartManager &manager) : m_manager(manager) {}
41
+
42
+ ChartCell *operator()(size_t start, size_t end) const {
43
+ return new ChartCell(start, end, m_manager);
44
+ }
45
+
46
+ private:
47
+ ChartManager &m_manager;
48
+ };
49
+
50
+ /** Costructor
51
+ \param input the input sentence
52
+ \param manager reference back to the manager
53
+ */
54
+ ChartCellCollection::ChartCellCollection(const InputType &input, ChartManager &manager)
55
+ :ChartCellCollectionBase(input, CubeCellFactory(manager), manager.GetParser()) {}
56
+
57
+ } // namespace
58
+
mosesdecoder/moses/ChartCellCollection.h ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+ #pragma once
22
+
23
+ #include <boost/ptr_container/ptr_vector.hpp>
24
+ #include "InputType.h"
25
+ #include "ChartCell.h"
26
+ #include "Range.h"
27
+ #include "InputPath.h"
28
+
29
+ namespace Moses
30
+ {
31
+ class InputType;
32
+ class ChartManager;
33
+ class ChartParser;
34
+
35
+ class ChartCellCollectionBase
36
+ {
37
+ public:
38
+ template <class Factory> ChartCellCollectionBase(const InputType &input,
39
+ const Factory &factory,
40
+ const ChartParser &parser)
41
+ :m_cells(input.GetSize()) {
42
+
43
+ size_t size = input.GetSize();
44
+ for (size_t startPos = 0; startPos < size; ++startPos) {
45
+ std::vector<ChartCellBase*> &inner = m_cells[startPos];
46
+ inner.reserve(size - startPos);
47
+ for (size_t endPos = startPos; endPos < size; ++endPos) {
48
+ inner.push_back(factory(startPos, endPos));
49
+ }
50
+ /* Hack: ChartCellLabel shouldn't need to know its span, but the parser
51
+ * gets it from there :-(. The span is actually stored as a reference,
52
+ * which needs to point somewhere, so I have it refer to the ChartCell.
53
+ */
54
+ const Range &range = inner[0]->GetCoverage();
55
+
56
+ m_source.push_back(new ChartCellLabel(range, input.GetWord(startPos)));
57
+ }
58
+ }
59
+
60
+ virtual ~ChartCellCollectionBase();
61
+
62
+
63
+ const ChartCellBase &GetBase(const Range &coverage) const {
64
+ return *m_cells[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()];
65
+ }
66
+
67
+ ChartCellBase &MutableBase(const Range &coverage) {
68
+ return *m_cells[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()];
69
+ }
70
+
71
+
72
+ const ChartCellLabel &GetSourceWordLabel(size_t at) const {
73
+ return m_source[at];
74
+ }
75
+
76
+ private:
77
+ std::vector<std::vector<ChartCellBase*> > m_cells;
78
+
79
+ boost::ptr_vector<ChartCellLabel> m_source;
80
+
81
+ };
82
+
83
+ /** Hold all the chart cells for 1 input sentence. A variable of this type is held by the ChartManager
84
+ */
85
+ class ChartCellCollection : public ChartCellCollectionBase
86
+ {
87
+ public:
88
+ ChartCellCollection(const InputType &input, ChartManager &manager);
89
+
90
+ //! get a chart cell for a particular range
91
+ ChartCell &Get(const Range &coverage) {
92
+ return static_cast<ChartCell&>(MutableBase(coverage));
93
+ }
94
+
95
+ //! get a chart cell for a particular range
96
+ const ChartCell &Get(const Range &coverage) const {
97
+ return static_cast<const ChartCell&>(GetBase(coverage));
98
+ }
99
+ };
100
+
101
+ }
102
+
mosesdecoder/moses/ChartCellLabel.h ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "HypoList.h"
23
+ #include "Word.h"
24
+ #include "Range.h"
25
+ #include "ChartParserCallback.h"
26
+
27
+ namespace search
28
+ {
29
+ class Vertex;
30
+ }
31
+
32
+ namespace Moses
33
+ {
34
+
35
+ class Word;
36
+
37
+ /** Contains a range, word (non-terms?) and a vector of hypotheses.
38
+ * @todo This is probably incompatible with lattice decoding when the word that spans
39
+ * a position (or positions) can vary.
40
+ * @todo is this to hold sorted hypotheses that are in the queue for creating the next hypos?
41
+ */
42
+ class ChartCellLabel
43
+ {
44
+ public:
45
+ union Stack {
46
+ const HypoList *cube; // cube pruning
47
+ search::Vertex *incr; // incremental search after filling.
48
+ void *incr_generator; // incremental search during filling.
49
+ };
50
+
51
+
52
+ ChartCellLabel(const Range &coverage, const Word &label,
53
+ Stack stack=Stack())
54
+ : m_coverage(coverage)
55
+ , m_label(label)
56
+ , m_stack(stack)
57
+ , m_bestScore(0) {
58
+ }
59
+
60
+ const Range &GetCoverage() const {
61
+ return m_coverage;
62
+ }
63
+ const Word &GetLabel() const {
64
+ return m_label;
65
+ }
66
+ Stack GetStack() const {
67
+ return m_stack;
68
+ }
69
+ Stack &MutableStack() {
70
+ return m_stack;
71
+ }
72
+
73
+ //caching of best score on stack
74
+ float GetBestScore(const ChartParserCallback *outColl) const {
75
+ if (m_bestScore == 0) {
76
+ m_bestScore = outColl->GetBestScore(this);
77
+ }
78
+ return m_bestScore;
79
+ }
80
+
81
+ private:
82
+ const Range &m_coverage;
83
+ const Word &m_label;
84
+ //const InputPath &m_inputPath;
85
+ Stack m_stack;
86
+ mutable float m_bestScore;
87
+ };
88
+
89
+ }
mosesdecoder/moses/ChartCellLabelSet.h ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "ChartCellLabel.h"
23
+ #include "NonTerminal.h"
24
+ #include "moses/FactorCollection.h"
25
+
26
+ #include <boost/functional/hash.hpp>
27
+ #include <boost/unordered_map.hpp>
28
+ #include <boost/version.hpp>
29
+
30
+ namespace Moses
31
+ {
32
+
33
+ class ChartHypothesisCollection;
34
+
35
+ /** @todo I have no idea what's in here
36
+ */
37
+ class ChartCellLabelSet
38
+ {
39
+ private:
40
+
41
+ typedef std::vector<ChartCellLabel*> MapType;
42
+
43
+ public:
44
+ typedef MapType::const_iterator const_iterator;
45
+ typedef MapType::iterator iterator;
46
+
47
+ ChartCellLabelSet(const Range &coverage)
48
+ : m_coverage(coverage)
49
+ , m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
50
+ , m_size(0) { }
51
+
52
+ ~ChartCellLabelSet() {
53
+ RemoveAllInColl(m_map);
54
+ }
55
+
56
+ // TODO: skip empty elements when iterating, or deprecate this
57
+ const_iterator begin() const {
58
+ return m_map.begin();
59
+ }
60
+ const_iterator end() const {
61
+ return m_map.end();
62
+ }
63
+
64
+ iterator mutable_begin() {
65
+ return m_map.begin();
66
+ }
67
+ iterator mutable_end() {
68
+ return m_map.end();
69
+ }
70
+
71
+ void AddWord(const Word &w) {
72
+ size_t idx = w[0]->GetId();
73
+ if (! ChartCellExists(idx)) {
74
+ m_size++;
75
+
76
+
77
+ m_map[idx] = new ChartCellLabel(m_coverage, w);
78
+ }
79
+ }
80
+
81
+ // Stack is a HypoList or whatever the search algorithm uses.
82
+ void AddConstituent(const Word &w, const HypoList *stack) {
83
+ size_t idx = w[0]->GetId();
84
+ if (ChartCellExists(idx)) {
85
+ ChartCellLabel::Stack & s = m_map[idx]->MutableStack();
86
+ s.cube = stack;
87
+ } else {
88
+ ChartCellLabel::Stack s;
89
+ s.cube = stack;
90
+ m_size++;
91
+ m_map[idx] = new ChartCellLabel(m_coverage, w, s);
92
+ }
93
+ }
94
+
95
+ // grow vector if necessary
96
+ bool ChartCellExists(size_t idx) {
97
+ try {
98
+ if (m_map.at(idx) != NULL) {
99
+ return true;
100
+ }
101
+ } catch (const std::out_of_range& oor) {
102
+ m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL);
103
+ }
104
+ return false;
105
+ }
106
+
107
+ bool Empty() const {
108
+ return m_size == 0;
109
+ }
110
+
111
+ size_t GetSize() const {
112
+ return m_size;
113
+ }
114
+
115
+ const ChartCellLabel *Find(const Word &w) const {
116
+ size_t idx = w[0]->GetId();
117
+ try {
118
+ return m_map.at(idx);
119
+ } catch (const std::out_of_range& oor) {
120
+ return NULL;
121
+ }
122
+ }
123
+
124
+ const ChartCellLabel *Find(size_t idx) const {
125
+ try {
126
+ return m_map.at(idx);
127
+ } catch (const std::out_of_range& oor) {
128
+ return NULL;
129
+ }
130
+ }
131
+
132
+ ChartCellLabel::Stack &FindOrInsert(const Word &w) {
133
+ size_t idx = w[0]->GetId();
134
+ if (! ChartCellExists(idx)) {
135
+ m_size++;
136
+ m_map[idx] = new ChartCellLabel(m_coverage, w);
137
+ }
138
+ return m_map[idx]->MutableStack();
139
+ }
140
+
141
+ private:
142
+ const Range &m_coverage;
143
+ MapType m_map;
144
+ size_t m_size;
145
+ };
146
+
147
+ }
mosesdecoder/moses/ChartHypothesis.cpp ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // vim:tabstop=2
2
+ /***********************************************************************
3
+ Moses - factored phrase-based language decoder
4
+ Copyright (C) 2010 Hieu Hoang
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ ***********************************************************************/
20
+
21
+ #include <algorithm>
22
+ #include <vector>
23
+ #include "ChartHypothesis.h"
24
+ #include "RuleCubeItem.h"
25
+ #include "ChartCell.h"
26
+ #include "ChartManager.h"
27
+ #include "TargetPhrase.h"
28
+ #include "Phrase.h"
29
+ #include "StaticData.h"
30
+ #include "ChartTranslationOptions.h"
31
+ #include "moses/FF/FFState.h"
32
+ #include "moses/FF/StatefulFeatureFunction.h"
33
+ #include "moses/FF/StatelessFeatureFunction.h"
34
+
35
+ using namespace std;
36
+
37
+ namespace Moses
38
+ {
39
+
40
+ /** Create a hypothesis from a rule
41
+ * \param transOpt wrapper around the rule
42
+ * \param item @todo dunno
43
+ * \param manager reference back to manager
44
+ */
45
+ ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
46
+ const RuleCubeItem &item,
47
+ ChartManager &manager)
48
+ :m_transOpt(item.GetTranslationDimension().GetTranslationOption())
49
+ ,m_currSourceWordsRange(transOpt.GetSourceWordsRange())
50
+ ,m_ffStates(StatefulFeatureFunction::GetStatefulFeatureFunctions().size())
51
+ ,m_arcList(NULL)
52
+ ,m_winningHypo(NULL)
53
+ ,m_manager(manager)
54
+ ,m_id(manager.GetNextHypoId())
55
+ {
56
+ // underlying hypotheses for sub-spans
57
+ const std::vector<HypothesisDimension> &childEntries = item.GetHypothesisDimensions();
58
+ m_prevHypos.reserve(childEntries.size());
59
+ std::vector<HypothesisDimension>::const_iterator iter;
60
+ for (iter = childEntries.begin(); iter != childEntries.end(); ++iter) {
61
+ m_prevHypos.push_back(iter->GetHypothesis());
62
+ }
63
+ }
64
+
65
+ // Intended to be used by ChartKBestExtractor only. This creates a mock
66
+ // ChartHypothesis for use by the extractor's top-level target vertex.
67
+ ChartHypothesis::ChartHypothesis(const ChartHypothesis &pred,
68
+ const ChartKBestExtractor & /*unused*/)
69
+ :m_currSourceWordsRange(pred.m_currSourceWordsRange)
70
+ ,m_totalScore(pred.m_totalScore)
71
+ ,m_arcList(NULL)
72
+ ,m_winningHypo(NULL)
73
+ ,m_manager(pred.m_manager)
74
+ ,m_id(pred.m_manager.GetNextHypoId())
75
+ {
76
+ // One predecessor, which is an existing top-level ChartHypothesis.
77
+ m_prevHypos.push_back(&pred);
78
+ }
79
+
80
+ ChartHypothesis::~ChartHypothesis()
81
+ {
82
+ // delete feature function states
83
+ for (unsigned i = 0; i < m_ffStates.size(); ++i) {
84
+ delete m_ffStates[i];
85
+ }
86
+
87
+ // delete hypotheses that are not in the chart (recombined away)
88
+ if (m_arcList) {
89
+ ChartArcList::iterator iter;
90
+ for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
91
+ ChartHypothesis *hypo = *iter;
92
+ delete hypo;
93
+ }
94
+ m_arcList->clear();
95
+
96
+ delete m_arcList;
97
+ }
98
+ }
99
+
100
+ /** Create full output phrase that is contained in the hypothesis (and its children)
101
+ * \param outPhrase full output phrase as return argument
102
+ */
103
+ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const
104
+ {
105
+ FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor;
106
+
107
+ for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
108
+ const Word &word = GetCurrTargetPhrase().GetWord(pos);
109
+ if (word.IsNonTerminal()) {
110
+ // non-term. fill out with prev hypo
111
+ size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
112
+ const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
113
+ prevHypo->GetOutputPhrase(outPhrase);
114
+ } else {
115
+ outPhrase.AddWord(word);
116
+
117
+ if (placeholderFactor != NOT_FOUND) {
118
+ std::set<size_t> sourcePosSet = GetCurrTargetPhrase().GetAlignTerm().GetAlignmentsForTarget(pos);
119
+ if (sourcePosSet.size() == 1) {
120
+ const std::vector<const Word*> *ruleSourceFromInputPath = GetTranslationOption().GetSourceRuleFromInputPath();
121
+ UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
122
+ "No source rule");
123
+
124
+ size_t sourcePos = *sourcePosSet.begin();
125
+ const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
126
+ UTIL_THROW_IF2(sourceWord == NULL,
127
+ "No source word");
128
+ const Factor *factor = sourceWord->GetFactor(placeholderFactor);
129
+ if (factor) {
130
+ outPhrase.Back()[0] = factor;
131
+ }
132
+ }
133
+ }
134
+
135
+ }
136
+ }
137
+ }
138
+
139
+ /** Return full output phrase */
140
+ Phrase ChartHypothesis::GetOutputPhrase() const
141
+ {
142
+ Phrase outPhrase(ARRAY_SIZE_INCR);
143
+ GetOutputPhrase(outPhrase);
144
+ return outPhrase;
145
+ }
146
+
147
+ /** TODO: this method isn't used anywhere. Remove? */
148
+ void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const
149
+ {
150
+ const TargetPhrase &tp = GetCurrTargetPhrase();
151
+
152
+ size_t targetSize = tp.GetSize();
153
+ for (size_t i = 0; i < targetSize; ++i) {
154
+ size_t pos;
155
+ if (leftRightMost == 1) {
156
+ pos = i;
157
+ } else if (leftRightMost == 2) {
158
+ pos = targetSize - i - 1;
159
+ } else {
160
+ abort();
161
+ }
162
+
163
+ const Word &word = tp.GetWord(pos);
164
+
165
+ if (word.IsNonTerminal()) {
166
+ // non-term. fill out with prev hypo
167
+ size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
168
+ const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
169
+ prevHypo->GetOutputPhrase(outPhrase);
170
+ } else {
171
+ outPhrase.AddWord(word);
172
+ }
173
+
174
+ if (outPhrase.GetSize() >= numWords) {
175
+ return;
176
+ }
177
+ }
178
+ }
179
+
180
+ /** calculate total score */
181
+ void ChartHypothesis::EvaluateWhenApplied()
182
+ {
183
+ const StaticData &staticData = StaticData::Instance();
184
+
185
+ // compute values of stateless feature functions that were not
186
+ // cached in the translation option-- there is no principled distinction
187
+ const std::vector<const StatelessFeatureFunction*>& sfs =
188
+ StatelessFeatureFunction::GetStatelessFeatureFunctions();
189
+ for (unsigned i = 0; i < sfs.size(); ++i) {
190
+ if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
191
+ sfs[i]->EvaluateWhenApplied(*this,&m_currScoreBreakdown);
192
+ }
193
+ }
194
+
195
+ const std::vector<const StatefulFeatureFunction*>& ffs =
196
+ StatefulFeatureFunction::GetStatefulFeatureFunctions();
197
+ for (unsigned i = 0; i < ffs.size(); ++i) {
198
+ if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
199
+ m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_currScoreBreakdown);
200
+ }
201
+ }
202
+
203
+ // total score from current translation rule
204
+ m_totalScore = GetTranslationOption().GetScores().GetWeightedScore();
205
+ m_totalScore += m_currScoreBreakdown.GetWeightedScore();
206
+
207
+ // total scores from prev hypos
208
+ for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
209
+ const ChartHypothesis &prevHypo = **iter;
210
+ m_totalScore += prevHypo.GetFutureScore();
211
+ }
212
+ }
213
+
214
+ void ChartHypothesis::AddArc(ChartHypothesis *loserHypo)
215
+ {
216
+ if (!m_arcList) {
217
+ if (loserHypo->m_arcList) {
218
+ // we don't have an arcList, but loser does
219
+ this->m_arcList = loserHypo->m_arcList; // take ownership, we'll delete
220
+ loserHypo->m_arcList = 0; // prevent a double deletion
221
+ } else {
222
+ this->m_arcList = new ChartArcList();
223
+ }
224
+ } else {
225
+ if (loserHypo->m_arcList) {
226
+ // both have an arc list: merge. delete loser
227
+ size_t my_size = m_arcList->size();
228
+ size_t add_size = loserHypo->m_arcList->size();
229
+ this->m_arcList->resize(my_size + add_size, 0);
230
+ std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(ChartHypothesis *));
231
+ delete loserHypo->m_arcList;
232
+ loserHypo->m_arcList = 0;
233
+ } else {
234
+ // loserHypo doesn't have any arcs
235
+ // DO NOTHING
236
+ }
237
+ }
238
+ m_arcList->push_back(loserHypo);
239
+ }
240
+
241
+ // sorting helper
242
+ struct CompareChartHypothesisTotalScore {
243
+ bool operator()(const ChartHypothesis* hypo1, const ChartHypothesis* hypo2) const {
244
+ return hypo1->GetFutureScore() > hypo2->GetFutureScore();
245
+ }
246
+ };
247
+
248
+ void ChartHypothesis::CleanupArcList()
249
+ {
250
+ // point this hypo's main hypo to itself
251
+ m_winningHypo = this;
252
+
253
+ if (!m_arcList) return;
254
+
255
+ /* keep only number of arcs we need to create all n-best paths.
256
+ * However, may not be enough if only unique candidates are needed,
257
+ * so we'll keep all of arc list if nedd distinct n-best list
258
+ */
259
+ AllOptions const& opts = *StaticData::Instance().options();
260
+ size_t nBestSize = opts.nbest.nbest_size;
261
+ bool distinctNBest = (opts.nbest.only_distinct
262
+ || opts.mbr.enabled
263
+ || opts.output.NeedSearchGraph()
264
+ || !opts.output.SearchGraphHG.empty());
265
+
266
+ if (!distinctNBest && m_arcList->size() > nBestSize) {
267
+ // prune arc list only if there too many arcs
268
+ NTH_ELEMENT4(m_arcList->begin()
269
+ , m_arcList->begin() + nBestSize - 1
270
+ , m_arcList->end()
271
+ , CompareChartHypothesisTotalScore());
272
+
273
+ // delete bad ones
274
+ ChartArcList::iterator iter;
275
+ for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
276
+ ChartHypothesis *arc = *iter;
277
+ delete arc;
278
+ }
279
+ m_arcList->erase(m_arcList->begin() + nBestSize
280
+ , m_arcList->end());
281
+ }
282
+
283
+ // set all arc's main hypo variable to this hypo
284
+ ChartArcList::iterator iter = m_arcList->begin();
285
+ for (; iter != m_arcList->end() ; ++iter) {
286
+ ChartHypothesis *arc = *iter;
287
+ arc->SetWinningHypo(this);
288
+ }
289
+
290
+ //cerr << m_arcList->size() << " ";
291
+ }
292
+
293
+ void ChartHypothesis::SetWinningHypo(const ChartHypothesis *hypo)
294
+ {
295
+ m_winningHypo = hypo;
296
+ }
297
+
298
+ size_t ChartHypothesis::hash() const
299
+ {
300
+ size_t seed = 0;
301
+
302
+ // states
303
+ for (size_t i = 0; i < m_ffStates.size(); ++i) {
304
+ const FFState *state = m_ffStates[i];
305
+ size_t hash = state->hash();
306
+ boost::hash_combine(seed, hash);
307
+ }
308
+ return seed;
309
+
310
+ }
311
+
312
+ bool ChartHypothesis::operator==(const ChartHypothesis& other) const
313
+ {
314
+ // states
315
+ for (size_t i = 0; i < m_ffStates.size(); ++i) {
316
+ const FFState &thisState = *m_ffStates[i];
317
+ const FFState &otherState = *other.m_ffStates[i];
318
+ if (thisState != otherState) {
319
+ return false;
320
+ }
321
+ }
322
+ return true;
323
+ }
324
+
325
+ TO_STRING_BODY(ChartHypothesis)
326
+
327
+ // friend
328
+ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
329
+ {
330
+
331
+ out << hypo.GetId();
332
+
333
+ // recombination
334
+ if (hypo.GetWinningHypothesis() != NULL &&
335
+ hypo.GetWinningHypothesis() != &hypo) {
336
+ out << "->" << hypo.GetWinningHypothesis()->GetId();
337
+ }
338
+
339
+ if (hypo.GetManager().options()->output.include_lhs_in_search_graph) {
340
+ out << " " << hypo.GetTargetLHS() << "=>";
341
+ }
342
+ out << " " << hypo.GetCurrTargetPhrase()
343
+ //<< " " << outPhrase
344
+ << " " << hypo.GetCurrSourceRange();
345
+
346
+ HypoList::const_iterator iter;
347
+ for (iter = hypo.GetPrevHypos().begin(); iter != hypo.GetPrevHypos().end(); ++iter) {
348
+ const ChartHypothesis &prevHypo = **iter;
349
+ out << " " << prevHypo.GetId();
350
+ }
351
+
352
+ out << " [total=" << hypo.GetFutureScore() << "]";
353
+ out << " " << hypo.GetScoreBreakdown();
354
+
355
+ //out << endl;
356
+
357
+ return out;
358
+ }
359
+
360
+ }
mosesdecoder/moses/ChartHypothesis.h ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // vim:tabstop=2
2
+ /***********************************************************************
3
+ Moses - factored phrase-based language decoder
4
+ Copyright (C) 2010 Hieu Hoang
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ ***********************************************************************/
20
+
21
+ #pragma once
22
+
23
+ #include <vector>
24
+ #include <boost/scoped_ptr.hpp>
25
+ #include "Util.h"
26
+ #include "Range.h"
27
+ #include "ScoreComponentCollection.h"
28
+ #include "Phrase.h"
29
+ #include "ChartTranslationOptions.h"
30
+ #include "ObjectPool.h"
31
+
32
+ namespace Moses
33
+ {
34
+
35
+ class ChartKBestExtractor;
36
+ class ChartHypothesis;
37
+ class ChartManager;
38
+ class RuleCubeItem;
39
+ class FFState;
40
+
41
+ typedef std::vector<ChartHypothesis*> ChartArcList;
42
+
43
+ /** a hypothesis in the hierarchical/syntax decoder.
44
+ * Contain a pointer to the current target phrase, a vector of previous hypos, and some scores
45
+ */
46
+ class ChartHypothesis
47
+ {
48
+ friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
49
+ // friend class ChartKBestExtractor;
50
+
51
+ protected:
52
+
53
+ boost::shared_ptr<ChartTranslationOption> m_transOpt;
54
+
55
+ Range m_currSourceWordsRange;
56
+ std::vector<const FFState*> m_ffStates; /*! stateful feature function states */
57
+ /*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
58
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
59
+ mutable boost::scoped_ptr<ScoreComponentCollection> m_deltaScoreBreakdown;
60
+ ScoreComponentCollection m_currScoreBreakdown /*! scores for this hypothesis only */
61
+ ,m_lmNGram
62
+ ,m_lmPrefix;
63
+ float m_totalScore;
64
+
65
+ ChartArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
66
+ const ChartHypothesis *m_winningHypo;
67
+
68
+ std::vector<const ChartHypothesis*> m_prevHypos; // always sorted by source position?
69
+
70
+ ChartManager& m_manager;
71
+
72
+ unsigned m_id; /* pkoehn wants to log the order in which hypotheses were generated */
73
+
74
+ //! not implemented
75
+ ChartHypothesis();
76
+
77
+ //! not implemented
78
+ ChartHypothesis(const ChartHypothesis &copy);
79
+
80
+ public:
81
+ ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
82
+ ChartManager &manager);
83
+
84
+ //! only used by ChartKBestExtractor
85
+ ChartHypothesis(const ChartHypothesis &, const ChartKBestExtractor &);
86
+
87
+ ~ChartHypothesis();
88
+
89
+ unsigned GetId() const {
90
+ return m_id;
91
+ }
92
+
93
+ const ChartTranslationOption &GetTranslationOption() const {
94
+ return *m_transOpt;
95
+ }
96
+
97
+ //! Get the rule that created this hypothesis
98
+ const TargetPhrase &GetCurrTargetPhrase() const {
99
+ return m_transOpt->GetPhrase();
100
+ }
101
+
102
+ //! the source range that this hypothesis spans
103
+ const Range &GetCurrSourceRange() const {
104
+ return m_currSourceWordsRange;
105
+ }
106
+
107
+ //! the arc list when creating n-best lists
108
+ inline const ChartArcList* GetArcList() const {
109
+ return m_arcList;
110
+ }
111
+
112
+ //! the feature function states for a particular feature \param featureID
113
+ inline const FFState* GetFFState( size_t featureID ) const {
114
+ return m_ffStates[ featureID ];
115
+ }
116
+
117
+ //! reference back to the manager
118
+ inline const ChartManager& GetManager() const {
119
+ return m_manager;
120
+ }
121
+
122
+ void GetOutputPhrase(Phrase &outPhrase) const;
123
+ Phrase GetOutputPhrase() const;
124
+
125
+ // get leftmost/rightmost words only
126
+ // leftRightMost: 1=left, 2=right
127
+ void GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const;
128
+
129
+ void EvaluateWhenApplied();
130
+
131
+ void AddArc(ChartHypothesis *loserHypo);
132
+ void CleanupArcList();
133
+ void SetWinningHypo(const ChartHypothesis *hypo);
134
+
135
+ //! get the unweighted score for each feature function
136
+ const ScoreComponentCollection &GetScoreBreakdown() const {
137
+ // Note: never call this method before m_currScoreBreakdown is fully computed
138
+ if (!m_scoreBreakdown.get()) {
139
+ m_scoreBreakdown.reset(new ScoreComponentCollection());
140
+ // score breakdown from current translation rule
141
+ if (m_transOpt) {
142
+ m_scoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
143
+ }
144
+ m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
145
+ // score breakdowns from prev hypos
146
+ for (std::vector<const ChartHypothesis*>::const_iterator iter = m_prevHypos.begin(); iter != m_prevHypos.end(); ++iter) {
147
+ const ChartHypothesis &prevHypo = **iter;
148
+ m_scoreBreakdown->PlusEquals(prevHypo.GetScoreBreakdown());
149
+ }
150
+ }
151
+ return *(m_scoreBreakdown.get());
152
+ }
153
+
154
+ //! get the unweighted score delta for each feature function
155
+ const ScoreComponentCollection &GetDeltaScoreBreakdown() const {
156
+ // Note: never call this method before m_currScoreBreakdown is fully computed
157
+ if (!m_deltaScoreBreakdown.get()) {
158
+ m_deltaScoreBreakdown.reset(new ScoreComponentCollection());
159
+ // score breakdown from current translation rule
160
+ if (m_transOpt) {
161
+ m_deltaScoreBreakdown->PlusEquals(GetTranslationOption().GetScores());
162
+ }
163
+ m_deltaScoreBreakdown->PlusEquals(m_currScoreBreakdown);
164
+ // delta: score breakdowns from prev hypos _not_ added
165
+ }
166
+ return *(m_deltaScoreBreakdown.get());
167
+ }
168
+
169
+ //! Get the weighted total score
170
+ float GetFutureScore() const {
171
+ // scores from current translation rule. eg. translation models & word penalty
172
+ return m_totalScore;
173
+ }
174
+
175
+ //! vector of previous hypotheses this hypo is built on
176
+ const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
177
+ return m_prevHypos;
178
+ }
179
+
180
+ //! get a particular previous hypos
181
+ const ChartHypothesis* GetPrevHypo(size_t pos) const {
182
+ return m_prevHypos[pos];
183
+ }
184
+
185
+ //! get the constituency label that covers this hypo
186
+ const Word &GetTargetLHS() const {
187
+ return GetCurrTargetPhrase().GetTargetLHS();
188
+ }
189
+
190
+ //! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo is this hypo is in the arc list
191
+ const ChartHypothesis* GetWinningHypothesis() const {
192
+ return m_winningHypo;
193
+ }
194
+
195
+ // for unordered_set in stack
196
+ size_t hash() const;
197
+ bool operator==(const ChartHypothesis& other) const;
198
+
199
+ TO_STRING();
200
+
201
+ }; // class ChartHypothesis
202
+
203
+ }
204
+
mosesdecoder/moses/ChartHypothesisCollection.cpp ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <algorithm>
23
+ #include "StaticData.h"
24
+ #include "ChartHypothesisCollection.h"
25
+ #include "ChartHypothesis.h"
26
+ #include "ChartManager.h"
27
+ #include "HypergraphOutput.h"
28
+ #include "util/exception.hh"
29
+ #include "parameters/AllOptions.h"
30
+
31
+ using namespace std;
32
+ using namespace Moses;
33
+
34
+ namespace Moses
35
+ {
36
+
37
+ ChartHypothesisCollection::ChartHypothesisCollection(AllOptions const& opts)
38
+ {
39
+ // const StaticData &staticData = StaticData::Instance();
40
+
41
+ m_beamWidth = opts.search.beam_width; // staticData.GetBeamWidth();
42
+ m_maxHypoStackSize = opts.search.stack_size; // staticData.options().search.stack_size;
43
+ m_nBestIsEnabled = opts.nbest.enabled; // staticData.options().nbest.enabled;
44
+ m_bestScore = -std::numeric_limits<float>::infinity();
45
+ }
46
+
47
+ ChartHypothesisCollection::~ChartHypothesisCollection()
48
+ {
49
+ HCType::iterator iter;
50
+ for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
51
+ ChartHypothesis *hypo = *iter;
52
+ delete hypo;
53
+ }
54
+ //RemoveAllInColl(m_hypos);
55
+ }
56
+
57
+ /** public function to add hypothesis to this collection.
58
+ * Returns false if equiv hypo exists in collection, otherwise returns true.
59
+ * Takes care of update arc list for n-best list creation.
60
+ * Will delete hypo if it exists - once this function is call don't delete hypothesis.
61
+ * \param hypo hypothesis to add
62
+ * \param manager pointer back to manager
63
+ */
64
+ bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManager &manager)
65
+ {
66
+ if (hypo->GetFutureScore() == - std::numeric_limits<float>::infinity()) {
67
+ manager.GetSentenceStats().AddDiscarded();
68
+ VERBOSE(3,"discarded, -inf score" << std::endl);
69
+ delete hypo;
70
+ return false;
71
+ }
72
+
73
+ if (hypo->GetFutureScore() < m_bestScore + m_beamWidth) {
74
+ // really bad score. don't bother adding hypo into collection
75
+ manager.GetSentenceStats().AddDiscarded();
76
+ VERBOSE(3,"discarded, too bad for stack" << std::endl);
77
+ delete hypo;
78
+ return false;
79
+ }
80
+
81
+ // over threshold, try to add to collection
82
+ std::pair<HCType::iterator, bool> addRet = Add(hypo, manager);
83
+
84
+ // does it have the same state as an existing hypothesis?
85
+ if (addRet.second) {
86
+ // nothing found. add to collection
87
+ return true;
88
+ }
89
+
90
+ // equiv hypo exists, recombine with other hypo
91
+ HCType::iterator &iterExisting = addRet.first;
92
+ ChartHypothesis *hypoExisting = *iterExisting;
93
+ UTIL_THROW_IF2(iterExisting == m_hypos.end(),
94
+ "Adding a hypothesis should have returned a valid iterator");
95
+
96
+ //StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
97
+
98
+ // found existing hypo with same target ending.
99
+ // keep the best 1
100
+ if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
101
+ // incoming hypo is better than the one we have
102
+ VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
103
+ if (m_nBestIsEnabled) {
104
+ hypo->AddArc(hypoExisting);
105
+ Detach(iterExisting);
106
+ } else {
107
+ Remove(iterExisting);
108
+ }
109
+
110
+ bool added = Add(hypo, manager).second;
111
+ if (!added) {
112
+ iterExisting = m_hypos.find(hypo);
113
+ UTIL_THROW2("Offending hypo = " << **iterExisting);
114
+ }
115
+ return false;
116
+ } else {
117
+ // already storing the best hypo. discard current hypo
118
+ VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
119
+ if (m_nBestIsEnabled) {
120
+ hypoExisting->AddArc(hypo);
121
+ } else {
122
+ delete hypo;
123
+ }
124
+ return false;
125
+ }
126
+ }
127
+
128
+ /** add hypothesis to stack. Prune if necessary.
129
+ * Returns false if equiv hypo exists in collection, otherwise returns true, and the iterator that points to the place where the hypo was added
130
+ * \param hypo hypothesis to add
131
+ * \param manager pointer back to manager
132
+ */
133
+ pair<ChartHypothesisCollection::HCType::iterator, bool> ChartHypothesisCollection::Add(ChartHypothesis *hypo, ChartManager &manager)
134
+ {
135
+ std::pair<HCType::iterator, bool> ret = m_hypos.insert(hypo);
136
+ if (ret.second) {
137
+ // equiv hypo doesn't exists
138
+ VERBOSE(3,"added hyp to stack");
139
+
140
+ // Update best score, if this hypothesis is new best
141
+ if (hypo->GetFutureScore() > m_bestScore) {
142
+ VERBOSE(3,", best on stack");
143
+ m_bestScore = hypo->GetFutureScore();
144
+ }
145
+
146
+ // Prune only if stack is twice as big as needed (lazy pruning)
147
+ VERBOSE(3,", now size " << m_hypos.size());
148
+ if (m_hypos.size() > 2*m_maxHypoStackSize-1) {
149
+ PruneToSize(manager);
150
+ } else {
151
+ VERBOSE(3,std::endl);
152
+ }
153
+ }
154
+
155
+ return ret;
156
+ }
157
+
158
+ /** Remove hypothesis pointed to by iterator but DOES NOT delete the object.
159
+ * \param iter iterator to delete
160
+ */
161
+ void ChartHypothesisCollection::Detach(const HCType::iterator &iter)
162
+ {
163
+ m_hypos.erase(iter);
164
+ }
165
+
166
+ /** destroy iterator AND hypothesis pointed to by iterator. If in an object pool, takes care of that too
167
+ */
168
+ void ChartHypothesisCollection::Remove(const HCType::iterator &iter)
169
+ {
170
+ ChartHypothesis *h = *iter;
171
+ Detach(iter);
172
+ delete h;
173
+ }
174
+
175
+ /** prune number of hypo to a particular number of hypos, specified by m_maxHypoStackSize, according to score
176
+ * Don't prune of hypos have identical scores on the boundary, so occasionally number of hypo can remain above m_maxHypoStackSize.
177
+ * \param manager reference back to manager. Used for collecting stats
178
+ */
179
+ void ChartHypothesisCollection::PruneToSize(ChartManager &manager)
180
+ {
181
+ if (m_maxHypoStackSize == 0) return; // no limit
182
+
183
+ if (GetSize() > m_maxHypoStackSize) { // ok, if not over the limit
184
+ priority_queue<float> bestScores;
185
+
186
+ // push all scores to a heap
187
+ // (but never push scores below m_bestScore+m_beamWidth)
188
+ HCType::iterator iter = m_hypos.begin();
189
+ float score = 0;
190
+ while (iter != m_hypos.end()) {
191
+ ChartHypothesis *hypo = *iter;
192
+ score = hypo->GetFutureScore();
193
+ if (score > m_bestScore+m_beamWidth) {
194
+ bestScores.push(score);
195
+ }
196
+ ++iter;
197
+ }
198
+
199
+ // pop the top newSize scores (and ignore them, these are the scores of hyps that will remain)
200
+ // ensure to never pop beyond heap size
201
+ size_t minNewSizeHeapSize = m_maxHypoStackSize > bestScores.size() ? bestScores.size() : m_maxHypoStackSize;
202
+ for (size_t i = 1 ; i < minNewSizeHeapSize ; i++)
203
+ bestScores.pop();
204
+
205
+ // and remember the threshold
206
+ float scoreThreshold = bestScores.top();
207
+
208
+ // delete all hypos under score threshold
209
+ iter = m_hypos.begin();
210
+ while (iter != m_hypos.end()) {
211
+ ChartHypothesis *hypo = *iter;
212
+ float score = hypo->GetFutureScore();
213
+ if (score < scoreThreshold) {
214
+ HCType::iterator iterRemove = iter++;
215
+ Remove(iterRemove);
216
+ manager.GetSentenceStats().AddPruning();
217
+ } else {
218
+ ++iter;
219
+ }
220
+ }
221
+ VERBOSE(3,", pruned to size " << m_hypos.size() << endl);
222
+
223
+ IFVERBOSE(3) {
224
+ TRACE_ERR("stack now contains: ");
225
+ for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++) {
226
+ ChartHypothesis *hypo = *iter;
227
+ TRACE_ERR( hypo->GetId() << " (" << hypo->GetFutureScore() << ") ");
228
+ }
229
+ TRACE_ERR( endl);
230
+ }
231
+
232
+ // desperation pruning
233
+ if (m_hypos.size() > m_maxHypoStackSize * 2) {
234
+ std::vector<ChartHypothesis*> hyposOrdered;
235
+
236
+ // sort hypos
237
+ std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(hyposOrdered, hyposOrdered.end()));
238
+ std::sort(hyposOrdered.begin(), hyposOrdered.end(), ChartHypothesisScoreOrderer());
239
+
240
+ //keep only |size|. delete the rest
241
+ std::vector<ChartHypothesis*>::iterator iter;
242
+ for (iter = hyposOrdered.begin() + (m_maxHypoStackSize * 2); iter != hyposOrdered.end(); ++iter) {
243
+ ChartHypothesis *hypo = *iter;
244
+ HCType::iterator iterFindHypo = m_hypos.find(hypo);
245
+ UTIL_THROW_IF2(iterFindHypo == m_hypos.end(),
246
+ "Adding a hypothesis should have returned a valid iterator");
247
+
248
+ Remove(iterFindHypo);
249
+ }
250
+ }
251
+ }
252
+ }
253
+
254
+ //! sort hypothses by descending score. Put these hypos into a vector m_hyposOrdered to be returned by function GetSortedHypotheses()
255
+ void ChartHypothesisCollection::SortHypotheses()
256
+ {
257
+ UTIL_THROW_IF2(!m_hyposOrdered.empty(), "Hypotheses already sorted");
258
+ if (!m_hypos.empty()) {
259
+ // done everything for this cell.
260
+ // sort
261
+ // put into vec
262
+ m_hyposOrdered.reserve(m_hypos.size());
263
+ std::copy(m_hypos.begin(), m_hypos.end(), back_inserter(m_hyposOrdered));
264
+ std::sort(m_hyposOrdered.begin(), m_hyposOrdered.end(), ChartHypothesisScoreOrderer());
265
+ }
266
+ }
267
+
268
+ //! Call CleanupArcList() for each main hypo in collection
269
+ void ChartHypothesisCollection::CleanupArcList()
270
+ {
271
+ HCType::iterator iter;
272
+ for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
273
+ ChartHypothesis *mainHypo = *iter;
274
+ mainHypo->CleanupArcList();
275
+ }
276
+ }
277
+
278
+ /** Return all hypos, and all hypos in the arclist, in order to create the output searchgraph, ie. the hypergraph. The output is the debug hypo information.
279
+ * @todo this is a useful function. Make sure it outputs everything required, especially scores.
280
+ * \param translationId unique, contiguous id for the input sentence
281
+ * \param outputSearchGraphStream stream to output the info to
282
+ * \param reachable @todo don't know
283
+ */
284
+ void ChartHypothesisCollection::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
285
+ {
286
+ writer.WriteHypos(*this,reachable);
287
+ }
288
+
289
+ std::ostream& operator<<(std::ostream &out, const ChartHypothesisCollection &coll)
290
+ {
291
+ HypoList::const_iterator iterInside;
292
+ for (iterInside = coll.m_hyposOrdered.begin(); iterInside != coll.m_hyposOrdered.end(); ++iterInside) {
293
+ const ChartHypothesis &hypo = **iterInside;
294
+ out << hypo << endl;
295
+ }
296
+
297
+ return out;
298
+ }
299
+
300
+
301
+ } // namespace
mosesdecoder/moses/ChartHypothesisCollection.h ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+ #pragma once
22
+
23
+ #include <set>
24
+ #include "ChartHypothesis.h"
25
+ #include "RuleCube.h"
26
+
27
+
28
+ namespace Moses
29
+ {
30
+
31
+ class ChartSearchGraphWriter;
32
+ struct AllOptions;
33
+
34
+ //! functor to compare (chart) hypotheses by (descending) score
35
+ class ChartHypothesisScoreOrderer
36
+ {
37
+ public:
38
+ bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const {
39
+ return hypoA->GetFutureScore() > hypoB->GetFutureScore();
40
+ }
41
+ };
42
+
43
+ /** Contains a set of unique hypos that have the same HS non-term.
44
+ * ie. 1 of these for each target LHS in each cell
45
+ */
46
+ class ChartHypothesisCollection
47
+ {
48
+ friend std::ostream& operator<<(std::ostream&, const ChartHypothesisCollection&);
49
+
50
+ protected:
51
+ //typedef std::set<ChartHypothesis*, ChartHypothesisRecombinationOrderer> HCType;
52
+ typedef boost::unordered_set< ChartHypothesis*, UnorderedComparer<ChartHypothesis>, UnorderedComparer<ChartHypothesis> > HCType;
53
+ HCType m_hypos;
54
+ HypoList m_hyposOrdered;
55
+
56
+ float m_bestScore; /**< score of the best hypothesis in collection */
57
+ float m_beamWidth; /**< minimum score due to threashold pruning */
58
+ size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
59
+ bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
60
+
61
+ std::pair<HCType::iterator, bool> Add(ChartHypothesis *hypo, ChartManager &manager);
62
+
63
+ public:
64
+ typedef HCType::iterator iterator;
65
+ typedef HCType::const_iterator const_iterator;
66
+ //! iterators
67
+ const_iterator begin() const {
68
+ return m_hypos.begin();
69
+ }
70
+ const_iterator end() const {
71
+ return m_hypos.end();
72
+ }
73
+
74
+ ChartHypothesisCollection(AllOptions const& opts);
75
+ ~ChartHypothesisCollection();
76
+ bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);
77
+
78
+ void Detach(const HCType::iterator &iter);
79
+ void Remove(const HCType::iterator &iter);
80
+
81
+ void PruneToSize(ChartManager &manager);
82
+
83
+ size_t GetSize() const {
84
+ return m_hypos.size();
85
+ }
86
+ size_t GetHypo() const {
87
+ return m_hypos.size();
88
+ }
89
+
90
+ void SortHypotheses();
91
+ void CleanupArcList();
92
+
93
+ //! return vector of hypothesis that has been sorted by score
94
+ const HypoList &GetSortedHypotheses() const {
95
+ return m_hyposOrdered;
96
+ }
97
+
98
+ //! return the best total score of all hypos in this collection
99
+ float GetBestScore() const {
100
+ return m_bestScore;
101
+ }
102
+
103
+ void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
104
+
105
+ };
106
+
107
+ } // namespace
108
+
mosesdecoder/moses/ChartKBestExtractor.cpp ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2014 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include "ChartKBestExtractor.h"
21
+
22
+ #include "ChartHypothesis.h"
23
+ #include "ScoreComponentCollection.h"
24
+ #include "StaticData.h"
25
+
26
+ #include <boost/scoped_ptr.hpp>
27
+
28
+ #include <vector>
29
+
30
+ using namespace std;
31
+
32
+ namespace Moses
33
+ {
34
+
35
+ // Extract the k-best list from the search graph.
36
+ void ChartKBestExtractor::Extract(
37
+ const std::vector<const ChartHypothesis*> &topLevelHypos, std::size_t k,
38
+ KBestVec &kBestList)
39
+ {
40
+ kBestList.clear();
41
+ if (topLevelHypos.empty()) {
42
+ return;
43
+ }
44
+
45
+ // Create a new ChartHypothesis object, supremeHypo, that has the best
46
+ // top-level hypothesis as its predecessor and has the same score.
47
+ std::vector<const ChartHypothesis*>::const_iterator p = topLevelHypos.begin();
48
+ const ChartHypothesis &bestTopLevelHypo = **p;
49
+ boost::scoped_ptr<ChartHypothesis> supremeHypo(
50
+ new ChartHypothesis(bestTopLevelHypo, *this));
51
+
52
+ // Do the same for each alternative top-level hypothesis, but add the new
53
+ // ChartHypothesis objects as arcs from supremeHypo, as if they had been
54
+ // recombined.
55
+ for (++p; p != topLevelHypos.end(); ++p) {
56
+ // Check that the first item in topLevelHypos really was the best.
57
+ UTIL_THROW_IF2((*p)->GetFutureScore() > bestTopLevelHypo.GetFutureScore(),
58
+ "top-level hypotheses are not correctly sorted");
59
+ // Note: there's no need for a smart pointer here: supremeHypo will take
60
+ // ownership of altHypo.
61
+ ChartHypothesis *altHypo = new ChartHypothesis(**p, *this);
62
+ supremeHypo->AddArc(altHypo);
63
+ }
64
+
65
+ // Create the target vertex then lazily fill its k-best list.
66
+ boost::shared_ptr<Vertex> targetVertex = FindOrCreateVertex(*supremeHypo);
67
+ LazyKthBest(*targetVertex, k, k);
68
+
69
+ // Copy the k-best list from the target vertex, but drop the top edge from
70
+ // each derivation.
71
+ kBestList.reserve(targetVertex->kBestList.size());
72
+ for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
73
+ q = targetVertex->kBestList.begin();
74
+ q != targetVertex->kBestList.end(); ++q) {
75
+ const boost::shared_ptr<Derivation> d(*q);
76
+ assert(d);
77
+ assert(d->subderivations.size() == 1);
78
+ kBestList.push_back(d->subderivations[0]);
79
+ }
80
+ }
81
+
82
+ // Generate the target-side yield of the derivation d.
83
+ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
84
+ {
85
+ FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor;
86
+
87
+ Phrase ret(ARRAY_SIZE_INCR);
88
+
89
+ const ChartHypothesis &hypo = d.edge.head->hypothesis;
90
+ const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
91
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
92
+ phrase.GetAlignNonTerm().GetNonTermIndexMap();
93
+ for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
94
+ const Word &word = phrase.GetWord(pos);
95
+ if (word.IsNonTerminal()) {
96
+ std::size_t nonTermInd = nonTermIndexMap[pos];
97
+ const Derivation &subderivation = *d.subderivations[nonTermInd];
98
+ Phrase subPhrase = GetOutputPhrase(subderivation);
99
+ ret.Append(subPhrase);
100
+ } else {
101
+ ret.AddWord(word);
102
+ if (placeholderFactor == NOT_FOUND) {
103
+ continue;
104
+ }
105
+ std::set<std::size_t> sourcePosSet =
106
+ phrase.GetAlignTerm().GetAlignmentsForTarget(pos);
107
+ if (sourcePosSet.size() == 1) {
108
+ const std::vector<const Word*> *ruleSourceFromInputPath =
109
+ hypo.GetTranslationOption().GetSourceRuleFromInputPath();
110
+ UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
111
+ "Source Words in of the rules hasn't been filled out");
112
+ std::size_t sourcePos = *sourcePosSet.begin();
113
+ const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
114
+ UTIL_THROW_IF2(sourceWord == NULL,
115
+ "Null source word at position " << sourcePos);
116
+ const Factor *factor = sourceWord->GetFactor(placeholderFactor);
117
+ if (factor) {
118
+ ret.Back()[0] = factor;
119
+ }
120
+ }
121
+ }
122
+ }
123
+
124
+ return ret;
125
+ }
126
+
127
+ // Generate the score breakdown of the derivation d.
128
+ boost::shared_ptr<ScoreComponentCollection>
129
+ ChartKBestExtractor::GetOutputScoreBreakdown(const Derivation &d)
130
+ {
131
+ const ChartHypothesis &hypo = d.edge.head->hypothesis;
132
+ boost::shared_ptr<ScoreComponentCollection> scoreBreakdown(new ScoreComponentCollection());
133
+ scoreBreakdown->PlusEquals(hypo.GetDeltaScoreBreakdown());
134
+ const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
135
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
136
+ phrase.GetAlignNonTerm().GetNonTermIndexMap();
137
+ for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
138
+ const Word &word = phrase.GetWord(pos);
139
+ if (word.IsNonTerminal()) {
140
+ std::size_t nonTermInd = nonTermIndexMap[pos];
141
+ const Derivation &subderivation = *d.subderivations[nonTermInd];
142
+ scoreBreakdown->PlusEquals(*GetOutputScoreBreakdown(subderivation));
143
+ }
144
+ }
145
+
146
+ return scoreBreakdown;
147
+ }
148
+
149
+ // Generate the target tree of the derivation d.
150
+ TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
151
+ {
152
+ const ChartHypothesis &hypo = d.edge.head->hypothesis;
153
+ const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();
154
+ if (const PhraseProperty *property = phrase.GetProperty("Tree")) {
155
+ const std::string *tree = property->GetValueString();
156
+ TreePointer mytree (boost::make_shared<InternalTree>(*tree));
157
+
158
+ //get subtrees (in target order)
159
+ std::vector<TreePointer> previous_trees;
160
+ for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
161
+ const Word &word = phrase.GetWord(pos);
162
+ if (word.IsNonTerminal()) {
163
+ size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
164
+ const Derivation &subderivation = *d.subderivations[nonTermInd];
165
+ const TreePointer prev_tree = GetOutputTree(subderivation);
166
+ previous_trees.push_back(prev_tree);
167
+ }
168
+ }
169
+
170
+ mytree->Combine(previous_trees);
171
+ mytree->Unbinarize();
172
+ return mytree;
173
+ } else {
174
+ UTIL_THROW2("Error: k-best tree output active, but no internal tree structure found");
175
+ }
176
+ }
177
+
178
+ // Create an unweighted hyperarc corresponding to the given ChartHypothesis.
179
+ ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
180
+ const ChartHypothesis &h)
181
+ {
182
+ UnweightedHyperarc edge;
183
+ edge.head = FindOrCreateVertex(h);
184
+ const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
185
+ edge.tail.resize(prevHypos.size());
186
+ for (std::size_t i = 0; i < prevHypos.size(); ++i) {
187
+ const ChartHypothesis *prevHypo = prevHypos[i];
188
+ edge.tail[i] = FindOrCreateVertex(*prevHypo);
189
+ }
190
+ return edge;
191
+ }
192
+
193
+ // Look for the vertex corresponding to a given ChartHypothesis, creating
194
+ // a new one if necessary.
195
+ boost::shared_ptr<ChartKBestExtractor::Vertex>
196
+ ChartKBestExtractor::FindOrCreateVertex(const ChartHypothesis &h)
197
+ {
198
+ VertexMap::value_type element(&h, boost::shared_ptr<Vertex>());
199
+ std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element);
200
+ boost::shared_ptr<Vertex> &sp = p.first->second;
201
+ if (!p.second) {
202
+ return sp; // Vertex was already in m_vertexMap.
203
+ }
204
+ sp.reset(new Vertex(h));
205
+ // Create the 1-best derivation and add it to the vertex's kBestList.
206
+ UnweightedHyperarc bestEdge;
207
+ bestEdge.head = sp;
208
+ const std::vector<const ChartHypothesis*> &prevHypos = h.GetPrevHypos();
209
+ bestEdge.tail.resize(prevHypos.size());
210
+ for (std::size_t i = 0; i < prevHypos.size(); ++i) {
211
+ const ChartHypothesis *prevHypo = prevHypos[i];
212
+ bestEdge.tail[i] = FindOrCreateVertex(*prevHypo);
213
+ }
214
+ boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
215
+ #ifndef NDEBUG
216
+ std::pair<DerivationSet::iterator, bool> q =
217
+ #endif
218
+ m_derivations.insert(bestDerivation);
219
+ assert(q.second);
220
+ sp->kBestList.push_back(bestDerivation);
221
+ return sp;
222
+ }
223
+
224
+ // Create the 1-best derivation for each edge in BS(v) (except the best one)
225
+ // and add it to v's candidate queue.
226
+ void ChartKBestExtractor::GetCandidates(Vertex &v, std::size_t k)
227
+ {
228
+ // Create derivations for all of v's incoming edges except the best. This
229
+ // means everything in v.hypothesis.GetArcList() and not the edge defined
230
+ // by v.hypothesis itself. The 1-best derivation for that edge will already
231
+ // have been created.
232
+ const ChartArcList *arcList = v.hypothesis.GetArcList();
233
+ if (arcList) {
234
+ for (std::size_t i = 0; i < arcList->size(); ++i) {
235
+ const ChartHypothesis &recombinedHypo = *(*arcList)[i];
236
+ boost::shared_ptr<Vertex> w = FindOrCreateVertex(recombinedHypo);
237
+ assert(w->kBestList.size() == 1);
238
+ v.candidates.push(w->kBestList[0]);
239
+ }
240
+ }
241
+ }
242
+
243
+ // Lazily fill v's k-best list.
244
+ void ChartKBestExtractor::LazyKthBest(Vertex &v, std::size_t k,
245
+ std::size_t globalK)
246
+ {
247
+ // If this is the first visit to vertex v then initialize the priority queue.
248
+ if (v.visited == false) {
249
+ // The 1-best derivation should already be in v's k-best list.
250
+ assert(v.kBestList.size() == 1);
251
+ // Initialize v's priority queue.
252
+ GetCandidates(v, globalK);
253
+ v.visited = true;
254
+ }
255
+ // Add derivations to the k-best list until it contains k or there are none
256
+ // left to add.
257
+ while (v.kBestList.size() < k) {
258
+ assert(!v.kBestList.empty());
259
+ // Update the priority queue by adding the successors of the last
260
+ // derivation (unless they've been seen before).
261
+ boost::shared_ptr<Derivation> d(v.kBestList.back());
262
+ LazyNext(v, *d, globalK);
263
+ // Check if there are any derivations left in the queue.
264
+ if (v.candidates.empty()) {
265
+ break;
266
+ }
267
+ // Get the next best derivation and delete it from the queue.
268
+ boost::weak_ptr<Derivation> next = v.candidates.top();
269
+ v.candidates.pop();
270
+ // Add it to the k-best list.
271
+ v.kBestList.push_back(next);
272
+ }
273
+ }
274
+
275
+ // Create the neighbours of Derivation d and add them to v's candidate queue.
276
+ void ChartKBestExtractor::LazyNext(Vertex &v, const Derivation &d,
277
+ std::size_t globalK)
278
+ {
279
+ for (std::size_t i = 0; i < d.edge.tail.size(); ++i) {
280
+ Vertex &pred = *d.edge.tail[i];
281
+ // Ensure that pred's k-best list contains enough derivations.
282
+ std::size_t k = d.backPointers[i] + 2;
283
+ LazyKthBest(pred, k, globalK);
284
+ if (pred.kBestList.size() < k) {
285
+ // pred's derivations have been exhausted.
286
+ continue;
287
+ }
288
+ // Create the neighbour.
289
+ boost::shared_ptr<Derivation> next(new Derivation(d, i));
290
+ // Check if it has been created before.
291
+ std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
292
+ if (p.second) {
293
+ v.candidates.push(next); // Haven't previously seen it.
294
+ }
295
+ }
296
+ }
297
+
298
+ // Construct the 1-best Derivation that ends at edge e.
299
+ ChartKBestExtractor::Derivation::Derivation(const UnweightedHyperarc &e)
300
+ {
301
+ edge = e;
302
+ std::size_t arity = edge.tail.size();
303
+ backPointers.resize(arity, 0);
304
+ subderivations.reserve(arity);
305
+ for (std::size_t i = 0; i < arity; ++i) {
306
+ const Vertex &pred = *edge.tail[i];
307
+ assert(pred.kBestList.size() >= 1);
308
+ boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
309
+ subderivations.push_back(sub);
310
+ }
311
+ score = edge.head->hypothesis.GetFutureScore();
312
+ }
313
+
314
+ // Construct a Derivation that neighbours an existing Derivation.
315
+ ChartKBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
316
+ {
317
+ edge.head = d.edge.head;
318
+ edge.tail = d.edge.tail;
319
+ backPointers = d.backPointers;
320
+ subderivations = d.subderivations;
321
+ std::size_t j = ++backPointers[i];
322
+ score = d.score;
323
+ // Deduct the score of the old subderivation.
324
+ score -= subderivations[i]->score;
325
+ // Update the subderivation pointer.
326
+ boost::shared_ptr<Derivation> newSub(edge.tail[i]->kBestList[j]);
327
+ subderivations[i] = newSub;
328
+ // Add the score of the new subderivation.
329
+ score += subderivations[i]->score;
330
+ }
331
+
332
+ } // namespace Moses
mosesdecoder/moses/ChartKBestExtractor.h ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2014 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include <cassert>
23
+ #include "ChartHypothesis.h"
24
+ #include "ScoreComponentCollection.h"
25
+ #include "FF/InternalTree.h"
26
+
27
+ #include <boost/unordered_set.hpp>
28
+ #include <boost/weak_ptr.hpp>
29
+ #include <boost/shared_ptr.hpp>
30
+
31
+ #include <queue>
32
+ #include <vector>
33
+
34
+ namespace Moses
35
+ {
36
+
37
+ // k-best list extractor that implements algorithm 3 from this paper:
38
+ //
39
+ // Liang Huang and David Chiang
40
+ // "Better k-best parsing"
41
+ // In Proceedings of IWPT 2005
42
+ //
43
+ class ChartKBestExtractor
44
+ {
45
+ public:
46
+ struct Vertex;
47
+
48
+ struct UnweightedHyperarc {
49
+ boost::shared_ptr<Vertex> head;
50
+ std::vector<boost::shared_ptr<Vertex> > tail;
51
+ };
52
+
53
+ struct Derivation {
54
+ Derivation(const UnweightedHyperarc &);
55
+ Derivation(const Derivation &, std::size_t);
56
+
57
+ UnweightedHyperarc edge;
58
+ std::vector<std::size_t> backPointers;
59
+ std::vector<boost::shared_ptr<Derivation> > subderivations;
60
+ float score;
61
+ };
62
+
63
+ struct DerivationOrderer {
64
+ bool operator()(const boost::weak_ptr<Derivation> &d1,
65
+ const boost::weak_ptr<Derivation> &d2) const {
66
+ boost::shared_ptr<Derivation> s1(d1);
67
+ boost::shared_ptr<Derivation> s2(d2);
68
+ return s1->score < s2->score;
69
+ }
70
+ };
71
+
72
+ struct Vertex {
73
+ typedef std::priority_queue<boost::weak_ptr<Derivation>,
74
+ std::vector<boost::weak_ptr<Derivation> >,
75
+ DerivationOrderer> DerivationQueue;
76
+
77
+ Vertex(const ChartHypothesis &h) : hypothesis(h), visited(false) {}
78
+
79
+ const ChartHypothesis &hypothesis;
80
+ std::vector<boost::weak_ptr<Derivation> > kBestList;
81
+ DerivationQueue candidates;
82
+ bool visited;
83
+ };
84
+
85
+ typedef std::vector<boost::shared_ptr<Derivation> > KBestVec;
86
+
87
+ // Extract the k-best list from the search hypergraph given the full, sorted
88
+ // list of top-level vertices.
89
+ void Extract(const std::vector<const ChartHypothesis*> &topHypos,
90
+ std::size_t k, KBestVec &);
91
+
92
+ static Phrase GetOutputPhrase(const Derivation &);
93
+ static boost::shared_ptr<ScoreComponentCollection> GetOutputScoreBreakdown(const Derivation &);
94
+ static TreePointer GetOutputTree(const Derivation &);
95
+
96
+ private:
97
+ typedef boost::unordered_map<const ChartHypothesis *,
98
+ boost::shared_ptr<Vertex> > VertexMap;
99
+
100
+ struct DerivationHasher {
101
+ std::size_t operator()(const boost::shared_ptr<Derivation> &d) const {
102
+ std::size_t seed = 0;
103
+ boost::hash_combine(seed, d->edge.head);
104
+ boost::hash_combine(seed, d->edge.tail);
105
+ boost::hash_combine(seed, d->backPointers);
106
+ return seed;
107
+ }
108
+ };
109
+
110
+ struct DerivationEqualityPred {
111
+ bool operator()(const boost::shared_ptr<Derivation> &d1,
112
+ const boost::shared_ptr<Derivation> &d2) const {
113
+ return d1->edge.head == d2->edge.head &&
114
+ d1->edge.tail == d2->edge.tail &&
115
+ d1->backPointers == d2->backPointers;
116
+ }
117
+ };
118
+
119
+ typedef boost::unordered_set<boost::shared_ptr<Derivation>, DerivationHasher,
120
+ DerivationEqualityPred> DerivationSet;
121
+
122
+ UnweightedHyperarc CreateEdge(const ChartHypothesis &);
123
+ boost::shared_ptr<Vertex> FindOrCreateVertex(const ChartHypothesis &);
124
+ void GetCandidates(Vertex &, std::size_t);
125
+ void LazyKthBest(Vertex &, std::size_t, std::size_t);
126
+ void LazyNext(Vertex &, const Derivation &, std::size_t);
127
+
128
+ VertexMap m_vertexMap;
129
+ DerivationSet m_derivations;
130
+ };
131
+
132
+ } // namespace Moses
mosesdecoder/moses/ChartManager.cpp ADDED
@@ -0,0 +1,867 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <cstdio>
23
+ #include "ChartManager.h"
24
+ #include "ChartCell.h"
25
+ #include "ChartHypothesis.h"
26
+ #include "ChartKBestExtractor.h"
27
+ #include "ChartTranslationOptions.h"
28
+ #include "HypergraphOutput.h"
29
+ #include "StaticData.h"
30
+ #include "DecodeStep.h"
31
+ #include "TreeInput.h"
32
+ #include "moses/FF/StatefulFeatureFunction.h"
33
+ #include "moses/FF/WordPenaltyProducer.h"
34
+ #include "moses/OutputCollector.h"
35
+ #include "moses/ChartKBestExtractor.h"
36
+ #include "moses/HypergraphOutput.h"
37
+ #include "moses/TranslationTask.h"
38
+
39
+ using namespace std;
40
+
41
+ namespace Moses
42
+ {
43
+
44
+ /* constructor. Initialize everything prior to decoding a particular sentence.
45
+ * \param source the sentence to be decoded
46
+ * \param system which particular set of models to use.
47
+ */
48
+ ChartManager::ChartManager(ttasksptr const& ttask)
49
+ : BaseManager(ttask)
50
+ , m_hypoStackColl(m_source, *this)
51
+ , m_start(clock())
52
+ , m_hypothesisId(0)
53
+ , m_parser(ttask, m_hypoStackColl)
54
+ , m_translationOptionList(ttask->options()->syntax.rule_limit, m_source)
55
+ { }
56
+
57
+ ChartManager::~ChartManager()
58
+ {
59
+ clock_t end = clock();
60
+ float et = (end - m_start);
61
+ et /= (float)CLOCKS_PER_SEC;
62
+ VERBOSE(1, "Translation took " << et << " seconds" << endl);
63
+
64
+ }
65
+
66
+ //! decode the sentence. This contains the main laps. Basically, the CKY++ algorithm
67
+ void ChartManager::Decode()
68
+ {
69
+
70
+ VERBOSE(1,"Translating: " << m_source << endl);
71
+
72
+ ResetSentenceStats(m_source);
73
+
74
+ VERBOSE(2,"Decoding: " << endl);
75
+ //ChartHypothesis::ResetHypoCount();
76
+
77
+ AddXmlChartOptions();
78
+
79
+ // MAIN LOOP
80
+ size_t size = m_source.GetSize();
81
+ for (int startPos = size-1; startPos >= 0; --startPos) {
82
+ for (size_t width = 1; width <= size-startPos; ++width) {
83
+ size_t endPos = startPos + width - 1;
84
+ Range range(startPos, endPos);
85
+
86
+ // create trans opt
87
+ m_translationOptionList.Clear();
88
+ m_parser.Create(range, m_translationOptionList);
89
+ m_translationOptionList.ApplyThreshold(options()->search.trans_opt_threshold);
90
+
91
+ const InputPath &inputPath = m_parser.GetInputPath(range);
92
+ m_translationOptionList.EvaluateWithSourceContext(m_source, inputPath);
93
+
94
+ // decode
95
+ ChartCell &cell = m_hypoStackColl.Get(range);
96
+ cell.Decode(m_translationOptionList, m_hypoStackColl);
97
+
98
+ m_translationOptionList.Clear();
99
+ cell.PruneToSize();
100
+ cell.CleanupArcList();
101
+ cell.SortHypotheses();
102
+ }
103
+ }
104
+
105
+ IFVERBOSE(1) {
106
+
107
+ for (size_t startPos = 0; startPos < size; ++startPos) {
108
+ cerr.width(3);
109
+ cerr << startPos << " ";
110
+ }
111
+ cerr << endl;
112
+ for (size_t width = 1; width <= size; width++) {
113
+ for( size_t space = 0; space < width-1; space++ ) {
114
+ cerr << " ";
115
+ }
116
+ for (size_t startPos = 0; startPos <= size-width; ++startPos) {
117
+ Range range(startPos, startPos+width-1);
118
+ cerr.width(3);
119
+ cerr << m_hypoStackColl.Get(range).GetSize() << " ";
120
+ }
121
+ cerr << endl;
122
+ }
123
+ }
124
+ }
125
+
126
+ /** add specific translation options and hypotheses according to the XML override translation scheme.
127
+ * Doesn't seem to do anything about walls and zones.
128
+ * @todo check walls & zones. Check that the implementation doesn't leak, xml options sometimes does if you're not careful
129
+ */
130
+ void ChartManager::AddXmlChartOptions()
131
+ {
132
+ const std::vector <ChartTranslationOptions*> xmlChartOptionsList
133
+ = m_source.GetXmlChartTranslationOptions();
134
+ IFVERBOSE(2) {
135
+ cerr << "AddXmlChartOptions " << xmlChartOptionsList.size() << endl;
136
+ }
137
+ if (xmlChartOptionsList.size() == 0) return;
138
+
139
+ typedef std::vector<ChartTranslationOptions*>::const_iterator citer;
140
+ for(citer i = xmlChartOptionsList.begin(); i != xmlChartOptionsList.end(); ++i) {
141
+ ChartTranslationOptions* opt = *i;
142
+
143
+ const Range &range = opt->GetSourceWordsRange();
144
+
145
+ RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
146
+ ChartHypothesis* hypo = new ChartHypothesis(*opt, *item, *this);
147
+ hypo->EvaluateWhenApplied();
148
+
149
+
150
+ ChartCell &cell = m_hypoStackColl.Get(range);
151
+ cell.AddHypothesis(hypo);
152
+ }
153
+ }
154
+
155
+ //! get best complete translation from the top chart cell.
156
+ const ChartHypothesis *ChartManager::GetBestHypothesis() const
157
+ {
158
+ size_t size = m_source.GetSize();
159
+
160
+ if (size == 0) // empty source
161
+ return NULL;
162
+ else {
163
+ Range range(0, size-1);
164
+ const ChartCell &lastCell = m_hypoStackColl.Get(range);
165
+ return lastCell.GetBestHypothesis();
166
+ }
167
+ }
168
+
169
+ /** Calculate the n-best paths through the output hypergraph.
170
+ * Return the list of paths with the variable ret
171
+ * \param n how may paths to return
172
+ * \param ret return argument
173
+ * \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
174
+ */
175
+ void ChartManager::CalcNBest(
176
+ std::size_t n,
177
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
178
+ bool onlyDistinct) const
179
+ {
180
+ nBestList.clear();
181
+ if (n == 0 || m_source.GetSize() == 0) {
182
+ return;
183
+ }
184
+
185
+ // Get the list of top-level hypotheses, sorted by score.
186
+ Range range(0, m_source.GetSize()-1);
187
+ const ChartCell &lastCell = m_hypoStackColl.Get(range);
188
+ boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
189
+ lastCell.GetAllSortedHypotheses());
190
+ if (!topLevelHypos) {
191
+ return;
192
+ }
193
+
194
+ ChartKBestExtractor extractor;
195
+
196
+ if (!onlyDistinct) {
197
+ // Return the n-best list as is, including duplicate translations.
198
+ extractor.Extract(*topLevelHypos, n, nBestList);
199
+ return;
200
+ }
201
+
202
+ // Determine how many derivations to extract. If the n-best list is
203
+ // restricted to distinct translations then this limit should be bigger
204
+ // than n. The n-best factor determines how much bigger the limit should be,
205
+ // with 0 being 'unlimited.' This actually sets a large-ish limit in case
206
+ // too many translations are identical.
207
+ const std::size_t nBestFactor = options()->nbest.factor;
208
+ std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
209
+
210
+ // Extract the derivations.
211
+ ChartKBestExtractor::KBestVec bigList;
212
+ bigList.reserve(numDerivations);
213
+ extractor.Extract(*topLevelHypos, numDerivations, bigList);
214
+
215
+ // Copy derivations into nBestList, skipping ones with repeated translations.
216
+ std::set<Phrase> distinct;
217
+ for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
218
+ nBestList.size() < n && p != bigList.end(); ++p) {
219
+ boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
220
+ Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
221
+ if (distinct.insert(translation).second) {
222
+ nBestList.push_back(derivation);
223
+ }
224
+ }
225
+ }
226
+
227
+ void ChartManager::WriteSearchGraph(const ChartSearchGraphWriter& writer) const
228
+ {
229
+
230
+ size_t size = m_source.GetSize();
231
+
232
+ // which hypotheses are reachable?
233
+ std::map<unsigned,bool> reachable;
234
+ Range fullRange(0, size-1);
235
+ const ChartCell &lastCell = m_hypoStackColl.Get(fullRange);
236
+ const ChartHypothesis *hypo = lastCell.GetBestHypothesis();
237
+
238
+ if (hypo == NULL) {
239
+ // no hypothesis
240
+ return;
241
+ }
242
+ size_t winners = 0;
243
+ size_t losers = 0;
244
+
245
+ FindReachableHypotheses( hypo, reachable, &winners, &losers);
246
+ writer.WriteHeader(winners, losers);
247
+
248
+ for (size_t width = 1; width <= size; ++width) {
249
+ for (size_t startPos = 0; startPos <= size-width; ++startPos) {
250
+ size_t endPos = startPos + width - 1;
251
+ Range range(startPos, endPos);
252
+ TRACE_ERR(" " << range << "=");
253
+
254
+ const ChartCell &cell = m_hypoStackColl.Get(range);
255
+ cell.WriteSearchGraph(writer, reachable);
256
+ }
257
+ }
258
+ }
259
+
260
+ void ChartManager::FindReachableHypotheses(
261
+ const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable, size_t* winners, size_t* losers) const
262
+ {
263
+ // do not recurse, if already visited
264
+ if (reachable.find(hypo->GetId()) != reachable.end()) {
265
+ return;
266
+ }
267
+
268
+ // recurse
269
+ reachable[ hypo->GetId() ] = true;
270
+ if (hypo->GetWinningHypothesis() == hypo) {
271
+ (*winners)++;
272
+ } else {
273
+ (*losers)++;
274
+ }
275
+ const std::vector<const ChartHypothesis*> &previous = hypo->GetPrevHypos();
276
+ for(std::vector<const ChartHypothesis*>::const_iterator i = previous.begin(); i != previous.end(); ++i) {
277
+ FindReachableHypotheses( *i, reachable, winners, losers );
278
+ }
279
+
280
+ // also loop over recombined hypotheses (arcs)
281
+ const ChartArcList *arcList = hypo->GetArcList();
282
+ if (arcList) {
283
+ ChartArcList::const_iterator iterArc;
284
+ for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
285
+ const ChartHypothesis &arc = **iterArc;
286
+ FindReachableHypotheses( &arc, reachable, winners, losers );
287
+ }
288
+ }
289
+ }
290
+
291
+ void
292
+ ChartManager::
293
+ OutputSearchGraphAsHypergraph(std::ostream& out) const
294
+ {
295
+ ChartSearchGraphWriterHypergraph writer(options(), &out);
296
+ WriteSearchGraph(writer);
297
+ }
298
+
299
+ void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
300
+ {
301
+ ChartSearchGraphWriterMoses writer(options(), &outputSearchGraphStream,
302
+ m_source.GetTranslationId());
303
+ WriteSearchGraph(writer);
304
+ }
305
+
306
+ void ChartManager::OutputBest(OutputCollector *collector) const
307
+ {
308
+ const ChartHypothesis *bestHypo = GetBestHypothesis();
309
+ if (collector && bestHypo) {
310
+ const size_t translationId = m_source.GetTranslationId();
311
+ const ChartHypothesis *bestHypo = GetBestHypothesis();
312
+ OutputBestHypo(collector, bestHypo, translationId);
313
+ }
314
+ }
315
+
316
+ void ChartManager::OutputNBest(OutputCollector *collector) const
317
+ {
318
+ size_t nBestSize = options()->nbest.nbest_size;
319
+ if (nBestSize > 0) {
320
+ const size_t translationId = m_source.GetTranslationId();
321
+
322
+ VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
323
+ << options()->nbest.output_file_path << endl);
324
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
325
+ CalcNBest(nBestSize, nBestList, options()->nbest.only_distinct);
326
+ OutputNBestList(collector, nBestList, translationId);
327
+ IFVERBOSE(2) {
328
+ PrintUserTime("N-Best Hypotheses Generation Time:");
329
+ }
330
+ }
331
+
332
+ }
333
+
334
+ void ChartManager::OutputNBestList(OutputCollector *collector,
335
+ const ChartKBestExtractor::KBestVec &nBestList,
336
+ long translationId) const
337
+ {
338
+ std::ostringstream out;
339
+
340
+ if (collector->OutputIsCout()) {
341
+ // Set precision only if we're writing the n-best list to cout. This is to
342
+ // preserve existing behaviour, but should probably be done either way.
343
+ FixPrecision(out);
344
+ }
345
+
346
+ NBestOptions const& nbo = options()->nbest;
347
+ bool includeWordAlignment = nbo.include_alignment_info;
348
+ bool PrintNBestTrees = nbo.print_trees;
349
+
350
+ for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
351
+ p != nBestList.end(); ++p) {
352
+ const ChartKBestExtractor::Derivation &derivation = **p;
353
+
354
+ // get the derivation's target-side yield
355
+ Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
356
+
357
+ // delete <s> and </s>
358
+ UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
359
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
360
+ outputPhrase.RemoveWord(0);
361
+ outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
362
+
363
+ // print the translation ID, surface factors, and scores
364
+ out << translationId << " ||| ";
365
+ OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
366
+ out << " ||| ";
367
+ boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
368
+ bool with_labels = options()->nbest.include_feature_labels;
369
+ scoreBreakdown->OutputAllFeatureScores(out, with_labels);
370
+ out << " ||| " << derivation.score;
371
+
372
+ // optionally, print word alignments
373
+ if (includeWordAlignment) {
374
+ out << " ||| ";
375
+ Alignments align;
376
+ OutputAlignmentNBest(align, derivation, 0);
377
+ for (Alignments::const_iterator q = align.begin(); q != align.end();
378
+ ++q) {
379
+ out << q->first << "-" << q->second << " ";
380
+ }
381
+ }
382
+
383
+ // optionally, print tree
384
+ if (PrintNBestTrees) {
385
+ TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
386
+ out << " ||| " << tree->GetString();
387
+ }
388
+
389
+ out << std::endl;
390
+ }
391
+
392
+ assert(collector);
393
+ collector->Write(translationId, out.str());
394
+ }
395
+
396
+ size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
397
+ {
398
+ size_t ret = hypo->GetCurrSourceRange().GetNumWordsCovered();
399
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
400
+ for (size_t i = 0; i < prevHypos.size(); ++i) {
401
+ size_t childSize = prevHypos[i]->GetCurrSourceRange().GetNumWordsCovered();
402
+ ret -= (childSize - 1);
403
+ }
404
+ return ret;
405
+ }
406
+
407
+ size_t ChartManager::OutputAlignmentNBest(
408
+ Alignments &retAlign,
409
+ const Moses::ChartKBestExtractor::Derivation &derivation,
410
+ size_t startTarget) const
411
+ {
412
+ const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
413
+
414
+ size_t totalTargetSize = 0;
415
+ size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
416
+
417
+ const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
418
+
419
+ size_t thisSourceSize = CalcSourceSize(&hypo);
420
+
421
+ // position of each terminal word in translation rule, irrespective of alignment
422
+ // if non-term, number is undefined
423
+ vector<size_t> sourceOffsets(thisSourceSize, 0);
424
+ vector<size_t> targetOffsets(tp.GetSize(), 0);
425
+
426
+ const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
427
+ vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
428
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
429
+
430
+ UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
431
+ "Error");
432
+
433
+ size_t targetInd = 0;
434
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
435
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
436
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
437
+ size_t sourceInd = targetPos2SourceInd[targetPos];
438
+ size_t sourcePos = sourceInd2pos[sourceInd];
439
+
440
+ const Moses::ChartKBestExtractor::Derivation &subderivation =
441
+ *derivation.subderivations[sourceInd];
442
+
443
+ // calc source size
444
+ size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
445
+ sourceOffsets[sourcePos] = sourceSize;
446
+
447
+ // calc target size.
448
+ // Recursively look thru child hypos
449
+ size_t currStartTarget = startTarget + totalTargetSize;
450
+ size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
451
+ currStartTarget);
452
+ targetOffsets[targetPos] = targetSize;
453
+
454
+ totalTargetSize += targetSize;
455
+ ++targetInd;
456
+ } else {
457
+ ++totalTargetSize;
458
+ }
459
+ }
460
+
461
+ // convert position within translation rule to absolute position within
462
+ // source sentence / output sentence
463
+ ShiftOffsets(sourceOffsets, startSource);
464
+ ShiftOffsets(targetOffsets, startTarget);
465
+
466
+ // get alignments from this hypo
467
+ const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();
468
+
469
+ // add to output arg, offsetting by source & target
470
+ AlignmentInfo::const_iterator iter;
471
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
472
+ const std::pair<size_t,size_t> &align = *iter;
473
+ size_t relSource = align.first;
474
+ size_t relTarget = align.second;
475
+ size_t absSource = sourceOffsets[relSource];
476
+ size_t absTarget = targetOffsets[relTarget];
477
+
478
+ pair<size_t, size_t> alignPoint(absSource, absTarget);
479
+ pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
480
+ UTIL_THROW_IF2(!ret.second, "Error");
481
+ }
482
+
483
+ return totalTargetSize;
484
+ }
485
+
486
+ void ChartManager::OutputAlignment(OutputCollector *collector) const
487
+ {
488
+ if (collector == NULL) {
489
+ return;
490
+ }
491
+
492
+ ostringstream out;
493
+
494
+ const ChartHypothesis *hypo = GetBestHypothesis();
495
+ if (hypo) {
496
+ Alignments retAlign;
497
+ OutputAlignment(retAlign, hypo, 0);
498
+
499
+ // output alignments
500
+ Alignments::const_iterator iter;
501
+ for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
502
+ const pair<size_t, size_t> &alignPoint = *iter;
503
+ out << alignPoint.first << "-" << alignPoint.second << " ";
504
+ }
505
+ }
506
+ out << endl;
507
+
508
+ collector->Write(m_source.GetTranslationId(), out.str());
509
+
510
+ }
511
+
512
+ size_t ChartManager::OutputAlignment(Alignments &retAlign,
513
+ const Moses::ChartHypothesis *hypo,
514
+ size_t startTarget) const
515
+ {
516
+ size_t totalTargetSize = 0;
517
+ size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
518
+
519
+ const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
520
+
521
+ size_t thisSourceSize = CalcSourceSize(hypo);
522
+
523
+ // position of each terminal word in translation rule, irrespective of alignment
524
+ // if non-term, number is undefined
525
+ vector<size_t> sourceOffsets(thisSourceSize, 0);
526
+ vector<size_t> targetOffsets(tp.GetSize(), 0);
527
+
528
+ const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
529
+
530
+ const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
531
+ vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
532
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
533
+
534
+ UTIL_THROW_IF2(sourceInd2pos.size() != prevHypos.size(), "Error");
535
+
536
+ size_t targetInd = 0;
537
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
538
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
539
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
540
+ size_t sourceInd = targetPos2SourceInd[targetPos];
541
+ size_t sourcePos = sourceInd2pos[sourceInd];
542
+
543
+ const ChartHypothesis *prevHypo = prevHypos[sourceInd];
544
+
545
+ // calc source size
546
+ size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
547
+ sourceOffsets[sourcePos] = sourceSize;
548
+
549
+ // calc target size.
550
+ // Recursively look thru child hypos
551
+ size_t currStartTarget = startTarget + totalTargetSize;
552
+ size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
553
+ targetOffsets[targetPos] = targetSize;
554
+
555
+ totalTargetSize += targetSize;
556
+ ++targetInd;
557
+ } else {
558
+ ++totalTargetSize;
559
+ }
560
+ }
561
+
562
+ // convert position within translation rule to absolute position within
563
+ // source sentence / output sentence
564
+ ShiftOffsets(sourceOffsets, startSource);
565
+ ShiftOffsets(targetOffsets, startTarget);
566
+
567
+ // get alignments from this hypo
568
+ const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
569
+
570
+ // add to output arg, offsetting by source & target
571
+ AlignmentInfo::const_iterator iter;
572
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
573
+ const std::pair<size_t,size_t> &align = *iter;
574
+ size_t relSource = align.first;
575
+ size_t relTarget = align.second;
576
+ size_t absSource = sourceOffsets[relSource];
577
+ size_t absTarget = targetOffsets[relTarget];
578
+
579
+ pair<size_t, size_t> alignPoint(absSource, absTarget);
580
+ pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
581
+ UTIL_THROW_IF2(!ret.second, "Error");
582
+
583
+ }
584
+
585
+ return totalTargetSize;
586
+ }
587
+
588
+ void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
589
+ {
590
+ if (collector) {
591
+ OutputDetailedTranslationReport(collector,
592
+ GetBestHypothesis(),
593
+ static_cast<const Sentence&>(m_source),
594
+ m_source.GetTranslationId());
595
+ }
596
+ }
597
+
598
+ void ChartManager::OutputDetailedTranslationReport(
599
+ OutputCollector *collector,
600
+ const ChartHypothesis *hypo,
601
+ const Sentence &sentence,
602
+ long translationId) const
603
+ {
604
+ if (hypo == NULL) {
605
+ return;
606
+ }
607
+ std::ostringstream out;
608
+ ApplicationContext applicationContext;
609
+
610
+ OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
611
+ collector->Write(translationId, out.str());
612
+
613
+ //DIMw
614
+ if (options()->output.detailed_all_transrep_filepath.size()) {
615
+ const Sentence &sentence = static_cast<const Sentence &>(m_source);
616
+ size_t nBestSize = options()->nbest.nbest_size;
617
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
618
+ CalcNBest(nBestSize, nBestList, options()->nbest.only_distinct);
619
+ OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
620
+ }
621
+
622
+ }
623
+
624
+ void ChartManager::OutputTranslationOptions(std::ostream &out,
625
+ ApplicationContext &applicationContext,
626
+ const ChartHypothesis *hypo,
627
+ const Sentence &sentence,
628
+ long translationId) const
629
+ {
630
+ if (hypo != NULL) {
631
+ OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
632
+ out << std::endl;
633
+ }
634
+
635
+ // recursive
636
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
637
+ std::vector<const ChartHypothesis*>::const_iterator iter;
638
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
639
+ const ChartHypothesis *prevHypo = *iter;
640
+ OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
641
+ }
642
+ }
643
+
644
+ void ChartManager::OutputTranslationOption(std::ostream &out,
645
+ ApplicationContext &applicationContext,
646
+ const ChartHypothesis *hypo,
647
+ const Sentence &sentence,
648
+ long translationId) const
649
+ {
650
+ ReconstructApplicationContext(*hypo, sentence, applicationContext);
651
+ out << "Trans Opt " << translationId
652
+ << " " << hypo->GetCurrSourceRange()
653
+ << ": ";
654
+ WriteApplicationContext(out, applicationContext);
655
+ out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
656
+ << "->" << hypo->GetCurrTargetPhrase()
657
+ << " " << hypo->GetFutureScore() << hypo->GetScoreBreakdown();
658
+ }
659
+
660
+ // Given a hypothesis and sentence, reconstructs the 'application context' --
661
+ // the source RHS symbols of the SCFG rule that was applied, plus their spans.
662
+ void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
663
+ const Sentence &sentence,
664
+ ApplicationContext &context) const
665
+ {
666
+ context.clear();
667
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
668
+ std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
669
+ std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
670
+ const Range &span = hypo.GetCurrSourceRange();
671
+ size_t i = span.GetStartPos();
672
+ while (i <= span.GetEndPos()) {
673
+ if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
674
+ // Symbol is a terminal.
675
+ const Word &symbol = sentence.GetWord(i);
676
+ context.push_back(std::make_pair(symbol, Range(i, i)));
677
+ ++i;
678
+ } else {
679
+ // Symbol is a non-terminal.
680
+ const Word &symbol = (*p)->GetTargetLHS();
681
+ const Range &range = (*p)->GetCurrSourceRange();
682
+ context.push_back(std::make_pair(symbol, range));
683
+ i = range.GetEndPos()+1;
684
+ ++p;
685
+ }
686
+ }
687
+ }
688
+
689
+ void ChartManager::OutputUnknowns(OutputCollector *collector) const
690
+ {
691
+ if (collector) {
692
+ long translationId = m_source.GetTranslationId();
693
+ const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
694
+
695
+ std::ostringstream out;
696
+ for (std::vector<Phrase*>::const_iterator p = oovs.begin();
697
+ p != oovs.end(); ++p) {
698
+ out << **p;
699
+ }
700
+ out << std::endl;
701
+ collector->Write(translationId, out.str());
702
+ }
703
+
704
+ }
705
+
706
+ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
707
+ {
708
+ const ChartHypothesis *hypo = GetBestHypothesis();
709
+ if (collector == NULL || hypo == NULL) {
710
+ return;
711
+ }
712
+
713
+ std::ostringstream out;
714
+ ApplicationContext applicationContext;
715
+
716
+ const Sentence &sentence = static_cast<const Sentence &>(m_source);
717
+ const size_t translationId = m_source.GetTranslationId();
718
+
719
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
720
+
721
+ //Tree of full sentence
722
+ const StatefulFeatureFunction* treeStructure;
723
+ treeStructure = StaticData::Instance().GetTreeStructure();
724
+ if (treeStructure != NULL) {
725
+ const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
726
+ for( size_t i=0; i<sff.size(); i++ ) {
727
+ if (sff[i] == treeStructure) {
728
+ const TreeState* tree = static_cast<const TreeState*>(hypo->GetFFState(i));
729
+ out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
730
+ break;
731
+ }
732
+ }
733
+ }
734
+
735
+ collector->Write(translationId, out.str());
736
+
737
+ }
738
+
739
+ void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
740
+ ApplicationContext &applicationContext,
741
+ const ChartHypothesis *hypo,
742
+ const Sentence &sentence,
743
+ long translationId) const
744
+ {
745
+
746
+ if (hypo != NULL) {
747
+ OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
748
+
749
+ const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
750
+
751
+ out << " ||| ";
752
+ if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
753
+ out << " " << *property->GetValueString();
754
+ } else {
755
+ out << " " << "noTreeInfo";
756
+ }
757
+ out << std::endl;
758
+ }
759
+
760
+ // recursive
761
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
762
+ std::vector<const ChartHypothesis*>::const_iterator iter;
763
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
764
+ const ChartHypothesis *prevHypo = *iter;
765
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
766
+ }
767
+ }
768
+
769
+ void ChartManager::OutputSearchGraph(OutputCollector *collector) const
770
+ {
771
+ if (collector) {
772
+ long translationId = m_source.GetTranslationId();
773
+ std::ostringstream out;
774
+ OutputSearchGraphMoses( out);
775
+ collector->Write(translationId, out.str());
776
+ }
777
+ }
778
+
779
+ //DIMw
780
+ void ChartManager::OutputDetailedAllTranslationReport(
781
+ OutputCollector *collector,
782
+ const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
783
+ const Sentence &sentence,
784
+ long translationId) const
785
+ {
786
+ std::ostringstream out;
787
+ ApplicationContext applicationContext;
788
+
789
+ const ChartCellCollection& cells = GetChartCellCollection();
790
+ size_t size = GetSource().GetSize();
791
+ for (size_t width = 1; width <= size; ++width) {
792
+ for (size_t startPos = 0; startPos <= size-width; ++startPos) {
793
+ size_t endPos = startPos + width - 1;
794
+ Range range(startPos, endPos);
795
+ const ChartCell& cell = cells.Get(range);
796
+ const HypoList* hyps = cell.GetAllSortedHypotheses();
797
+ out << "Chart Cell [" << startPos << ".." << endPos << "]" << endl;
798
+ HypoList::const_iterator iter;
799
+ size_t c = 1;
800
+ for (iter = hyps->begin(); iter != hyps->end(); ++iter) {
801
+ out << "----------------Item " << c++ << " ---------------------"
802
+ << endl;
803
+ OutputTranslationOptions(out, applicationContext, *iter,
804
+ sentence, translationId);
805
+ }
806
+ }
807
+ }
808
+ collector->Write(translationId, out.str());
809
+ }
810
+
811
+ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
812
+ {
813
+ if (!collector)
814
+ return;
815
+ std::ostringstream out;
816
+ FixPrecision(out);
817
+ if (hypo != NULL) {
818
+ VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
819
+ VERBOSE(3,"Best path: ");
820
+ Backtrack(hypo);
821
+ VERBOSE(3,"0" << std::endl);
822
+
823
+ if (options()->output.ReportHypoScore) {
824
+ out << hypo->GetFutureScore() << " ";
825
+ }
826
+
827
+ if (options()->output.RecoverPath) {
828
+ out << "||| ";
829
+ }
830
+ Phrase outPhrase(ARRAY_SIZE_INCR);
831
+ hypo->GetOutputPhrase(outPhrase);
832
+
833
+ // delete 1st & last
834
+ UTIL_THROW_IF2(outPhrase.GetSize() < 2,
835
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
836
+
837
+ outPhrase.RemoveWord(0);
838
+ outPhrase.RemoveWord(outPhrase.GetSize() - 1);
839
+
840
+ string output = outPhrase.GetStringRep(options()->output.factor_order);
841
+ out << output << endl;
842
+ } else {
843
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
844
+
845
+ if (options()->output.ReportHypoScore) {
846
+ out << "0 ";
847
+ }
848
+
849
+ out << endl;
850
+ }
851
+ collector->Write(translationId, out.str());
852
+ }
853
+
854
+ void ChartManager::Backtrack(const ChartHypothesis *hypo) const
855
+ {
856
+ const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
857
+
858
+ vector<const ChartHypothesis*>::const_iterator iter;
859
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
860
+ const ChartHypothesis *prevHypo = *iter;
861
+
862
+ VERBOSE(3,prevHypo->GetId() << " <= ");
863
+ Backtrack(prevHypo);
864
+ }
865
+ }
866
+
867
+ } // namespace Moses
mosesdecoder/moses/ChartManager.h ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #pragma once
23
+
24
+ #include <vector>
25
+ #include <boost/unordered_map.hpp>
26
+ #include "ChartCell.h"
27
+ #include "ChartCellCollection.h"
28
+ #include "Range.h"
29
+ #include "SentenceStats.h"
30
+ #include "ChartTranslationOptionList.h"
31
+ #include "ChartParser.h"
32
+ #include "ChartKBestExtractor.h"
33
+ #include "BaseManager.h"
34
+ #include "moses/Syntax/KBestExtractor.h"
35
+
36
+ namespace Moses
37
+ {
38
+
39
+ class ChartHypothesis;
40
+ class ChartSearchGraphWriter;
41
+
42
+ /** Holds everything you need to decode 1 sentence with the hierachical/syntax decoder
43
+ */
44
+ class ChartManager : public BaseManager
45
+ {
46
+ private:
47
+ ChartCellCollection m_hypoStackColl;
48
+ std::auto_ptr<SentenceStats> m_sentenceStats;
49
+ clock_t m_start; /**< starting time, used for logging */
50
+ unsigned m_hypothesisId; /* For handing out hypothesis ids to ChartHypothesis */
51
+
52
+ ChartParser m_parser;
53
+
54
+ ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
55
+
56
+ /* auxilliary functions for SearchGraphs */
57
+ void FindReachableHypotheses(
58
+ const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
59
+ void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
60
+
61
+ // output
62
+ void OutputNBestList(OutputCollector *collector,
63
+ const ChartKBestExtractor::KBestVec &nBestList,
64
+ long translationId) const;
65
+ size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
66
+ size_t OutputAlignmentNBest(Alignments &retAlign,
67
+ const Moses::ChartKBestExtractor::Derivation &derivation,
68
+ size_t startTarget) const;
69
+ size_t OutputAlignment(Alignments &retAlign,
70
+ const Moses::ChartHypothesis *hypo,
71
+ size_t startTarget) const;
72
+ void OutputDetailedTranslationReport(
73
+ OutputCollector *collector,
74
+ const ChartHypothesis *hypo,
75
+ const Sentence &sentence,
76
+ long translationId) const;
77
+ void OutputTranslationOptions(std::ostream &out,
78
+ ApplicationContext &applicationContext,
79
+ const ChartHypothesis *hypo,
80
+ const Sentence &sentence,
81
+ long translationId) const;
82
+ void OutputTranslationOption(std::ostream &out,
83
+ ApplicationContext &applicationContext,
84
+ const ChartHypothesis *hypo,
85
+ const Sentence &sentence,
86
+ long translationId) const;
87
+ void ReconstructApplicationContext(const ChartHypothesis &hypo,
88
+ const Sentence &sentence,
89
+ ApplicationContext &context) const;
90
+ void OutputTreeFragmentsTranslationOptions(std::ostream &out,
91
+ ApplicationContext &applicationContext,
92
+ const ChartHypothesis *hypo,
93
+ const Sentence &sentence,
94
+ long translationId) const;
95
+ void OutputDetailedAllTranslationReport(
96
+ OutputCollector *collector,
97
+ const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
98
+ const Sentence &sentence,
99
+ long translationId) const;
100
+ void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const;
101
+ void Backtrack(const ChartHypothesis *hypo) const;
102
+
103
+ public:
104
+ ChartManager(ttasksptr const& ttask);
105
+ ~ChartManager();
106
+ void Decode();
107
+ void AddXmlChartOptions();
108
+ const ChartHypothesis *GetBestHypothesis() const;
109
+ void CalcNBest(size_t n, std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList, bool onlyDistinct=false) const;
110
+
111
+ /** "Moses" (osg) type format */
112
+ void OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const;
113
+
114
+ /** Output in (modified) Kenneth hypergraph format */
115
+ void OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const;
116
+
117
+ //! debug data collected when decoding sentence
118
+ SentenceStats& GetSentenceStats() const {
119
+ return *m_sentenceStats;
120
+ }
121
+
122
+ //DIMw
123
+ const ChartCellCollection& GetChartCellCollection() const {
124
+ return m_hypoStackColl;
125
+ }
126
+
127
+ void CalcDecoderStatistics() const {
128
+ }
129
+
130
+ void ResetSentenceStats(const InputType& source) {
131
+ m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
132
+ }
133
+
134
+ //! contigious hypo id for each input sentence. For debugging purposes
135
+ unsigned GetNextHypoId() {
136
+ return m_hypothesisId++;
137
+ }
138
+
139
+ const ChartParser &GetParser() const {
140
+ return m_parser;
141
+ }
142
+
143
+ // outputs
144
+ void OutputBest(OutputCollector *collector) const;
145
+ void OutputNBest(OutputCollector *collector) const;
146
+ void OutputLatticeSamples(OutputCollector *collector) const {
147
+ }
148
+ void OutputAlignment(OutputCollector *collector) const;
149
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
150
+ void OutputUnknowns(OutputCollector *collector) const;
151
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
152
+ void OutputWordGraph(OutputCollector *collector) const {
153
+ }
154
+ void OutputSearchGraph(OutputCollector *collector) const;
155
+ void OutputSearchGraphSLF() const {
156
+ }
157
+ // void OutputSearchGraphHypergraph() const;
158
+
159
+ };
160
+
161
+ }
162
+
mosesdecoder/moses/ChartParser.cpp ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "ChartParser.h"
23
+ #include "ChartParserCallback.h"
24
+ #include "ChartRuleLookupManager.h"
25
+ #include "StaticData.h"
26
+ #include "TreeInput.h"
27
+ #include "Sentence.h"
28
+ #include "DecodeGraph.h"
29
+ #include "moses/FF/UnknownWordPenaltyProducer.h"
30
+ #include "moses/TranslationModel/PhraseDictionary.h"
31
+ #include "moses/TranslationTask.h"
32
+
33
+ using namespace std;
34
+ using namespace Moses;
35
+
36
+ namespace Moses
37
+ {
38
+
39
+ ChartParserUnknown
40
+ ::ChartParserUnknown(ttasksptr const& ttask)
41
+ : m_ttask(ttask)
42
+ { }
43
+
44
+ ChartParserUnknown::~ChartParserUnknown()
45
+ {
46
+ RemoveAllInColl(m_unksrcs);
47
+ }
48
+
49
+ AllOptions::ptr const&
50
+ ChartParserUnknown::
51
+ options() const
52
+ {
53
+ return m_ttask.lock()->options();
54
+ }
55
+
56
+ void
57
+ ChartParserUnknown::
58
+ Process(const Word &sourceWord, const Range &range, ChartParserCallback &to)
59
+ {
60
+ // unknown word, add as trans opt
61
+ const StaticData &staticData = StaticData::Instance();
62
+ const UnknownWordPenaltyProducer &unknownWordPenaltyProducer
63
+ = UnknownWordPenaltyProducer::Instance();
64
+
65
+ size_t isDigit = 0;
66
+ if (options()->unk.drop) {
67
+ const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
68
+ const StringPiece s = f->GetString();
69
+ isDigit = s.find_first_of("0123456789");
70
+ if (isDigit == string::npos)
71
+ isDigit = 0;
72
+ else
73
+ isDigit = 1;
74
+ // modify the starting bitmap
75
+ }
76
+
77
+ Phrase* unksrc = new Phrase(1);
78
+ unksrc->AddWord() = sourceWord;
79
+ Word &newWord = unksrc->GetWord(0);
80
+ newWord.SetIsOOV(true);
81
+
82
+ m_unksrcs.push_back(unksrc);
83
+
84
+ // hack. Once the OOV FF is a phrase table, get rid of this
85
+ PhraseDictionary *firstPt = NULL;
86
+ if (PhraseDictionary::GetColl().size() == 0) {
87
+ firstPt = PhraseDictionary::GetColl()[0];
88
+ }
89
+
90
+ //TranslationOption *transOpt;
91
+ if (! options()->unk.drop || isDigit) {
92
+ // loop
93
+ const UnknownLHSList &lhsList = options()->syntax.unknown_lhs; // staticData.GetUnknownLHS();
94
+ UnknownLHSList::const_iterator iterLHS;
95
+ for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
96
+ const string &targetLHSStr = iterLHS->first;
97
+ float prob = iterLHS->second;
98
+
99
+ // lhs
100
+ //const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
101
+ Word *targetLHS = new Word(true);
102
+
103
+ targetLHS->CreateFromString(Output, options()->output.factor_order,
104
+ targetLHSStr, true);
105
+ UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
106
+
107
+ // add to dictionary
108
+ TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
109
+ Word &targetWord = targetPhrase->AddWord();
110
+ targetWord.CreateUnknownWord(sourceWord);
111
+
112
+ // scores
113
+ float unknownScore = FloorScore(TransformScore(prob));
114
+
115
+ targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
116
+ targetPhrase->SetTargetLHS(targetLHS);
117
+ targetPhrase->SetAlignmentInfo("0-0");
118
+ targetPhrase->EvaluateInIsolation(*unksrc);
119
+
120
+ if (!options()->output.detailed_tree_transrep_filepath.empty() ||
121
+ options()->nbest.print_trees || staticData.GetTreeStructure() != NULL) {
122
+ std::string prop = "[ ";
123
+ prop += (*targetLHS)[0]->GetString().as_string() + " ";
124
+ prop += sourceWord[0]->GetString().as_string() + " ]";
125
+ targetPhrase->SetProperty("Tree", prop);
126
+ }
127
+
128
+ // chart rule
129
+ to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
130
+ } // for (iterLHS
131
+ } else {
132
+ // drop source word. create blank trans opt
133
+ float unknownScore = FloorScore(-numeric_limits<float>::infinity());
134
+
135
+ TargetPhrase *targetPhrase = new TargetPhrase(firstPt);
136
+ // loop
137
+ const UnknownLHSList &lhsList = options()->syntax.unknown_lhs;//staticData.GetUnknownLHS();
138
+ UnknownLHSList::const_iterator iterLHS;
139
+ for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
140
+ const string &targetLHSStr = iterLHS->first;
141
+ //float prob = iterLHS->second;
142
+
143
+ Word *targetLHS = new Word(true);
144
+ targetLHS->CreateFromString(Output, staticData.options()->output.factor_order,
145
+ targetLHSStr, true);
146
+ UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
147
+
148
+ targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore);
149
+ targetPhrase->EvaluateInIsolation(*unksrc);
150
+
151
+ targetPhrase->SetTargetLHS(targetLHS);
152
+
153
+ // chart rule
154
+ to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range);
155
+ }
156
+ }
157
+ }
158
+
159
+ ChartParser
160
+ ::ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells)
161
+ : m_ttask(ttask)
162
+ , m_unknown(ttask)
163
+ , m_decodeGraphList(StaticData::Instance().GetDecodeGraphs())
164
+ , m_source(*(ttask->GetSource().get()))
165
+ {
166
+ const StaticData &staticData = StaticData::Instance();
167
+
168
+ staticData.InitializeForInput(ttask);
169
+ CreateInputPaths(m_source);
170
+
171
+ const std::vector<PhraseDictionary*> &dictionaries = PhraseDictionary::GetColl();
172
+ assert(dictionaries.size() == m_decodeGraphList.size());
173
+ m_ruleLookupManagers.reserve(dictionaries.size());
174
+ for (std::size_t i = 0; i < dictionaries.size(); ++i) {
175
+ const PhraseDictionary *dict = dictionaries[i];
176
+ PhraseDictionary *nonConstDict = const_cast<PhraseDictionary*>(dict);
177
+ std::size_t maxChartSpan = m_decodeGraphList[i]->GetMaxChartSpan();
178
+ ChartRuleLookupManager *lookupMgr = nonConstDict->CreateRuleLookupManager(*this, cells, maxChartSpan);
179
+ m_ruleLookupManagers.push_back(lookupMgr);
180
+ }
181
+
182
+ }
183
+
184
+ ChartParser::~ChartParser()
185
+ {
186
+ RemoveAllInColl(m_ruleLookupManagers);
187
+ StaticData::Instance().CleanUpAfterSentenceProcessing(m_ttask.lock());
188
+
189
+ InputPathMatrix::const_iterator iterOuter;
190
+ for (iterOuter = m_inputPathMatrix.begin(); iterOuter != m_inputPathMatrix.end(); ++iterOuter) {
191
+ const std::vector<InputPath*> &outer = *iterOuter;
192
+
193
+ std::vector<InputPath*>::const_iterator iterInner;
194
+ for (iterInner = outer.begin(); iterInner != outer.end(); ++iterInner) {
195
+ InputPath *path = *iterInner;
196
+ delete path;
197
+ }
198
+ }
199
+ }
200
+
201
+ void ChartParser::Create(const Range &range, ChartParserCallback &to)
202
+ {
203
+ assert(m_decodeGraphList.size() == m_ruleLookupManagers.size());
204
+
205
+ std::vector <DecodeGraph*>::const_iterator iterDecodeGraph;
206
+ std::vector <ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin();
207
+ for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) {
208
+ const DecodeGraph &decodeGraph = **iterDecodeGraph;
209
+ assert(decodeGraph.GetSize() == 1);
210
+ ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers;
211
+ size_t maxSpan = decodeGraph.GetMaxChartSpan();
212
+ size_t last = m_source.GetSize()-1;
213
+ if (maxSpan != 0) {
214
+ last = min(last, range.GetStartPos()+maxSpan);
215
+ }
216
+ if (maxSpan == 0 || range.GetNumWordsCovered() <= maxSpan) {
217
+ const InputPath &inputPath = GetInputPath(range);
218
+ ruleLookupManager.GetChartRuleCollection(inputPath, last, to);
219
+ }
220
+ }
221
+
222
+ if (range.GetNumWordsCovered() == 1
223
+ && range.GetStartPos() != 0
224
+ && range.GetStartPos() != m_source.GetSize()-1) {
225
+ bool always = options()->unk.always_create_direct_transopt;
226
+ if (to.Empty() || always) {
227
+ // create unknown words for 1 word coverage where we don't have any trans options
228
+ const Word &sourceWord = m_source.GetWord(range.GetStartPos());
229
+ m_unknown.Process(sourceWord, range, to);
230
+ }
231
+ }
232
+ }
233
+
234
+ void ChartParser::CreateInputPaths(const InputType &input)
235
+ {
236
+ size_t size = input.GetSize();
237
+ m_inputPathMatrix.resize(size);
238
+
239
+ UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
240
+ "Input must be a sentence or a tree, " <<
241
+ "not lattice or confusion networks");
242
+
243
+ TranslationTask const* ttask = m_ttask.lock().get();
244
+ for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
245
+ for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
246
+ size_t endPos = startPos + phaseSize -1;
247
+ vector<InputPath*> &vec = m_inputPathMatrix[startPos];
248
+
249
+ Range range(startPos, endPos);
250
+ Phrase subphrase(input.GetSubString(Range(startPos, endPos)));
251
+ const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
252
+
253
+ InputPath *node;
254
+ if (range.GetNumWordsCovered() == 1) {
255
+ node = new InputPath(ttask, subphrase, labels, range, NULL, NULL);
256
+ vec.push_back(node);
257
+ } else {
258
+ const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
259
+ node = new InputPath(ttask, subphrase, labels, range, &prevNode, NULL);
260
+ vec.push_back(node);
261
+ }
262
+
263
+ //m_inputPathQueue.push_back(node);
264
+ }
265
+ }
266
+ }
267
+
268
+ const InputPath &ChartParser::GetInputPath(const Range &range) const
269
+ {
270
+ return GetInputPath(range.GetStartPos(), range.GetEndPos());
271
+ }
272
+
273
+ const InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos) const
274
+ {
275
+ size_t offset = endPos - startPos;
276
+ UTIL_THROW_IF2(offset >= m_inputPathMatrix[startPos].size(),
277
+ "Out of bound: " << offset);
278
+ return *m_inputPathMatrix[startPos][offset];
279
+ }
280
+
281
+ InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos)
282
+ {
283
+ size_t offset = endPos - startPos;
284
+ UTIL_THROW_IF2(offset >= m_inputPathMatrix[startPos].size(),
285
+ "Out of bound: " << offset);
286
+ return *m_inputPathMatrix[startPos][offset];
287
+ }
288
+ /*
289
+ const Sentence &ChartParser::GetSentence() const {
290
+ const Sentence &sentence = static_cast<const Sentence&>(m_source);
291
+ return sentence;
292
+ }
293
+ */
294
+ size_t ChartParser::GetSize() const
295
+ {
296
+ return m_source.GetSize();
297
+ }
298
+
299
+ long ChartParser::GetTranslationId() const
300
+ {
301
+ return m_source.GetTranslationId();
302
+ }
303
+
304
+
305
+ AllOptions::ptr const&
306
+ ChartParser::
307
+ options() const
308
+ {
309
+ return m_ttask.lock()->options();
310
+ }
311
+
312
+
313
+ } // namespace Moses
mosesdecoder/moses/ChartParser.h ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ // $Id$
3
+ // vim:tabstop=2
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (C) 2010 Hieu Hoang
7
+
8
+ This library is free software; you can redistribute it and/or
9
+ modify it under the terms of the GNU Lesser General Public
10
+ License as published by the Free Software Foundation; either
11
+ version 2.1 of the License, or (at your option) any later version.
12
+
13
+ This library is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public
19
+ License along with this library; if not, write to the Free Software
20
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ ***********************************************************************/
22
+
23
+ #pragma once
24
+
25
+ #include <list>
26
+ #include <vector>
27
+ #include "Range.h"
28
+ #include "StackVec.h"
29
+ #include "InputPath.h"
30
+ #include "TargetPhraseCollection.h"
31
+ namespace Moses
32
+ {
33
+
34
+ class ChartParserCallback;
35
+ class ChartRuleLookupManager;
36
+ class InputType;
37
+ class Sentence;
38
+ class ChartCellCollectionBase;
39
+ class Word;
40
+ class Phrase;
41
+ // class TargetPhraseCollection;
42
+ class DecodeGraph;
43
+
44
+ class ChartParserUnknown
45
+ {
46
+ ttaskwptr m_ttask;
47
+ public:
48
+ ChartParserUnknown(ttasksptr const& ttask);
49
+ ~ChartParserUnknown();
50
+
51
+ void Process(const Word &sourceWord, const Range &range, ChartParserCallback &to);
52
+
53
+ const std::vector<Phrase*> &GetUnknownSources() const {
54
+ return m_unksrcs;
55
+ }
56
+
57
+ private:
58
+ std::vector<Phrase*> m_unksrcs;
59
+ std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
60
+ AllOptions::ptr const& options() const;
61
+ };
62
+
63
+ class ChartParser
64
+ {
65
+ ttaskwptr m_ttask;
66
+ public:
67
+ ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells);
68
+ ~ChartParser();
69
+
70
+ void Create(const Range &range, ChartParserCallback &to);
71
+
72
+ //! the sentence being decoded
73
+ //const Sentence &GetSentence() const;
74
+ long GetTranslationId() const;
75
+ size_t GetSize() const;
76
+ const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
77
+ const InputPath &GetInputPath(const Range &range) const;
78
+ const std::vector<Phrase*> &GetUnknownSources() const {
79
+ return m_unknown.GetUnknownSources();
80
+ }
81
+
82
+ AllOptions::ptr const& options() const;
83
+
84
+ private:
85
+ ChartParserUnknown m_unknown;
86
+ std::vector <DecodeGraph*> m_decodeGraphList;
87
+ std::vector<ChartRuleLookupManager*> m_ruleLookupManagers;
88
+ InputType const& m_source; /**< source sentence to be translated */
89
+
90
+ typedef std::vector< std::vector<InputPath*> > InputPathMatrix;
91
+ InputPathMatrix m_inputPathMatrix;
92
+
93
+ void CreateInputPaths(const InputType &input);
94
+ InputPath &GetInputPath(size_t startPos, size_t endPos);
95
+
96
+ };
97
+
98
+ }
99
+
mosesdecoder/moses/ChartParserCallback.h ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "StackVec.h"
4
+
5
+ #include <list>
6
+ #include "TargetPhraseCollection.h"
7
+
8
+ namespace Moses
9
+ {
10
+
11
+ class TargetPhraseCollection;
12
+ class Range;
13
+ class TargetPhrase;
14
+ class InputPath;
15
+ class InputType;
16
+ class ChartCellLabel;
17
+
18
+ class ChartParserCallback
19
+ {
20
+ public:
21
+ virtual ~ChartParserCallback() {}
22
+
23
+ virtual void Add(const TargetPhraseCollection &, const StackVec &, const Range &) = 0;
24
+
25
+ virtual bool Empty() const = 0;
26
+
27
+ virtual void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const Range &range) = 0;
28
+
29
+ virtual void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) = 0;
30
+
31
+ virtual float GetBestScore(const ChartCellLabel *chartCell) const = 0;
32
+
33
+ };
34
+
35
+ } // namespace Moses
mosesdecoder/moses/ChartRuleLookupManager.cpp ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ #include "ChartRuleLookupManager.h"
2
+ #include "ChartParser.h"
3
+
4
+ namespace Moses
5
+ {
6
+ ChartRuleLookupManager::~ChartRuleLookupManager()
7
+ {}
8
+ } // namespace Moses
9
+
mosesdecoder/moses/ChartRuleLookupManager.h ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - factored phrase-based language decoder
3
+ Copyright (C) 2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+ #ifndef moses_ChartRuleLookupManager_h
22
+ #define moses_ChartRuleLookupManager_h
23
+
24
+ #include "ChartCellCollection.h"
25
+ #include "InputType.h"
26
+
27
+ namespace Moses
28
+ {
29
+ class ChartParser;
30
+ class ChartParserCallback;
31
+ class Range;
32
+ class Sentence;
33
+
34
+ /** Defines an interface for looking up rules in a rule table. Concrete
35
+ * implementation classes should correspond to specific PhraseDictionary
36
+ * subclasses (memory or on-disk). Since a ChartRuleLookupManager object
37
+ * maintains sentence-specific state, exactly one should be created for
38
+ * each sentence that is to be decoded.
39
+ */
40
+ class ChartRuleLookupManager
41
+ {
42
+ public:
43
+ ChartRuleLookupManager(const ChartParser &parser,
44
+ const ChartCellCollectionBase &cellColl)
45
+ : m_parser(parser)
46
+ , m_cellCollection(cellColl) {}
47
+
48
+ virtual ~ChartRuleLookupManager();
49
+
50
+ const ChartCellLabelSet &GetTargetLabelSet(size_t begin, size_t end) const {
51
+ return m_cellCollection.GetBase(Range(begin, end)).GetTargetLabelSet();
52
+ }
53
+
54
+ const ChartParser &GetParser() const {
55
+ return m_parser;
56
+ }
57
+ //const Sentence &GetSentence() const;
58
+
59
+ const ChartCellLabel &GetSourceAt(size_t at) const {
60
+ return m_cellCollection.GetSourceWordLabel(at);
61
+ }
62
+
63
+ /** abstract function. Return a vector of translation options for given a range in the input sentence
64
+ * \param range source range for which you want the translation options
65
+ * \param outColl return argument
66
+ */
67
+ virtual void GetChartRuleCollection(
68
+ const InputPath &inputPath,
69
+ size_t lastPos, // last position to consider if using lookahead
70
+ ChartParserCallback &outColl) = 0;
71
+
72
+ private:
73
+ //! Non-copyable: copy constructor and assignment operator not implemented.
74
+ ChartRuleLookupManager(const ChartRuleLookupManager &);
75
+ //! Non-copyable: copy constructor and assignment operator not implemented.
76
+ ChartRuleLookupManager &operator=(const ChartRuleLookupManager &);
77
+
78
+ const ChartParser &m_parser;
79
+ const ChartCellCollectionBase &m_cellCollection;
80
+ };
81
+
82
+ } // namespace Moses
83
+
84
+ #endif
mosesdecoder/moses/ChartTranslationOption.cpp ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "ChartTranslationOptions.h"
2
+ #include "InputType.h"
3
+ #include "InputPath.h"
4
+
5
+ namespace Moses
6
+ {
7
+ ChartTranslationOption::ChartTranslationOption(const TargetPhrase &targetPhrase)
8
+ :m_targetPhrase(targetPhrase)
9
+ ,m_scoreBreakdown(targetPhrase.GetScoreBreakdown())
10
+ {
11
+ }
12
+
13
+ void ChartTranslationOption::EvaluateWithSourceContext(const InputType &input,
14
+ const InputPath &inputPath,
15
+ const StackVec &stackVec)
16
+ {
17
+ const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
18
+
19
+ for (size_t i = 0; i < ffs.size(); ++i) {
20
+ const FeatureFunction &ff = *ffs[i];
21
+ ff.EvaluateWithSourceContext(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown);
22
+ }
23
+ }
24
+
25
+
26
+ std::ostream& operator<<(std::ostream &out, const ChartTranslationOption &transOpt)
27
+ {
28
+ out << transOpt.m_targetPhrase << " " << transOpt.m_scoreBreakdown;
29
+ return out;
30
+ }
31
+
32
+ }
33
+
mosesdecoder/moses/ChartTranslationOption.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "ScoreComponentCollection.h"
4
+
5
+ namespace Moses
6
+ {
7
+ class TargetPhrase;
8
+ class InputPath;
9
+ class InputType;
10
+ class StackVec;
11
+
12
+ class ChartTranslationOption
13
+ {
14
+ friend std::ostream& operator<<(std::ostream&, const ChartTranslationOption&);
15
+
16
+ protected:
17
+ const TargetPhrase &m_targetPhrase;
18
+ ScoreComponentCollection m_scoreBreakdown;
19
+ const InputPath *m_inputPath;
20
+ const std::vector<const Word*> *m_ruleSourceFromInputPath; // used by placeholders
21
+
22
+ public:
23
+ ChartTranslationOption(const TargetPhrase &targetPhrase);
24
+
25
+ const TargetPhrase &GetPhrase() const {
26
+ return m_targetPhrase;
27
+ }
28
+
29
+ const InputPath *GetInputPath() const {
30
+ return m_inputPath;
31
+ }
32
+
33
+ void SetInputPath(const InputPath *inputPath) {
34
+ m_inputPath = inputPath;
35
+ }
36
+
37
+ const std::vector<const Word*> *GetSourceRuleFromInputPath() const {
38
+ return m_ruleSourceFromInputPath;
39
+ }
40
+ void SetSourceRuleFromInputPath(const std::vector<const Word*> *obj) {
41
+ m_ruleSourceFromInputPath = obj;
42
+ }
43
+
44
+ const ScoreComponentCollection &GetScores() const {
45
+ return m_scoreBreakdown;
46
+ }
47
+
48
+ void EvaluateWithSourceContext(const InputType &input,
49
+ const InputPath &inputPath,
50
+ const StackVec &stackVec);
51
+ };
52
+
53
+ }
54
+
mosesdecoder/moses/ChartTranslationOptionList.cpp ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - factored phrase-based language decoder
3
+ Copyright (C) 2010 Hieu Hoang
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include <algorithm>
21
+ #include <iostream>
22
+ #include <vector>
23
+ #include "StaticData.h"
24
+ #include "ChartTranslationOptionList.h"
25
+ #include "ChartTranslationOptions.h"
26
+ #include "ChartCellCollection.h"
27
+ #include "Range.h"
28
+ #include "InputType.h"
29
+ #include "InputPath.h"
30
+
31
+ using namespace std;
32
+
33
+ namespace Moses
34
+ {
35
+
36
+ ChartTranslationOptionList::
37
+ ChartTranslationOptionList(size_t ruleLimit, const InputType &input)
38
+ : m_size(0)
39
+ , m_ruleLimit(ruleLimit)
40
+ {
41
+ m_scoreThreshold = std::numeric_limits<float>::infinity();
42
+ }
43
+
44
+ ChartTranslationOptionList::~ChartTranslationOptionList()
45
+ {
46
+ RemoveAllInColl(m_collection);
47
+ }
48
+
49
+ void ChartTranslationOptionList::Clear()
50
+ {
51
+ m_size = 0;
52
+ m_scoreThreshold = std::numeric_limits<float>::infinity();
53
+ }
54
+
55
+ class ChartTranslationOptionOrderer
56
+ {
57
+ public:
58
+ bool operator()(const ChartTranslationOptions* itemA, const ChartTranslationOptions* itemB) const {
59
+ return itemA->GetEstimateOfBestScore() > itemB->GetEstimateOfBestScore();
60
+ }
61
+ };
62
+
63
+ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
64
+ const StackVec &stackVec,
65
+ const Range &range)
66
+ {
67
+ if (tpc.IsEmpty()) {
68
+ return;
69
+ }
70
+
71
+ for (size_t i = 0; i < stackVec.size(); ++i) {
72
+ const ChartCellLabel &chartCellLabel = *stackVec[i];
73
+ size_t numHypos = chartCellLabel.GetStack().cube->size();
74
+ if (numHypos == 0) {
75
+ return; // empty stack. These rules can't be used
76
+ }
77
+ }
78
+
79
+ const TargetPhrase &targetPhrase = **(tpc.begin());
80
+ float score = targetPhrase.GetFutureScore();
81
+ for (StackVec::const_iterator p = stackVec.begin(); p != stackVec.end(); ++p) {
82
+ score += (*p)->GetBestScore(this);
83
+ }
84
+
85
+ // If the rule limit has already been reached then don't add the option
86
+ // unless it is better than at least one existing option.
87
+ if (m_ruleLimit && m_size > m_ruleLimit && score < m_scoreThreshold) {
88
+ return;
89
+ }
90
+
91
+ // Add the option to the list.
92
+ if (m_size == m_collection.size()) {
93
+ // m_collection has reached capacity: create a new object.
94
+ m_collection.push_back(new ChartTranslationOptions(tpc, stackVec,
95
+ range, score));
96
+ } else {
97
+ // Overwrite an unused object.
98
+ *(m_collection[m_size]) = ChartTranslationOptions(tpc, stackVec,
99
+ range, score);
100
+ }
101
+ ++m_size;
102
+
103
+ // If the rule limit hasn't been exceeded then update the threshold.
104
+ if (!m_ruleLimit || m_size <= m_ruleLimit) {
105
+ m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold;
106
+ }
107
+
108
+ // Prune if bursting
109
+ if (m_ruleLimit && m_size == m_ruleLimit * 2) {
110
+ NTH_ELEMENT4(m_collection.begin(),
111
+ m_collection.begin() + m_ruleLimit - 1,
112
+ m_collection.begin() + m_size,
113
+ ChartTranslationOptionOrderer());
114
+ m_scoreThreshold = m_collection[m_ruleLimit-1]->GetEstimateOfBestScore();
115
+ m_size = m_ruleLimit;
116
+ }
117
+ }
118
+
119
+ void
120
+ ChartTranslationOptionList::
121
+ AddPhraseOOV(TargetPhrase &phrase,
122
+ std::list<TargetPhraseCollection::shared_ptr > &waste_memory,
123
+ const Range &range)
124
+ {
125
+ TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
126
+ tpc->Add(&phrase);
127
+ waste_memory.push_back(tpc);
128
+ StackVec empty;
129
+ Add(*tpc, empty, range);
130
+ }
131
+
132
+ void ChartTranslationOptionList::ApplyThreshold(float const threshold)
133
+ {
134
+ if (m_ruleLimit && m_size > m_ruleLimit) {
135
+ // Something's gone wrong if the list has grown to m_ruleLimit * 2
136
+ // without being pruned.
137
+ assert(m_size < m_ruleLimit * 2);
138
+ // Reduce the list to the best m_ruleLimit options. The remaining
139
+ // options can be overwritten on subsequent calls to Add().
140
+ NTH_ELEMENT4(m_collection.begin(),
141
+ m_collection.begin()+m_ruleLimit,
142
+ m_collection.begin()+m_size,
143
+ ChartTranslationOptionOrderer());
144
+ m_size = m_ruleLimit;
145
+ }
146
+
147
+ // keep only those over best + threshold
148
+
149
+ float scoreThreshold = -std::numeric_limits<float>::infinity();
150
+
151
+ CollType::const_iterator iter;
152
+ for (iter = m_collection.begin(); iter != m_collection.begin()+m_size; ++iter) {
153
+ const ChartTranslationOptions *transOpt = *iter;
154
+ float score = transOpt->GetEstimateOfBestScore();
155
+ scoreThreshold = (score > scoreThreshold) ? score : scoreThreshold;
156
+ }
157
+
158
+ scoreThreshold += threshold; // StaticData::Instance().GetTranslationOptionThreshold();
159
+
160
+ CollType::iterator bound = std::partition(m_collection.begin(),
161
+ m_collection.begin()+m_size,
162
+ ScoreThresholdPred(scoreThreshold));
163
+
164
+ m_size = std::distance(m_collection.begin(), bound);
165
+ }
166
+
167
+ float ChartTranslationOptionList::GetBestScore(const ChartCellLabel *chartCell) const
168
+ {
169
+ const HypoList *stack = chartCell->GetStack().cube;
170
+ assert(stack);
171
+ assert(!stack->empty());
172
+ const ChartHypothesis &bestHypo = **(stack->begin());
173
+ return bestHypo.GetFutureScore();
174
+ }
175
+
176
+ void ChartTranslationOptionList::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
177
+ {
178
+ // NEVER iterate over ALL of the collection. Just over the first m_size
179
+ CollType::iterator iter;
180
+ for (iter = m_collection.begin(); iter != m_collection.begin() + m_size; ++iter) {
181
+ ChartTranslationOptions &transOpts = **iter;
182
+ transOpts.EvaluateWithSourceContext(input, inputPath);
183
+ }
184
+
185
+ // get rid of empty trans opts
186
+ size_t numDiscard = 0;
187
+ for (size_t i = 0; i < m_size; ++i) {
188
+ ChartTranslationOptions *transOpts = m_collection[i];
189
+ if (transOpts->GetSize() == 0) {
190
+ //delete transOpts;
191
+ ++numDiscard;
192
+ } else if (numDiscard) {
193
+ SwapTranslationOptions(i - numDiscard, i);
194
+ //m_collection[] = transOpts;
195
+ }
196
+ }
197
+
198
+ size_t newSize = m_size - numDiscard;
199
+ m_size = newSize;
200
+ }
201
+
202
+ void ChartTranslationOptionList::SwapTranslationOptions(size_t a, size_t b)
203
+ {
204
+ ChartTranslationOptions *transOptsA = m_collection[a];
205
+ ChartTranslationOptions *transOptsB = m_collection[b];
206
+ m_collection[a] = transOptsB;
207
+ m_collection[b] = transOptsA;
208
+ }
209
+
210
+ std::ostream& operator<<(std::ostream &out, const ChartTranslationOptionList &obj)
211
+ {
212
+ for (size_t i = 0; i < obj.m_collection.size(); ++i) {
213
+ const ChartTranslationOptions &transOpts = *obj.m_collection[i];
214
+ out << transOpts << endl;
215
+ }
216
+ return out;
217
+ }
218
+
219
+ }
mosesdecoder/moses/ChartTranslationOptionList.h ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - factored phrase-based language decoder
3
+ Copyright (C) 2006 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "ChartTranslationOptions.h"
23
+ #include "ChartParserCallback.h"
24
+ #include "StackVec.h"
25
+
26
+ #include <vector>
27
+
28
+ namespace Moses
29
+ {
30
+
31
+ class TargetPhraseCollection;
32
+ class Range;
33
+ class InputType;
34
+ class InputPath;
35
+ class ChartCellLabel;
36
+
37
+ //! a vector of translations options for a specific range, in a specific sentence
38
+ class ChartTranslationOptionList : public ChartParserCallback
39
+ {
40
+ friend std::ostream& operator<<(std::ostream&, const ChartTranslationOptionList&);
41
+
42
+ public:
43
+ ChartTranslationOptionList(size_t ruleLimit, const InputType &input);
44
+ ~ChartTranslationOptionList();
45
+
46
+ const ChartTranslationOptions &Get(size_t i) const {
47
+ return *m_collection[i];
48
+ }
49
+
50
+ //! number of translation options
51
+ size_t GetSize() const {
52
+ return m_size;
53
+ }
54
+
55
+ void Add(const TargetPhraseCollection &, const StackVec &,
56
+ const Range &);
57
+
58
+ void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const Range &range);
59
+
60
+ bool Empty() const {
61
+ return m_size == 0;
62
+ }
63
+
64
+ float GetBestScore(const ChartCellLabel *chartCell) const;
65
+
66
+ void Clear();
67
+ void ApplyThreshold(float threshold);
68
+ void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
69
+
70
+ private:
71
+ typedef std::vector<ChartTranslationOptions*> CollType;
72
+
73
+ struct ScoreThresholdPred {
74
+ ScoreThresholdPred(float threshold) : m_thresholdScore(threshold) {}
75
+ bool operator()(const ChartTranslationOptions *option) {
76
+ return option->GetEstimateOfBestScore() >= m_thresholdScore;
77
+ }
78
+ float m_thresholdScore;
79
+ };
80
+
81
+ void SwapTranslationOptions(size_t a, size_t b);
82
+
83
+ CollType m_collection;
84
+ size_t m_size;
85
+ float m_scoreThreshold;
86
+ const size_t m_ruleLimit;
87
+
88
+ };
89
+
90
+ }
mosesdecoder/moses/ChartTranslationOptions.cpp ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - factored phrase-based language decoder
3
+ Copyright (C) 2010 Hieu Hoang
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include "ChartTranslationOptions.h"
21
+ #include "ChartHypothesis.h"
22
+ #include "ChartCellLabel.h"
23
+ #include "ChartTranslationOption.h"
24
+ #include "InputPath.h"
25
+ #include "StaticData.h"
26
+ #include "TranslationTask.h"
27
+
28
+ using namespace std;
29
+
30
+ namespace Moses
31
+ {
32
+
33
+ ChartTranslationOptions::ChartTranslationOptions(const TargetPhraseCollection &targetPhraseColl,
34
+ const StackVec &stackVec,
35
+ const Range &range,
36
+ float score)
37
+ : m_stackVec(stackVec)
38
+ , m_wordsRange(&range)
39
+ , m_estimateOfBestScore(score)
40
+ {
41
+ TargetPhraseCollection::const_iterator iter;
42
+ for (iter = targetPhraseColl.begin(); iter != targetPhraseColl.end(); ++iter) {
43
+ const TargetPhrase *origTP = *iter;
44
+
45
+ boost::shared_ptr<ChartTranslationOption> ptr(new ChartTranslationOption(*origTP));
46
+ m_collection.push_back(ptr);
47
+ }
48
+ }
49
+
50
+ ChartTranslationOptions::~ChartTranslationOptions()
51
+ {
52
+
53
+ }
54
+
55
+ //! functor to compare (chart) hypotheses by (descending) score
56
+ class ChartTranslationOptionScoreOrderer
57
+ {
58
+ public:
59
+ bool operator()(const boost::shared_ptr<ChartTranslationOption> &transOptA
60
+ , const boost::shared_ptr<ChartTranslationOption> &transOptB) const {
61
+ const ScoreComponentCollection &scoresA = transOptA->GetScores();
62
+ const ScoreComponentCollection &scoresB = transOptB->GetScores();
63
+ return scoresA.GetWeightedScore() > scoresB.GetWeightedScore();
64
+ }
65
+ };
66
+
67
+ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
68
+ {
69
+ SetInputPath(&inputPath);
70
+ // if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) {
71
+ if (inputPath.ttask->options()->input.placeholder_factor != NOT_FOUND) {
72
+ CreateSourceRuleFromInputPath();
73
+ }
74
+
75
+ CollType::iterator iter;
76
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
77
+ ChartTranslationOption &transOpt = **iter;
78
+ transOpt.SetInputPath(&inputPath);
79
+ transOpt.EvaluateWithSourceContext(input, inputPath, m_stackVec);
80
+ }
81
+
82
+ // get rid of -inf trans opts
83
+ size_t numDiscard = 0;
84
+ for (size_t i = 0; i < m_collection.size(); ++i) {
85
+ ChartTranslationOption *transOpt = m_collection[i].get();
86
+
87
+ if (transOpt->GetScores().GetWeightedScore() == - std::numeric_limits<float>::infinity()) {
88
+ ++numDiscard;
89
+ } else if (numDiscard) {
90
+ m_collection[i - numDiscard] = m_collection[i];
91
+ }
92
+ }
93
+
94
+ size_t newSize = m_collection.size() - numDiscard;
95
+ m_collection.resize(newSize);
96
+
97
+ // sort if necessary
98
+ const StaticData &staticData = StaticData::Instance();
99
+ if (staticData.RequireSortingAfterSourceContext()) {
100
+ std::sort(m_collection.begin()
101
+ , m_collection.begin() + newSize
102
+ , ChartTranslationOptionScoreOrderer());
103
+ }
104
+
105
+ }
106
+
107
+ void ChartTranslationOptions::SetInputPath(const InputPath *inputPath)
108
+ {
109
+ CollType::iterator iter;
110
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
111
+ ChartTranslationOption &transOpt = **iter;
112
+ transOpt.SetInputPath(inputPath);
113
+ }
114
+ }
115
+
116
+ void ChartTranslationOptions::CreateSourceRuleFromInputPath()
117
+ {
118
+ if (m_collection.size() == 0) {
119
+ return;
120
+ }
121
+
122
+ const InputPath *inputPath = m_collection.front()->GetInputPath();
123
+ assert(inputPath);
124
+ std::vector<const Word*> &ruleSourceFromInputPath = inputPath->AddRuleSourceFromInputPath();
125
+
126
+ size_t chartCellIndex = 0;
127
+ const ChartCellLabel *chartCellLabel = (chartCellIndex < m_stackVec.size()) ? m_stackVec[chartCellIndex] : NULL;
128
+
129
+ size_t ind = 0;
130
+ for (size_t sourcePos = m_wordsRange->GetStartPos(); sourcePos <= m_wordsRange->GetEndPos(); ++sourcePos, ++ind) {
131
+ if (chartCellLabel) {
132
+ if (sourcePos == chartCellLabel->GetCoverage().GetEndPos()) {
133
+ // end of child range. push an empty word to denote non-term
134
+ ruleSourceFromInputPath.push_back(NULL);
135
+ ++chartCellIndex;
136
+ chartCellLabel = (chartCellIndex < m_stackVec.size()) ? m_stackVec[chartCellIndex] : NULL;
137
+ } else if (sourcePos >= chartCellLabel->GetCoverage().GetStartPos()) {
138
+ // in the range of child hypo. do nothing
139
+ } else {
140
+ // not yet reached child range. add word
141
+ ruleSourceFromInputPath.push_back(&inputPath->GetPhrase().GetWord(ind));
142
+ }
143
+ } else {
144
+ // no child in sight. add word
145
+ ruleSourceFromInputPath.push_back(&inputPath->GetPhrase().GetWord(ind));
146
+ }
147
+ }
148
+
149
+ // save it to each trans opt
150
+ CollType::iterator iter;
151
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
152
+ ChartTranslationOption &transOpt = **iter;
153
+ transOpt.SetSourceRuleFromInputPath(&ruleSourceFromInputPath);
154
+ }
155
+
156
+ }
157
+
158
+ std::ostream& operator<<(std::ostream &out, const ChartTranslationOptions &obj)
159
+ {
160
+ for (size_t i = 0; i < obj.m_collection.size(); ++i) {
161
+ const ChartTranslationOption &transOpt = *obj.m_collection[i];
162
+ out << transOpt << endl;
163
+ }
164
+
165
+ return out;
166
+ }
167
+
168
+ }
mosesdecoder/moses/ChartTranslationOptions.h ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - factored phrase-based language decoder
3
+ Copyright (C) 2010 Hieu Hoang
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "StackVec.h"
23
+ #include "TargetPhrase.h"
24
+ #include "TargetPhraseCollection.h"
25
+ #include "Range.h"
26
+
27
+ #include <vector>
28
+ #include <boost/shared_ptr.hpp>
29
+ #include "ChartTranslationOption.h"
30
+
31
+ namespace Moses
32
+ {
33
+ class ChartTranslationOption;
34
+ class InputPath;
35
+ class InputType;
36
+
37
+ /** Similar to a DottedRule, but contains a direct reference to a list
38
+ * of translations and provdes an estimate of the best score. For a specific range in the input sentence
39
+ */
40
+ class ChartTranslationOptions
41
+ {
42
+ friend std::ostream& operator<<(std::ostream&, const ChartTranslationOptions&);
43
+
44
+ public:
45
+ typedef std::vector<boost::shared_ptr<ChartTranslationOption> > CollType;
46
+
47
+ /** Constructor
48
+ \param targetPhraseColl @todo dunno
49
+ \param stackVec @todo dunno
50
+ \param range the range in the source sentence this translation option covers
51
+ \param score @todo dunno
52
+ */
53
+ ChartTranslationOptions(const TargetPhraseCollection &targetPhraseColl,
54
+ const StackVec &stackVec,
55
+ const Range &range,
56
+ float score);
57
+ ~ChartTranslationOptions();
58
+
59
+ static float CalcEstimateOfBestScore(const TargetPhraseCollection &,
60
+ const StackVec &);
61
+
62
+ size_t GetSize() const {
63
+ return m_collection.size();
64
+ }
65
+
66
+ //! @todo dunno
67
+ const StackVec &GetStackVec() const {
68
+ return m_stackVec;
69
+ }
70
+
71
+ //! @todo isn't the translation suppose to just contain 1 target phrase, not a whole collection of them?
72
+ const CollType &GetTargetPhrases() const {
73
+ return m_collection;
74
+ }
75
+
76
+ //! the range in the source sentence this translation option covers
77
+ const Range &GetSourceWordsRange() const {
78
+ return *m_wordsRange;
79
+ }
80
+
81
+ /** return an estimate of the best score possible with this translation option.
82
+ * the estimate is the sum of the top target phrase's estimated score plus the
83
+ * scores of the best child hypotheses.
84
+ */
85
+ inline float GetEstimateOfBestScore() const {
86
+ return m_estimateOfBestScore;
87
+ }
88
+
89
+ void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
90
+
91
+ void SetInputPath(const InputPath *inputPath);
92
+
93
+ void CreateSourceRuleFromInputPath();
94
+
95
+ private:
96
+
97
+ StackVec m_stackVec; //! vector of hypothesis list!
98
+ CollType m_collection;
99
+
100
+ const Range *m_wordsRange;
101
+ float m_estimateOfBestScore;
102
+ };
103
+
104
+ }
mosesdecoder/moses/ConfusionNet.cpp ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
2
+ // $Id$
3
+
4
+ #include "ConfusionNet.h"
5
+ #include <sstream>
6
+
7
+ #include "FactorCollection.h"
8
+ #include "Util.h"
9
+ #include "TranslationOptionCollectionConfusionNet.h"
10
+ #include "StaticData.h"
11
+ #include "Sentence.h"
12
+ #include "moses/FF/InputFeature.h"
13
+ #include "util/exception.hh"
14
+ #include "moses/TranslationTask.h"
15
+ namespace Moses
16
+ {
17
+ struct CNStats {
18
+ size_t created,destr,read,colls,words;
19
+
20
+ CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}
21
+ ~CNStats() {
22
+ print(std::cerr);
23
+ }
24
+
25
+ void createOne() {
26
+ ++created;
27
+ }
28
+ void destroyOne() {
29
+ ++destr;
30
+ }
31
+
32
+ void collect(const ConfusionNet& cn) {
33
+ ++read;
34
+ colls+=cn.GetSize();
35
+ for(size_t i=0; i<cn.GetSize(); ++i)
36
+ words+=cn[i].size();
37
+ }
38
+ void print(std::ostream& out) const {
39
+ if(created>0) {
40
+ out<<"confusion net statistics:\n"
41
+ " created:\t"<<created<<"\n"
42
+ " destroyed:\t"<<destr<<"\n"
43
+ " succ. read:\t"<<read<<"\n"
44
+ " columns:\t"<<colls<<"\n"
45
+ " words:\t"<<words<<"\n"
46
+ " avg. word/column:\t"<<words/(1.0*colls)<<"\n"
47
+ " avg. cols/sent:\t"<<colls/(1.0*read)<<"\n"
48
+ "\n\n";
49
+ }
50
+ }
51
+ };
52
+
53
+ CNStats stats;
54
+
55
+ size_t
56
+ ConfusionNet::
57
+ GetColumnIncrement(size_t i, size_t j) const
58
+ {
59
+ (void) i;
60
+ (void) j;
61
+ return 1;
62
+ }
63
+
64
+ ConfusionNet::
65
+ ConfusionNet(AllOptions::ptr const& opts) : InputType(opts)
66
+ {
67
+ stats.createOne();
68
+
69
+ if (is_syntax(opts->search.algo)) {
70
+ m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal);
71
+ }
72
+ UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified");
73
+ }
74
+
75
+ ConfusionNet::
76
+ ~ConfusionNet()
77
+ {
78
+ stats.destroyOne();
79
+ }
80
+
81
+ ConfusionNet::
82
+ ConfusionNet(Sentence const& s) : InputType(s.options())
83
+ {
84
+ data.resize(s.GetSize());
85
+ for(size_t i=0; i<s.GetSize(); ++i) {
86
+ ScorePair scorePair;
87
+ std::pair<Word, ScorePair > temp = std::make_pair(s.GetWord(i), scorePair);
88
+ data[i].push_back(temp);
89
+ }
90
+ }
91
+
92
+ bool
93
+ ConfusionNet::
94
+ ReadF(std::istream& in, int format)
95
+ {
96
+ VERBOSE(2, "read confusion net with format "<<format<<"\n");
97
+ switch(format) {
98
+ case 0:
99
+ return ReadFormat0(in);
100
+ case 1:
101
+ return ReadFormat1(in);
102
+ default:
103
+ std::cerr << "ERROR: unknown format '"<<format
104
+ <<"' in ConfusionNet::Read";
105
+ }
106
+ return false;
107
+ }
108
+
109
+ int
110
+ ConfusionNet::
111
+ Read(std::istream& in)
112
+ {
113
+ int rv=ReadF(in,0);
114
+ if(rv) stats.collect(*this);
115
+ return rv;
116
+ }
117
+
118
+ bool
119
+ ConfusionNet::
120
+ ReadFormat0(std::istream& in)
121
+ {
122
+ Clear();
123
+ const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
124
+
125
+ const InputFeature *inputFeature = InputFeature::InstancePtr();
126
+ size_t numInputScores = inputFeature->GetNumInputScores();
127
+ size_t numRealWordCount = inputFeature->GetNumRealWordsInInput();
128
+
129
+ size_t totalCount = numInputScores + numRealWordCount;
130
+ bool addRealWordCount = (numRealWordCount > 0);
131
+
132
+ std::string line;
133
+ while(getline(in,line)) {
134
+ std::istringstream is(line);
135
+ std::string word;
136
+
137
+ Column col;
138
+ while(is>>word) {
139
+ Word w;
140
+ w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
141
+ std::vector<float> probs(totalCount, 0.0);
142
+ for(size_t i=0; i < numInputScores; i++) {
143
+ double prob;
144
+ if (!(is>>prob)) {
145
+ TRACE_ERR("ERROR: unable to parse CN input - bad link probability, "
146
+ << "or wrong number of scores\n");
147
+ return false;
148
+ }
149
+ if(prob<0.0) {
150
+ VERBOSE(1, "WARN: negative prob: "<<prob<<" ->set to 0.0\n");
151
+ prob=0.0;
152
+ } else if (prob>1.0) {
153
+ VERBOSE(1, "WARN: prob > 1.0 : "<<prob<<" -> set to 1.0\n");
154
+ prob=1.0;
155
+ }
156
+ probs[i] = (std::max(static_cast<float>(log(prob)),LOWEST_SCORE));
157
+
158
+ }
159
+ // store 'real' word count in last feature if we have one more
160
+ // weight than we do arc scores and not epsilon
161
+ if (addRealWordCount && word!=EPSILON && word!="")
162
+ probs.back() = -1.0;
163
+
164
+ ScorePair scorePair(probs);
165
+
166
+ col.push_back(std::make_pair(w,scorePair));
167
+ }
168
+ if(col.size()) {
169
+ data.push_back(col);
170
+ ShrinkToFit(data.back());
171
+ } else break;
172
+ }
173
+ return !data.empty();
174
+ }
175
+
176
+ bool
177
+ ConfusionNet::
178
+ ReadFormat1(std::istream& in)
179
+ {
180
+ Clear();
181
+ const std::vector<FactorType>& factorOrder = m_options->input.factor_order;
182
+ std::string line;
183
+ if(!getline(in,line)) return 0;
184
+ size_t s;
185
+ if(getline(in,line)) s=atoi(line.c_str());
186
+ else return 0;
187
+ data.resize(s);
188
+ for(size_t i=0; i<data.size(); ++i) {
189
+ if(!getline(in,line)) return 0;
190
+ std::istringstream is(line);
191
+ if(!(is>>s)) return 0;
192
+ std::string word;
193
+ double prob;
194
+ data[i].resize(s);
195
+ for(size_t j=0; j<s; ++j)
196
+ if(is>>word>>prob) {
197
+ //TODO: we are only reading one prob from this input format, should read many... but this function is unused anyway. -JS
198
+ data[i][j].second.denseScores = std::vector<float> (1);
199
+ data[i][j].second.denseScores.push_back((float) log(prob));
200
+ if(data[i][j].second.denseScores[0]<0) {
201
+ VERBOSE(1, "WARN: neg costs: "<<data[i][j].second.denseScores[0]<<" -> set to 0\n");
202
+ data[i][j].second.denseScores[0]=0.0;
203
+ }
204
+ // String2Word(word,data[i][j].first,factorOrder);
205
+ Word& w = data[i][j].first;
206
+ w.CreateFromString(Input,factorOrder,StringPiece(word),false,false);
207
+ } else return 0;
208
+ }
209
+ return !data.empty();
210
+ }
211
+
212
+ void ConfusionNet::Print(std::ostream& out) const
213
+ {
214
+ out<<"conf net: "<<data.size()<<"\n";
215
+ for(size_t i=0; i<data.size(); ++i) {
216
+ out<<i<<" -- ";
217
+ for(size_t j=0; j<data[i].size(); ++j) {
218
+ out<<"("<<data[i][j].first.ToString()<<", ";
219
+
220
+ // dense
221
+ std::vector<float>::const_iterator iterDense;
222
+ for(iterDense = data[i][j].second.denseScores.begin();
223
+ iterDense < data[i][j].second.denseScores.end();
224
+ ++iterDense) {
225
+ out<<", "<<*iterDense;
226
+ }
227
+
228
+ // sparse
229
+ std::map<StringPiece, float>::const_iterator iterSparse;
230
+ for(iterSparse = data[i][j].second.sparseScores.begin();
231
+ iterSparse != data[i][j].second.sparseScores.end();
232
+ ++iterSparse) {
233
+ out << ", " << iterSparse->first << "=" << iterSparse->second;
234
+ }
235
+
236
+ out<<") ";
237
+ }
238
+ out<<"\n";
239
+ }
240
+ out<<"\n\n";
241
+ }
242
+
243
+ #ifdef _WIN32
244
+ #pragma warning(disable:4716)
245
+ #endif
246
+ Phrase
247
+ ConfusionNet::
248
+ GetSubString(const Range&) const
249
+ {
250
+ UTIL_THROW2("ERROR: call to ConfusionNet::GetSubString\n");
251
+ //return Phrase(Input);
252
+ }
253
+
254
+ std::string
255
+ ConfusionNet::
256
+ GetStringRep(const std::vector<FactorType> /* factorsToPrint */) const //not well defined yet
257
+ {
258
+ TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
259
+ return "";
260
+ }
261
+ #ifdef _WIN32
262
+ #pragma warning(disable:4716)
263
+ #endif
264
+ const Word& ConfusionNet::GetWord(size_t) const
265
+ {
266
+ UTIL_THROW2("ERROR: call to ConfusionNet::GetFactorArray\n");
267
+ }
268
+ #ifdef _WIN32
269
+ #pragma warning(default:4716)
270
+ #endif
271
+ std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
272
+ {
273
+ cn.Print(out);
274
+ return out;
275
+ }
276
+
277
+ TranslationOptionCollection*
278
+ ConfusionNet::
279
+ CreateTranslationOptionCollection(ttasksptr const& ttask) const
280
+ {
281
+ // size_t maxNoTransOptPerCoverage
282
+ // = ttask->options()->search.max_trans_opt_per_cov;
283
+ // float translationOptionThreshold
284
+ // = ttask->options()->search.trans_opt_threshold;
285
+ TranslationOptionCollection *rv
286
+ = new TranslationOptionCollectionConfusionNet(ttask, *this);
287
+ //, maxNoTransOptPerCoverage, translationOptionThreshold);
288
+ assert(rv);
289
+ return rv;
290
+ }
291
+
292
+ }
293
+
294
+
mosesdecoder/moses/ConfusionNet.h ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ #ifndef moses_ConfusionNet_h
4
+ #define moses_ConfusionNet_h
5
+
6
+ #include <vector>
7
+ #include <iostream>
8
+ #include "Word.h"
9
+ #include "InputType.h"
10
+ #include "NonTerminal.h"
11
+ #include "util/exception.hh"
12
+
13
+ namespace Moses
14
+ {
15
+
16
+ class FactorCollection;
17
+ class TranslationOptionCollection;
18
+ class Sentence;
19
+ class TranslationTask;
20
+
21
+ /** An input to the decoder where each position can be 1 of a number of words,
22
+ * each with an associated probability. Compared with a sentence, where each position is a word
23
+ */
24
+ class ConfusionNet : public InputType
25
+ {
26
+ public:
27
+ typedef std::vector<std::pair<Word, ScorePair > > Column;
28
+
29
+ protected:
30
+ std::vector<Column> data;
31
+ NonTerminalSet m_defaultLabelSet;
32
+
33
+ bool ReadFormat0(std::istream&);
34
+ bool ReadFormat1(std::istream&);
35
+ void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder);
36
+
37
+ public:
38
+ ConfusionNet(AllOptions::ptr const& opts);
39
+ virtual ~ConfusionNet();
40
+
41
+ ConfusionNet(Sentence const& s);
42
+
43
+ InputTypeEnum GetType() const {
44
+ return ConfusionNetworkInput;
45
+ }
46
+
47
+ const Column& GetColumn(size_t i) const {
48
+ UTIL_THROW_IF2(i >= data.size(),
49
+ "Out of bounds. Trying to access " << i
50
+ << " when vector only contains " << data.size());
51
+ return data[i];
52
+ }
53
+ const Column& operator[](size_t i) const {
54
+ return GetColumn(i);
55
+ }
56
+ virtual size_t GetColumnIncrement(size_t i, size_t j) const; //! returns 1 for CNs
57
+
58
+ bool Empty() const {
59
+ return data.empty();
60
+ }
61
+ size_t GetSize() const {
62
+ return data.size();
63
+ }
64
+ void Clear() {
65
+ data.clear();
66
+ }
67
+
68
+ bool ReadF(std::istream&, int format=0);
69
+ virtual void Print(std::ostream&) const;
70
+
71
+ int Read(std::istream& in);
72
+
73
+ Phrase GetSubString(const Range&) const; //TODO not defined
74
+ std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
75
+ const Word& GetWord(size_t pos) const;
76
+
77
+ TranslationOptionCollection*
78
+ CreateTranslationOptionCollection(ttasksptr const& ttask) const;
79
+
80
+ const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
81
+ return m_defaultLabelSet;
82
+ }
83
+
84
+
85
+ };
86
+
87
+ std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn);
88
+
89
+
90
+ }
91
+
92
+ #endif
mosesdecoder/moses/ContextScope.h ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
2
+ // A class to store "local" information (such as task-specific caches).
3
+ // The idea is for each translation task to have a scope, which stores
4
+ // shared pointers to task-specific objects such as caches and priors.
5
+ // Since these objects are referenced via shared pointers, scopes can
6
+ // share information.
7
+ #pragma once
8
+
9
+ #ifdef WITH_THREADS
10
+ #include <boost/thread/shared_mutex.hpp>
11
+ #include <boost/thread/locks.hpp>
12
+ #include <boost/foreach.hpp>
13
+ #endif
14
+
15
+ // for some reason, the xmlrpc_c headers must be included AFTER the
16
+ // boost thread-related ones ...
17
+ #include "xmlrpc-c.h"
18
+
19
+ #include <map>
20
+ #include <boost/shared_ptr.hpp>
21
+ #include "TypeDef.h"
22
+ #include "Util.h"
23
+
24
+ namespace Moses
25
+ {
26
+ class ContextScope
27
+ {
28
+ protected:
29
+ typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
30
+ typedef scratchpad_t::iterator iter_t;
31
+ typedef scratchpad_t::value_type entry_t;
32
+ typedef scratchpad_t::const_iterator const_iter_t;
33
+ scratchpad_t m_scratchpad;
34
+ #ifdef WITH_THREADS
35
+ mutable boost::shared_mutex m_lock;
36
+ #endif
37
+ SPTR<std::map<std::string,float> const> m_context_weights;
38
+ public:
39
+ typedef boost::shared_ptr<ContextScope> ptr;
40
+ template<typename T>
41
+ boost::shared_ptr<void> const&
42
+ set(void const* const key, boost::shared_ptr<T> const& val) {
43
+ #ifdef WITH_THREADS
44
+ boost::unique_lock<boost::shared_mutex> lock(m_lock);
45
+ #endif
46
+ return (m_scratchpad[key] = val);
47
+ }
48
+
49
+ template<typename T>
50
+ boost::shared_ptr<T> const
51
+ get(void const* key, bool CreateNewIfNecessary=false) {
52
+ #ifdef WITH_THREADS
53
+ using boost::shared_mutex;
54
+ using boost::upgrade_lock;
55
+ // T const* key = reinterpret_cast<T const*>(xkey);
56
+ upgrade_lock<shared_mutex> lock(m_lock);
57
+ #endif
58
+ iter_t m = m_scratchpad.find(key);
59
+ boost::shared_ptr< T > ret;
60
+ if (m != m_scratchpad.end()) {
61
+ if (m->second == NULL && CreateNewIfNecessary) {
62
+ #ifdef WITH_THREADS
63
+ boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
64
+ #endif
65
+ m->second.reset(new T);
66
+ }
67
+ ret = boost::static_pointer_cast< T >(m->second);
68
+ return ret;
69
+ }
70
+ if (!CreateNewIfNecessary) return ret;
71
+ #ifdef WITH_THREADS
72
+ boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
73
+ #endif
74
+ ret.reset(new T);
75
+ m_scratchpad[key] = ret;
76
+ return ret;
77
+ }
78
+
79
+ ContextScope() { }
80
+
81
+ ContextScope(ContextScope const& other) {
82
+ #ifdef WITH_THREADS
83
+ boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
84
+ boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
85
+ #endif
86
+ m_scratchpad = other.m_scratchpad;
87
+ }
88
+
89
+ SPTR<std::map<std::string,float> const>
90
+ GetContextWeights() {
91
+ return m_context_weights;
92
+ }
93
+
94
+ bool
95
+ SetContextWeights(std::string const& spec) {
96
+ if (m_context_weights) return false;
97
+ boost::unique_lock<boost::shared_mutex> lock(m_lock);
98
+ SPTR<std::map<std::string,float> > M(new std::map<std::string, float>);
99
+
100
+ // TO DO; This needs to be done with StringPiece.find, not Tokenize
101
+ // PRIORITY: low
102
+ std::vector<std::string> tokens = Tokenize(spec,":");
103
+ for (std::vector<std::string>::iterator it = tokens.begin();
104
+ it != tokens.end(); it++) {
105
+ std::vector<std::string> key_and_value = Tokenize(*it, ",");
106
+ (*M)[key_and_value[0]] = atof(key_and_value[1].c_str());
107
+ }
108
+ m_context_weights = M;
109
+ return true;
110
+ }
111
+
112
+ bool
113
+ SetContextWeights(SPTR<std::map<std::string,float> const> const& w) {
114
+ if (m_context_weights) return false;
115
+ #ifdef WITH_THREADS
116
+ boost::unique_lock<boost::shared_mutex> lock(m_lock);
117
+ #endif
118
+ m_context_weights = w;
119
+ return true;
120
+ }
121
+
122
+ };
123
+
124
+ };
mosesdecoder/moses/DecodeGraph.cpp ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (C) 2006 University of Edinburgh
7
+
8
+ This library is free software; you can redistribute it and/or
9
+ modify it under the terms of the GNU Lesser General Public
10
+ License as published by the Free Software Foundation; either
11
+ version 2.1 of the License, or (at your option) any later version.
12
+
13
+ This library is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public
19
+ License along with this library; if not, write to the Free Software
20
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ ***********************************************************************/
22
+
23
+ #include "DecodeGraph.h"
24
+ #include "DecodeStep.h"
25
+ #include "TypeDef.h"
26
+ #include "Util.h"
27
+
28
+ namespace Moses
29
+ {
30
+ DecodeGraph::~DecodeGraph()
31
+ {
32
+ RemoveAllInColl(m_steps);
33
+ }
34
+
35
+ //! Add another decode step to the graph
36
+ void DecodeGraph::Add(DecodeStep *decodeStep)
37
+ {
38
+ m_steps.push_back(decodeStep);
39
+ decodeStep->SetContainer(this);
40
+ }
41
+
42
+ }
43
+
mosesdecoder/moses/DecodeGraph.h ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (C) 2006 University of Edinburgh
7
+
8
+ This library is free software; you can redistribute it and/or
9
+ modify it under the terms of the GNU Lesser General Public
10
+ License as published by the Free Software Foundation; either
11
+ version 2.1 of the License, or (at your option) any later version.
12
+
13
+ This library is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public
19
+ License along with this library; if not, write to the Free Software
20
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ ***********************************************************************/
22
+
23
+ #ifndef moses_DecodeGraph_h
24
+ #define moses_DecodeGraph_h
25
+
26
+ #include "util/exception.hh"
27
+ #include <list>
28
+ #include <iterator>
29
+ #include "TypeDef.h"
30
+
31
+ namespace Moses
32
+ {
33
+
34
+ class DecodeStep;
35
+
36
+ //! list of DecodeSteps which factorizes the translation
37
+ class DecodeGraph
38
+ {
39
+ protected:
40
+ std::list<const DecodeStep*> m_steps;
41
+ size_t m_id; // contiguous unique id, starting from 0
42
+ size_t m_maxChartSpan;
43
+ size_t m_backoff;
44
+
45
+ public:
46
+ /**
47
+ * position: The position of this graph within the decode sequence.
48
+ **/
49
+ DecodeGraph(size_t id)
50
+ : m_id(id)
51
+ , m_maxChartSpan(NOT_FOUND)
52
+ , m_backoff(0) {
53
+ }
54
+
55
+ // for chart decoding
56
+ DecodeGraph(size_t id, size_t maxChartSpan)
57
+ : m_id(id)
58
+ , m_maxChartSpan(maxChartSpan) {
59
+ }
60
+
61
+ //! iterators
62
+ typedef std::list<const DecodeStep*>::iterator iterator;
63
+ typedef std::list<const DecodeStep*>::const_iterator const_iterator;
64
+ const_iterator begin() const {
65
+ return m_steps.begin();
66
+ }
67
+ const_iterator end() const {
68
+ return m_steps.end();
69
+ }
70
+
71
+ virtual ~DecodeGraph();
72
+
73
+ //! Add another decode step to the graph
74
+ void Add(DecodeStep *decodeStep);
75
+
76
+ size_t GetSize() const {
77
+ return m_steps.size();
78
+ }
79
+
80
+ size_t GetMaxChartSpan() const {
81
+ UTIL_THROW_IF2(m_maxChartSpan == NOT_FOUND, "Max chart span not specified");
82
+ return m_maxChartSpan;
83
+ }
84
+
85
+ size_t GetBackoff() const {
86
+ return m_backoff;
87
+ }
88
+
89
+ void SetBackoff(size_t backoff) {
90
+ m_backoff = backoff;
91
+ }
92
+
93
+ size_t GetId() const {
94
+ return m_id;
95
+ }
96
+
97
+ };
98
+
99
+
100
+ }
101
+ #endif
mosesdecoder/moses/DecodeStep.cpp ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "DecodeStep.h"
23
+ #include "GenerationDictionary.h"
24
+ #include "StaticData.h"
25
+ #include "moses/TranslationModel/PhraseDictionary.h"
26
+
27
+ namespace Moses
28
+ {
29
+ DecodeStep::DecodeStep(DecodeFeature *decodeFeature,
30
+ const DecodeStep* prev,
31
+ const std::vector<FeatureFunction*> &features)
32
+ : m_decodeFeature(decodeFeature)
33
+ {
34
+ FactorMask prevOutputFactors;
35
+ if (prev) prevOutputFactors = prev->m_outputFactors;
36
+ m_outputFactors = prevOutputFactors;
37
+ FactorMask conflictMask = (m_outputFactors & decodeFeature->GetOutputFactorMask());
38
+ m_outputFactors |= decodeFeature->GetOutputFactorMask();
39
+ FactorMask newOutputFactorMask = m_outputFactors ^ prevOutputFactors; //xor
40
+ m_newOutputFactors.resize(newOutputFactorMask.count());
41
+ m_conflictFactors.resize(conflictMask.count());
42
+ size_t j=0, k=0;
43
+ for (size_t i = 0; i < MAX_NUM_FACTORS; i++) {
44
+ if (newOutputFactorMask[i]) m_newOutputFactors[j++] = i;
45
+ if (conflictMask[i]) m_conflictFactors[k++] = i;
46
+ }
47
+ VERBOSE(2,"DecodeStep():\n\toutputFactors=" << m_outputFactors
48
+ << "\n\tconflictFactors=" << conflictMask
49
+ << "\n\tnewOutputFactors=" << newOutputFactorMask << std::endl);
50
+
51
+ // find out which feature function can be applied in this decode step
52
+ for (size_t i = 0; i < features.size(); ++i) {
53
+ FeatureFunction *feature = features[i];
54
+ if (feature->IsUseable(m_outputFactors)) {
55
+ m_featuresToApply.push_back(feature);
56
+ } else {
57
+ m_featuresRemaining.push_back(feature);
58
+ }
59
+ }
60
+
61
+ decodeFeature->SetContainer(this);
62
+ }
63
+
64
+ DecodeStep::~DecodeStep() {}
65
+
66
+ /** returns phrase feature (dictionary) for translation step */
67
+ const PhraseDictionary* DecodeStep::GetPhraseDictionaryFeature() const
68
+ {
69
+ return dynamic_cast<const PhraseDictionary*>(m_decodeFeature);
70
+ }
71
+
72
+ /** returns generation feature (dictionary) for generation step */
73
+ const GenerationDictionary* DecodeStep::GetGenerationDictionaryFeature() const
74
+ {
75
+ return dynamic_cast<const GenerationDictionary*>(m_decodeFeature);
76
+ }
77
+
78
+ void DecodeStep::RemoveFeature(const FeatureFunction *ff)
79
+ {
80
+ for (size_t i = 0; i < m_featuresToApply.size(); ++i) {
81
+ if (ff == m_featuresToApply[i]) {
82
+ m_featuresToApply.erase(m_featuresToApply.begin() + i);
83
+ return;
84
+ }
85
+ }
86
+ }
87
+
88
+ }
89
+
90
+
mosesdecoder/moses/DecodeStep.h ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_DecodeStep_h
23
+ #define moses_DecodeStep_h
24
+
25
+ #include "TypeDef.h"
26
+ #include "FactorTypeSet.h"
27
+ #include "Phrase.h"
28
+
29
+ namespace Moses
30
+ {
31
+
32
+ class DecodeFeature;
33
+ class PhraseDictionary;
34
+ class GenerationDictionary;
35
+ class TranslationOption;
36
+ class TranslationOptionCollection;
37
+ class PartialTranslOptColl;
38
+ class FactorCollection;
39
+ class InputType;
40
+ class FeatureFunction;
41
+ class DecodeGraph;
42
+
43
+ /** Specification for a decoding step.
44
+ * The factored translation model consists of Translation and Generation
45
+ * steps, which consult a Dictionary of phrase translations or word
46
+ * generations. This class implements the specification for one of these
47
+ * steps, both the DecodeType and a pointer to the Translation or Generation Feature
48
+ **/
49
+ class DecodeStep
50
+ {
51
+ protected:
52
+ FactorMask m_outputFactors; //! mask of what factors exist on the output side after this decode step
53
+ std::vector<FactorType> m_conflictFactors; //! list of the factors that may conflict during this step
54
+ std::vector<FactorType> m_newOutputFactors; //! list of the factors that are new in this step, may be empty
55
+ const DecodeFeature* m_decodeFeature;
56
+ const DecodeGraph *m_container;
57
+
58
+ std::vector<FeatureFunction*> m_featuresToApply, m_featuresRemaining;
59
+ public:
60
+ DecodeStep(); //! not implemented
61
+ DecodeStep(DecodeFeature *featurePtr,
62
+ const DecodeStep* prevDecodeStep,
63
+ const std::vector<FeatureFunction*> &features);
64
+ virtual ~DecodeStep();
65
+
66
+ //! mask of factors that are present after this decode step
67
+ const FactorMask& GetOutputFactorMask() const {
68
+ return m_outputFactors;
69
+ }
70
+
71
+ //! returns true if this decode step must match some pre-existing factors
72
+ bool IsFilteringStep() const {
73
+ return !m_conflictFactors.empty();
74
+ }
75
+
76
+ //! returns true if this decode step produces one or more new factors
77
+ bool IsFactorProducingStep() const {
78
+ return !m_newOutputFactors.empty();
79
+ }
80
+
81
+ const std::vector<FeatureFunction*> &GetFeaturesRemaining() const {
82
+ return m_featuresRemaining;
83
+ }
84
+
85
+ /*! returns a list (possibly empty) of the (target side) factors that
86
+ * are produced in this decoding step. For example, if a previous step
87
+ * generated factor 1, and this step generates 1,2, then only 2 will be
88
+ * in the returned vector. */
89
+ const std::vector<FactorType>& GetNewOutputFactors() const {
90
+ return m_newOutputFactors;
91
+ }
92
+
93
+ /*! returns a list (possibly empty) of the (target side) factors that
94
+ * are produced BUT ALREADY EXIST and therefore must be checked for
95
+ * conflict or compatibility */
96
+ const std::vector<FactorType>& GetConflictFactors() const {
97
+ return m_conflictFactors;
98
+ }
99
+
100
+ /*! returns phrase table feature for translation step */
101
+ const PhraseDictionary* GetPhraseDictionaryFeature() const;
102
+
103
+ /*! returns generation table feature for generation step */
104
+ const GenerationDictionary* GetGenerationDictionaryFeature() const;
105
+
106
+ void RemoveFeature(const FeatureFunction *ff);
107
+
108
+ void SetContainer(const DecodeGraph *container) {
109
+ m_container = container;
110
+ }
111
+ const DecodeGraph *GetContainer() const {
112
+ return m_container;
113
+ }
114
+
115
+ };
116
+
117
+ }
118
+ #endif
mosesdecoder/moses/DecodeStepGeneration.cpp ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "DecodeStepGeneration.h"
23
+ #include "GenerationDictionary.h"
24
+ #include "TranslationOption.h"
25
+ #include "TranslationOptionCollection.h"
26
+ #include "PartialTranslOptColl.h"
27
+ #include "FactorCollection.h"
28
+
29
+ namespace Moses
30
+ {
31
+ using namespace std;
32
+
33
+ DecodeStepGeneration::DecodeStepGeneration(GenerationDictionary* dict,
34
+ const DecodeStep* prev,
35
+ const std::vector<FeatureFunction*> &features)
36
+ : DecodeStep(dict, prev, features)
37
+ {
38
+ }
39
+
40
+ // helpers
41
+ typedef pair<Word, ScoreComponentCollection> WordPair;
42
+ typedef list< WordPair > WordList;
43
+ // 1st = word
44
+ // 2nd = score
45
+ typedef list< WordPair >::const_iterator WordListIterator;
46
+
47
+ /** used in generation: increases iterators when looping through the exponential number of generation expansions */
48
+ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
49
+ , const vector< WordList > &wordListVector)
50
+ {
51
+ for (size_t currPos = 0 ; currPos < wordListVector.size() ; currPos++) {
52
+ WordListIterator &iter = wordListIterVector[currPos];
53
+ iter++;
54
+ if (iter != wordListVector[currPos].end()) {
55
+ // eg. 4 -> 5
56
+ return;
57
+ } else {
58
+ // eg 9 -> 10
59
+ iter = wordListVector[currPos].begin();
60
+ }
61
+ }
62
+ }
63
+
64
+ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
65
+ , const DecodeStep &decodeStep
66
+ , PartialTranslOptColl &outputPartialTranslOptColl
67
+ , TranslationOptionCollection * /* toc */
68
+ , bool /*adhereTableLimit*/) const
69
+ {
70
+ if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
71
+ // word deletion
72
+
73
+ TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
74
+ outputPartialTranslOptColl.Add(newTransOpt);
75
+
76
+ return;
77
+ }
78
+
79
+ // normal generation step
80
+ const GenerationDictionary* generationDictionary = decodeStep.GetGenerationDictionaryFeature();
81
+
82
+ const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase();
83
+ const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
84
+ size_t targetLength = targetPhrase.GetSize();
85
+
86
+ // generation list for each word in phrase
87
+ vector< WordList > wordListVector(targetLength);
88
+
89
+ // create generation list
90
+ int wordListVectorPos = 0;
91
+ for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { // going thorugh all words
92
+ // generatable factors for this word to be put in wordList
93
+ WordList &wordList = wordListVector[wordListVectorPos];
94
+ const Word &word = targetPhrase.GetWord(currPos);
95
+
96
+ // consult dictionary for possible generations for this word
97
+ const OutputWordCollection *wordColl = generationDictionary->FindWord(word);
98
+
99
+ if (wordColl == NULL) {
100
+ // word not found in generation dictionary
101
+ //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
102
+ return; // can't be part of a phrase, special handling
103
+ } else {
104
+ // sort(*wordColl, CompareWordCollScore);
105
+ OutputWordCollection::const_iterator iterWordColl;
106
+ for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl) {
107
+ const Word &outputWord = (*iterWordColl).first;
108
+ const ScoreComponentCollection& score = (*iterWordColl).second;
109
+ // enter into word list generated factor(s) and its(their) score(s)
110
+ wordList.push_back(WordPair(outputWord, score));
111
+ }
112
+
113
+ wordListVectorPos++; // done, next word
114
+ }
115
+ }
116
+
117
+ // use generation list (wordList)
118
+ // set up iterators (total number of expansions)
119
+ size_t numIteration = 1;
120
+ vector< WordListIterator > wordListIterVector(targetLength);
121
+ vector< const Word* > mergeWords(targetLength);
122
+ for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
123
+ wordListIterVector[currPos] = wordListVector[currPos].begin();
124
+ numIteration *= wordListVector[currPos].size();
125
+ }
126
+
127
+ // go thru each possible factor for each word & create hypothesis
128
+ for (size_t currIter = 0 ; currIter < numIteration ; currIter++) {
129
+ ScoreComponentCollection generationScore; // total score for this string of words
130
+
131
+ // create vector of words with new factors for last phrase
132
+ for (size_t currPos = 0 ; currPos < targetLength ; currPos++) {
133
+ const WordPair &wordPair = *wordListIterVector[currPos];
134
+ mergeWords[currPos] = &(wordPair.first);
135
+ generationScore.PlusEquals(wordPair.second);
136
+ }
137
+
138
+ // merge with existing trans opt
139
+ Phrase genPhrase( mergeWords);
140
+
141
+ if (IsFilteringStep()) {
142
+ if (!inputPartialTranslOpt.IsCompatible(genPhrase, m_conflictFactors))
143
+ continue;
144
+ }
145
+
146
+ const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
147
+ TargetPhrase outPhrase(inPhrase);
148
+ outPhrase.GetScoreBreakdown().PlusEquals(generationScore);
149
+
150
+ outPhrase.MergeFactors(genPhrase, m_newOutputFactors);
151
+ outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply);
152
+
153
+ const Range &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
154
+
155
+ TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
156
+ assert(newTransOpt);
157
+
158
+ newTransOpt->SetInputPath(inputPath);
159
+
160
+ outputPartialTranslOptColl.Add(newTransOpt);
161
+
162
+ // increment iterators
163
+ IncrementIterators(wordListIterVector, wordListVector);
164
+ }
165
+ }
166
+
167
+ }
168
+
169
+
mosesdecoder/moses/DecodeStepGeneration.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_DecodeStepGeneration_h
23
+ #define moses_DecodeStepGeneration_h
24
+
25
+ #include "DecodeStep.h"
26
+
27
+ namespace Moses
28
+ {
29
+
30
+ class GenerationDictionary;
31
+ class Phrase;
32
+ class ScoreComponentCollection;
33
+
34
+ //! subclass of DecodeStep for generation step
35
+ class DecodeStepGeneration : public DecodeStep
36
+ {
37
+ public:
38
+ DecodeStepGeneration(GenerationDictionary* dict,
39
+ const DecodeStep* prev,
40
+ const std::vector<FeatureFunction*> &features);
41
+
42
+
43
+ void Process(const TranslationOption &inputPartialTranslOpt
44
+ , const DecodeStep &decodeStep
45
+ , PartialTranslOptColl &outputPartialTranslOptColl
46
+ , TranslationOptionCollection *toc
47
+ , bool adhereTableLimit) const;
48
+
49
+ private:
50
+ };
51
+
52
+
53
+ }
54
+ #endif
mosesdecoder/moses/DecodeStepTranslation.cpp ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "DecodeStepTranslation.h"
23
+ #include "TranslationOption.h"
24
+ #include "TranslationOptionCollection.h"
25
+ #include "PartialTranslOptColl.h"
26
+ #include "FactorCollection.h"
27
+ #include "util/exception.hh"
28
+
29
+ using namespace std;
30
+
31
+ namespace Moses
32
+ {
33
+ DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* pdf,
34
+ const DecodeStep* prev,
35
+ const std::vector<FeatureFunction*> &features)
36
+ : DecodeStep(pdf, prev, features)
37
+ {
38
+ // don't apply feature functions that are from current phrase table.It should already have been
39
+ // dont by the phrase table.
40
+ const std::vector<FeatureFunction*> &pdfFeatures = pdf->GetFeaturesToApply();
41
+ for (size_t i = 0; i < pdfFeatures.size(); ++i) {
42
+ FeatureFunction *ff = pdfFeatures[i];
43
+ RemoveFeature(ff);
44
+ }
45
+ }
46
+
47
+ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
48
+ , const DecodeStep &decodeStep
49
+ , PartialTranslOptColl &outputPartialTranslOptColl
50
+ , TranslationOptionCollection *toc
51
+ , bool adhereTableLimit
52
+ , TargetPhraseCollection::shared_ptr phraseColl) const
53
+ {
54
+ if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
55
+ // word deletion
56
+ outputPartialTranslOptColl.Add(new TranslationOption(inputPartialTranslOpt));
57
+ return;
58
+ }
59
+
60
+ // normal trans step
61
+ const Range &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
62
+ const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
63
+ const PhraseDictionary* phraseDictionary =
64
+ decodeStep.GetPhraseDictionaryFeature();
65
+ const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
66
+ const size_t currSize = inPhrase.GetSize();
67
+ const size_t tableLimit = phraseDictionary->GetTableLimit();
68
+
69
+ if (phraseColl != NULL) {
70
+ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
71
+ iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
72
+
73
+ for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase) {
74
+ const TargetPhrase& targetPhrase = **iterTargetPhrase;
75
+ // const ScoreComponentCollection &transScores = targetPhrase.GetScoreBreakdown();
76
+ // skip if the
77
+ if (targetPhrase.GetSize() != currSize) continue;
78
+
79
+ TargetPhrase outPhrase(inPhrase);
80
+
81
+ if (IsFilteringStep()) {
82
+ if (!inputPartialTranslOpt.IsCompatible(targetPhrase, m_conflictFactors))
83
+ continue;
84
+ }
85
+
86
+ outPhrase.Merge(targetPhrase, m_newOutputFactors);
87
+ outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
88
+
89
+ TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
90
+ assert(newTransOpt != NULL);
91
+
92
+ newTransOpt->SetInputPath(inputPath);
93
+
94
+ outputPartialTranslOptColl.Add(newTransOpt );
95
+
96
+ }
97
+ } else if (sourceWordsRange.GetNumWordsCovered() == 1) {
98
+ // unknown handler
99
+ //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
100
+ }
101
+ }
102
+
103
+ void
104
+ DecodeStepTranslation::
105
+ ProcessInitialTranslation(InputType const& source,
106
+ PartialTranslOptColl &outputPartialTranslOptColl,
107
+ size_t startPos, size_t endPos,
108
+ bool adhereTableLimit,
109
+ InputPath const& inputPath,
110
+ TargetPhraseCollection::shared_ptr phraseColl) const
111
+ {
112
+ const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
113
+ const size_t tableLimit = phraseDictionary->GetTableLimit();
114
+
115
+ const Range range(startPos, endPos);
116
+
117
+ if (phraseColl != NULL) {
118
+ IFVERBOSE(3) {
119
+ if(source.GetType() == SentenceInput)
120
+ TRACE_ERR("[" << source.GetSubString(range) << "; "
121
+ << startPos << "-" << endPos << "]\n");
122
+ else
123
+ TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
124
+ }
125
+
126
+ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
127
+ iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
128
+
129
+ for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase) {
130
+ const TargetPhrase &targetPhrase = **iterTargetPhrase;
131
+ TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
132
+
133
+ transOpt->SetInputPath(inputPath);
134
+
135
+ outputPartialTranslOptColl.Add (transOpt);
136
+
137
+ VERBOSE(3,"\t" << targetPhrase << "\n");
138
+ }
139
+ VERBOSE(3,std::endl);
140
+ }
141
+ }
142
+
143
+ void
144
+ DecodeStepTranslation::
145
+ ProcessInitialTransLEGACY(InputType const& source,
146
+ PartialTranslOptColl &outputPartialTranslOptColl,
147
+ size_t startPos, size_t endPos,
148
+ bool adhereTableLimit,
149
+ InputPathList const& inputPathList) const
150
+ {
151
+ const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
152
+ const size_t tableLimit = phraseDictionary->GetTableLimit();
153
+
154
+ const Range range(startPos, endPos);
155
+ TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
156
+ = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,range);
157
+
158
+ if (phraseColl != NULL) {
159
+ IFVERBOSE(3) {
160
+ if(source.GetType() == SentenceInput)
161
+ TRACE_ERR("[" << source.GetSubString(range) << "; "
162
+ << startPos << "-" << endPos << "]\n");
163
+ else
164
+ TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
165
+ }
166
+
167
+ const std::vector<Phrase> &sourcePhrases = phraseColl->GetSourcePhrases();
168
+
169
+ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
170
+ std::vector<Phrase>::const_iterator iterSourcePhrase;
171
+ iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
172
+
173
+ for (iterTargetPhrase = phraseColl->begin(), iterSourcePhrase = sourcePhrases.begin()
174
+ ; iterTargetPhrase != iterEnd
175
+ ; ++iterTargetPhrase, ++iterSourcePhrase) {
176
+ assert(iterSourcePhrase != sourcePhrases.end());
177
+
178
+ const TargetPhrase &targetPhrase = **iterTargetPhrase;
179
+ const Phrase &sourcePhrase = *iterSourcePhrase;
180
+
181
+ const InputPath &inputPath = GetInputPathLEGACY(targetPhrase, sourcePhrase, inputPathList);
182
+
183
+ TranslationOption *transOpt = new TranslationOption(range, targetPhrase);
184
+ transOpt->SetInputPath(inputPath);
185
+
186
+ outputPartialTranslOptColl.Add (transOpt);
187
+
188
+ VERBOSE(3,"\t" << targetPhrase << "\n");
189
+ }
190
+ VERBOSE(3,std::endl);
191
+ }
192
+ }
193
+
194
+ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
195
+ const TargetPhrase targetPhrase,
196
+ const Phrase sourcePhrase,
197
+ const InputPathList &inputPathList) const
198
+ {
199
+ const Word &wordFromPt = sourcePhrase.GetWord(0);
200
+
201
+ InputPathList::const_iterator iter;
202
+ for (iter = inputPathList.begin(); iter != inputPathList.end(); ++iter) {
203
+ const InputPath &inputPath = **iter;
204
+ const Phrase &phraseFromIP = inputPath.GetPhrase();
205
+
206
+ const Word *wordIP = NULL;
207
+ for (size_t i = 0; i < phraseFromIP.GetSize(); ++i) {
208
+ const Word &tempWord = phraseFromIP.GetWord(i);
209
+ if (!tempWord.IsEpsilon()) {
210
+ wordIP = &tempWord;
211
+ break;
212
+ }
213
+ }
214
+
215
+ // const Range &range = inputPath.GetWordsRange();
216
+
217
+ if (wordIP && *wordIP == wordFromPt) {
218
+ return inputPath;
219
+ }
220
+ }
221
+
222
+ UTIL_THROW(util::Exception, "Input path not found");
223
+ }
224
+
225
+ void
226
+ DecodeStepTranslation::
227
+ ProcessLEGACY(TranslationOption const& in,
228
+ DecodeStep const& decodeStep,
229
+ PartialTranslOptColl &out,
230
+ TranslationOptionCollection *toc,
231
+ bool adhereTableLimit) const
232
+ {
233
+ if (in.GetTargetPhrase().GetSize() == 0) {
234
+ // word deletion
235
+ out.Add(new TranslationOption(in));
236
+ return;
237
+ }
238
+
239
+ // normal trans step
240
+ Range const& srcRange = in.GetSourceWordsRange();
241
+ InputPath const& inputPath = in.GetInputPath();
242
+ PhraseDictionary const* pdict = decodeStep.GetPhraseDictionaryFeature();
243
+ TargetPhrase const& inPhrase = in.GetTargetPhrase();
244
+ size_t const currSize = inPhrase.GetSize();
245
+ size_t const tableLimit = pdict->GetTableLimit();
246
+
247
+ TargetPhraseCollectionWithSourcePhrase::shared_ptr phraseColl
248
+ = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
249
+
250
+ if (phraseColl != NULL) {
251
+ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
252
+ iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
253
+ ? phraseColl->begin() + tableLimit : phraseColl->end());
254
+
255
+ for (iterTargetPhrase = phraseColl->begin();
256
+ iterTargetPhrase != iterEnd;
257
+ ++iterTargetPhrase) {
258
+ TargetPhrase const& targetPhrase = **iterTargetPhrase;
259
+ if (targetPhrase.GetSize() != currSize ||
260
+ (IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
261
+ continue;
262
+
263
+ TargetPhrase outPhrase(inPhrase);
264
+ outPhrase.Merge(targetPhrase, m_newOutputFactors);
265
+ outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
266
+
267
+ TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
268
+ assert(newTransOpt != NULL);
269
+
270
+ newTransOpt->SetInputPath(inputPath);
271
+
272
+ out.Add(newTransOpt);
273
+
274
+ }
275
+ }
276
+ }
277
+ }
278
+
279
+
280
+