sleepyhead111 committed on
Commit
5c711d5
·
verified ·
1 Parent(s): 5035531

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. mosesdecoder/moses/AlignmentInfo.cpp +170 -0
  2. mosesdecoder/moses/BitmapContainer.h +275 -0
  3. mosesdecoder/moses/ChartCell.cpp +233 -0
  4. mosesdecoder/moses/ChartCellLabelSet.h +147 -0
  5. mosesdecoder/moses/ChartHypothesisCollection.h +108 -0
  6. mosesdecoder/moses/ChartManager.cpp +867 -0
  7. mosesdecoder/moses/ChartParser.h +99 -0
  8. mosesdecoder/moses/ChartTranslationOptionList.h +90 -0
  9. mosesdecoder/moses/Factor.h +100 -0
  10. mosesdecoder/moses/FactorCollection.cpp +105 -0
  11. mosesdecoder/moses/FactorCollection.h +132 -0
  12. mosesdecoder/moses/FactorTypeSet.cpp +73 -0
  13. mosesdecoder/moses/FactorTypeSet.h +54 -0
  14. mosesdecoder/moses/FilePtr.h +89 -0
  15. mosesdecoder/moses/HypergraphOutput.h +107 -0
  16. mosesdecoder/moses/HypothesisStack.cpp +30 -0
  17. mosesdecoder/moses/HypothesisStackNormal.cpp +294 -0
  18. mosesdecoder/moses/IOWrapper.cpp +272 -0
  19. mosesdecoder/moses/InputFileStream.h +48 -0
  20. mosesdecoder/moses/LatticeMBR.cpp +680 -0
  21. mosesdecoder/moses/NonTerminal.cpp +21 -0
  22. mosesdecoder/moses/PartialTranslOptColl.cpp +112 -0
  23. mosesdecoder/moses/RuleCubeQueue.cpp +69 -0
  24. mosesdecoder/moses/RuleCubeQueue.h +66 -0
  25. mosesdecoder/moses/SquareMatrix.h +86 -0
  26. mosesdecoder/moses/StackVec.h +34 -0
  27. mosesdecoder/moses/SyntacticLanguageModelFiles.h +103 -0
  28. mosesdecoder/moses/SyntacticLanguageModelState.h +311 -0
  29. mosesdecoder/moses/Syntax/F2S/DerivationWriter.h +36 -0
  30. mosesdecoder/moses/Syntax/F2S/Forest.h +51 -0
  31. mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.cpp +85 -0
  32. mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.h +40 -0
  33. mosesdecoder/moses/Syntax/F2S/HyperPath.cpp +20 -0
  34. mosesdecoder/moses/Syntax/F2S/HyperPath.h +34 -0
  35. mosesdecoder/moses/Syntax/F2S/HyperPathLoader.h +66 -0
  36. mosesdecoder/moses/Syntax/F2S/HyperTree.cpp +70 -0
  37. mosesdecoder/moses/Syntax/F2S/HyperTree.h +92 -0
  38. mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.cpp +169 -0
  39. mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.h +41 -0
  40. mosesdecoder/moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h +34 -0
  41. mosesdecoder/moses/Syntax/F2S/PVertexToStackMap.h +20 -0
  42. mosesdecoder/moses/Syntax/F2S/RuleMatcherCallback.h +51 -0
  43. mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h +203 -0
  44. mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree.h +78 -0
  45. mosesdecoder/moses/Syntax/F2S/TopologicalSorter.cpp +55 -0
  46. mosesdecoder/moses/Syntax/F2S/TopologicalSorter.h +34 -0
  47. mosesdecoder/moses/Syntax/F2S/TreeFragmentTokenizer.cpp +100 -0
  48. mosesdecoder/moses/Syntax/Manager.h +70 -0
  49. mosesdecoder/moses/Syntax/PVertex.h +25 -0
  50. mosesdecoder/moses/Syntax/RuleTable.h +24 -0
mosesdecoder/moses/AlignmentInfo.cpp ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+ #include <algorithm>
20
+ #include <set>
21
+ #include "AlignmentInfo.h"
22
+ #include "TypeDef.h"
23
+ #include "StaticData.h"
24
+ #include "Util.h"
25
+ #include "util/exception.hh"
26
+
27
+ namespace Moses
28
+ {
29
+
30
+ AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
31
+ : m_collection(pairs)
32
+ {
33
+ BuildNonTermIndexMaps();
34
+ }
35
+
36
+ AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
37
+ {
38
+ assert(aln.size()%2==0);
39
+ for (size_t i = 0; i < aln.size(); i+= 2)
40
+ m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1])));
41
+ BuildNonTermIndexMaps();
42
+ }
43
+
44
+ AlignmentInfo::AlignmentInfo(const std::string &str)
45
+ {
46
+ std::vector<std::string> points = Tokenize(str, " ");
47
+ std::vector<std::string>::const_iterator iter;
48
+ for (iter = points.begin(); iter != points.end(); iter++) {
49
+ std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
50
+ UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
51
+ Add(point[0], point[1]);
52
+ }
53
+ }
54
+
55
+ void AlignmentInfo::BuildNonTermIndexMaps()
56
+ {
57
+ if (m_collection.empty()) {
58
+ return;
59
+ }
60
+ const_iterator p = begin();
61
+ size_t maxIndex = p->second;
62
+ for (++p; p != end(); ++p) {
63
+ if (p->second > maxIndex) {
64
+ maxIndex = p->second;
65
+ }
66
+ }
67
+ m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
68
+ m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
69
+ size_t i = 0;
70
+ for (p = begin(); p != end(); ++p) {
71
+ if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
72
+ // 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
73
+ m_nonTermIndexMap.clear();
74
+ m_nonTermIndexMap2.clear();
75
+ return;
76
+ }
77
+ m_nonTermIndexMap[p->second] = i++;
78
+ m_nonTermIndexMap2[p->second] = p->first;
79
+ }
80
+ }
81
+
82
+ std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
83
+ {
84
+ std::set<size_t> ret;
85
+ CollType::const_iterator iter;
86
+ for (iter = begin(); iter != end(); ++iter) {
87
+ // const std::pair<size_t,size_t> &align = *iter;
88
+ if (iter->first == sourcePos) {
89
+ ret.insert(iter->second);
90
+ }
91
+ }
92
+ return ret;
93
+ }
94
+
95
+ std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
96
+ {
97
+ std::set<size_t> ret;
98
+ CollType::const_iterator iter;
99
+ for (iter = begin(); iter != end(); ++iter) {
100
+ // const std::pair<size_t,size_t> &align = *iter;
101
+ if (iter->second == targetPos) {
102
+ ret.insert(iter->first);
103
+ }
104
+ }
105
+ return ret;
106
+ }
107
+
108
+
109
/** Strict ordering of alignment points by target position, with the source
 *  position as tie-breaker.
 *  \return true if *a sorts before *b
 */
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if (a->second != b->second) {
    return a->second < b->second;
  }
  return a->first < b->first;
}
117
+
118
+
119
+ std::vector< const std::pair<size_t,size_t>* >
120
+ AlignmentInfo::
121
+ GetSortedAlignments(WordAlignmentSort SortOrder) const
122
+ {
123
+ std::vector< const std::pair<size_t,size_t>* > ret;
124
+
125
+ CollType::const_iterator iter;
126
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
127
+ const std::pair<size_t,size_t> &alignPair = *iter;
128
+ ret.push_back(&alignPair);
129
+ }
130
+
131
+ switch (SortOrder) {
132
+ case NoSort:
133
+ break;
134
+
135
+ case TargetOrder:
136
+ std::sort(ret.begin(), ret.end(), compare_target);
137
+ break;
138
+
139
+ default:
140
+ UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
141
+ << SortOrder);
142
+ }
143
+
144
+ return ret;
145
+
146
+ }
147
+
148
+ std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
149
+ {
150
+ std::set<size_t> sourcePoses;
151
+
152
+ CollType::const_iterator iter;
153
+ for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
154
+ size_t sourcePos = iter->first;
155
+ sourcePoses.insert(sourcePos);
156
+ }
157
+ std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
158
+ return ret;
159
+ }
160
+
161
+ std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
162
+ {
163
+ AlignmentInfo::const_iterator iter;
164
+ for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter) {
165
+ out << iter->first << "-" << iter->second << " ";
166
+ }
167
+ return out;
168
+ }
169
+
170
+ }
mosesdecoder/moses/BitmapContainer.h ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_BitmapContainer_h
23
+ #define moses_BitmapContainer_h
24
+
25
+ #include <queue>
26
+ #include <set>
27
+ #include <vector>
28
+
29
+ #include "Hypothesis.h"
30
+ #include "HypothesisStackCubePruning.h"
31
+ #include "SquareMatrix.h"
32
+ #include "TranslationOption.h"
33
+ #include "TypeDef.h"
34
+ #include "Bitmap.h"
35
+
36
+ #include <boost/unordered_set.hpp>
37
+
38
+ namespace Moses
39
+ {
40
+
41
+ class BitmapContainer;
42
+ class BackwardsEdge;
43
+ class Hypothesis;
44
+ class HypothesisStackCubePruning;
45
+ class HypothesisQueueItem;
46
+ class QueueItemOrderer;
47
+ class TranslationOptionList;
48
+
49
+ typedef std::vector< Hypothesis* > HypothesisSet;
50
+ typedef std::set< BackwardsEdge* > BackwardsEdgeSet;
51
+ typedef std::priority_queue< HypothesisQueueItem*, std::vector< HypothesisQueueItem* >, QueueItemOrderer> HypothesisQueue;
52
+
53
+ ////////////////////////////////////////////////////////////////////////////////
54
+ // Hypothesis Priority Queue Code
55
+ ////////////////////////////////////////////////////////////////////////////////
56
+
57
+ //! 1 item in the priority queue for stack decoding (phrase-based)
58
+ class HypothesisQueueItem
59
+ {
60
+ private:
61
+ size_t m_hypothesis_pos, m_translation_pos;
62
+ Hypothesis *m_hypothesis;
63
+ BackwardsEdge *m_edge;
64
+ boost::shared_ptr<TargetPhrase> m_target_phrase;
65
+
66
+ HypothesisQueueItem();
67
+
68
+ public:
69
+ HypothesisQueueItem(const size_t hypothesis_pos
70
+ , const size_t translation_pos
71
+ , Hypothesis *hypothesis
72
+ , BackwardsEdge *edge
73
+ , const TargetPhrase *target_phrase = NULL)
74
+ : m_hypothesis_pos(hypothesis_pos)
75
+ , m_translation_pos(translation_pos)
76
+ , m_hypothesis(hypothesis)
77
+ , m_edge(edge) {
78
+ if (target_phrase != NULL) {
79
+ m_target_phrase.reset(new TargetPhrase(*target_phrase));
80
+ }
81
+ }
82
+
83
+ ~HypothesisQueueItem() {
84
+ }
85
+
86
+ int GetHypothesisPos() {
87
+ return m_hypothesis_pos;
88
+ }
89
+
90
+ int GetTranslationPos() {
91
+ return m_translation_pos;
92
+ }
93
+
94
+ Hypothesis *GetHypothesis() {
95
+ return m_hypothesis;
96
+ }
97
+
98
+ BackwardsEdge *GetBackwardsEdge() {
99
+ return m_edge;
100
+ }
101
+
102
+ boost::shared_ptr<TargetPhrase> GetTargetPhrase() {
103
+ return m_target_phrase;
104
+ }
105
+ };
106
+
107
+ //! Allows comparison of two HypothesisQueueItem objects by the corresponding scores.
108
+ class QueueItemOrderer
109
+ {
110
+ public:
111
+ bool operator()(HypothesisQueueItem* itemA, HypothesisQueueItem* itemB) const {
112
+ float scoreA = itemA->GetHypothesis()->GetFutureScore();
113
+ float scoreB = itemB->GetHypothesis()->GetFutureScore();
114
+
115
+ if (scoreA < scoreB) {
116
+ return true;
117
+ } else if (scoreA > scoreB) {
118
+ return false;
119
+ } else {
120
+ // Equal scores: break ties by comparing target phrases (if they exist)
121
+ // *Important*: these are pointers to copies of the target phrases from the
122
+ // hypotheses. This class is used to keep priority queues ordered in the
123
+ // background, so comparisons made as those data structures are cleaned up
124
+ // may occur *after* the target phrases in hypotheses have been cleaned up,
125
+ // leading to segfaults if relying on hypotheses to provide target phrases.
126
+ boost::shared_ptr<TargetPhrase> phrA = itemA->GetTargetPhrase();
127
+ boost::shared_ptr<TargetPhrase> phrB = itemB->GetTargetPhrase();
128
+ if (!phrA || !phrB) {
129
+ // Fallback: scoreA < scoreB == false, non-deterministic sort
130
+ return false;
131
+ }
132
+ return (phrA->Compare(*phrB) > 0);
133
+ }
134
+ }
135
+ };
136
+
137
+ ////////////////////////////////////////////////////////////////////////////////
138
+ // Hypothesis Orderer Code
139
+ ////////////////////////////////////////////////////////////////////////////////
140
+ // Allows comparison of two Hypothesis objects by their corresponding scores.
141
+ ////////////////////////////////////////////////////////////////////////////////
142
+
143
+ class HypothesisScoreOrderer
144
+ {
145
+ private:
146
+ bool m_deterministic;
147
+
148
+ public:
149
+ HypothesisScoreOrderer(const bool deterministic = false)
150
+ : m_deterministic(deterministic) {}
151
+
152
+ bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const {
153
+
154
+ float scoreA = hypoA->GetFutureScore();
155
+ float scoreB = hypoB->GetFutureScore();
156
+
157
+ if (scoreA > scoreB) {
158
+ return true;
159
+ } else if (scoreA < scoreB) {
160
+ return false;
161
+ } else {
162
+ if (m_deterministic) {
163
+ // Equal scores: break ties by comparing target phrases
164
+ return (hypoA->GetCurrTargetPhrase().Compare(hypoB->GetCurrTargetPhrase()) < 0);
165
+ }
166
+ // Fallback: scoreA > scoreB == false, non-deterministic sort
167
+ return false;
168
+ }
169
+ }
170
+ };
171
+
172
+ ////////////////////////////////////////////////////////////////////////////////
173
+ // Backwards Edge Code
174
+ ////////////////////////////////////////////////////////////////////////////////
175
+ // Encodes an edge pointing to a BitmapContainer.
176
+ ////////////////////////////////////////////////////////////////////////////////
177
+
178
+ class BackwardsEdge
179
+ {
180
+ private:
181
+ friend class BitmapContainer;
182
+ bool m_initialized;
183
+
184
+ const BitmapContainer &m_prevBitmapContainer;
185
+ BitmapContainer &m_parent;
186
+ const TranslationOptionList &m_translations;
187
+ const SquareMatrix &m_estimatedScores;
188
+ float m_estimatedScore;
189
+
190
+ bool m_deterministic;
191
+
192
+ std::vector< const Hypothesis* > m_hypotheses;
193
+ boost::unordered_set< int > m_seenPosition;
194
+
195
+ // We don't want to instantiate "empty" objects.
196
+ BackwardsEdge();
197
+
198
+ Hypothesis *CreateHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt);
199
+ bool SeenPosition(const size_t x, const size_t y);
200
+ void SetSeenPosition(const size_t x, const size_t y);
201
+
202
+ protected:
203
+ void Initialize();
204
+
205
+ public:
206
+ BackwardsEdge(const BitmapContainer &prevBitmapContainer
207
+ , BitmapContainer &parent
208
+ , const TranslationOptionList &translations
209
+ , const SquareMatrix &estimatedScores
210
+ , const InputType& source
211
+ , const bool deterministic = false);
212
+ ~BackwardsEdge();
213
+
214
+ bool GetInitialized();
215
+ const BitmapContainer &GetBitmapContainer() const;
216
+ int GetDistortionPenalty();
217
+ void PushSuccessors(const size_t x, const size_t y);
218
+ };
219
+
220
+ ////////////////////////////////////////////////////////////////////////////////
221
+ // Bitmap Container Code
222
+ ////////////////////////////////////////////////////////////////////////////////
223
+ // A BitmapContainer encodes an ordered set of hypotheses and a set of edges
224
+ // pointing to the "generating" BitmapContainers. It also stores a priority
225
+ // queue that contains expanded hypotheses from the connected edges.
226
+ ////////////////////////////////////////////////////////////////////////////////
227
+
228
+ class BitmapContainer
229
+ {
230
+ private:
231
+ const Bitmap &m_bitmap;
232
+ HypothesisStackCubePruning &m_stack;
233
+ HypothesisSet m_hypotheses;
234
+ BackwardsEdgeSet m_edges;
235
+ HypothesisQueue m_queue;
236
+ size_t m_numStackInsertions;
237
+ bool m_deterministic;
238
+
239
+ // We always require a corresponding bitmap to be supplied.
240
+ BitmapContainer();
241
+ BitmapContainer(const BitmapContainer &);
242
+ public:
243
+ BitmapContainer(const Bitmap &bitmap
244
+ , HypothesisStackCubePruning &stack
245
+ , bool deterministic = false);
246
+
247
+ // The destructor will also delete all the edges that are
248
+ // connected to this BitmapContainer.
249
+ ~BitmapContainer();
250
+
251
+ void Enqueue(int hypothesis_pos, int translation_pos, Hypothesis *hypothesis, BackwardsEdge *edge);
252
+ HypothesisQueueItem *Dequeue(bool keepValue=false);
253
+ HypothesisQueueItem *Top() const;
254
+ size_t Size();
255
+ bool Empty() const;
256
+
257
+ const Bitmap &GetWordsBitmap() const {
258
+ return m_bitmap;
259
+ }
260
+
261
+ const HypothesisSet &GetHypotheses() const;
262
+ size_t GetHypothesesSize() const;
263
+ const BackwardsEdgeSet &GetBackwardsEdges();
264
+
265
+ void InitializeEdges();
266
+ void ProcessBestHypothesis();
267
+ void EnsureMinStackHyps(const size_t minNumHyps);
268
+ void AddHypothesis(Hypothesis *hypothesis);
269
+ void AddBackwardsEdge(BackwardsEdge *edge);
270
+ void SortHypotheses();
271
+ };
272
+
273
+ }
274
+
275
+ #endif
mosesdecoder/moses/ChartCell.cpp ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <algorithm>
23
+ #include "ChartCell.h"
24
+ #include "ChartCellCollection.h"
25
+ #include "HypergraphOutput.h"
26
+ #include "RuleCubeQueue.h"
27
+ #include "RuleCube.h"
28
+ #include "Range.h"
29
+ #include "Util.h"
30
+ #include "ChartTranslationOptions.h"
31
+ #include "ChartTranslationOptionList.h"
32
+ #include "ChartManager.h"
33
+ #include "util/exception.hh"
34
+
35
+ using namespace std;
36
+
37
+ namespace Moses
38
+ {
39
+
40
+ ChartCellBase::ChartCellBase(size_t startPos, size_t endPos) :
41
+ m_coverage(startPos, endPos),
42
+ m_targetLabelSet(m_coverage) {}
43
+
44
+ ChartCellBase::~ChartCellBase() {}
45
+
46
+ /** Constructor
47
+ * \param startPos endPos range of this cell
48
+ * \param manager pointer back to the manager
49
+ */
50
+ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
51
+ ChartCellBase(startPos, endPos), m_manager(manager)
52
+ {
53
+ m_nBestIsEnabled = manager.options()->nbest.enabled;
54
+ }
55
+
56
+ ChartCell::~ChartCell() {}
57
+
58
+ /** Add the given hypothesis to the cell.
59
+ * Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
60
+ * This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
61
+ * \param hypo Hypothesis to be added
62
+ */
63
+ bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
64
+ {
65
+ const Word &targetLHS = hypo->GetTargetLHS();
66
+ MapType::iterator m = m_hypoColl.find(targetLHS);
67
+ if (m == m_hypoColl.end()) {
68
+ std::pair<Word, ChartHypothesisCollection>
69
+ e(targetLHS, ChartHypothesisCollection(*m_manager.options()));
70
+ m = m_hypoColl.insert(e).first;
71
+ }
72
+ return m->second.AddHypothesis(hypo, m_manager);
73
+ }
74
+
75
+ /** Prune each collection in this cell to a particular size */
76
+ void ChartCell::PruneToSize()
77
+ {
78
+ MapType::iterator iter;
79
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
80
+ ChartHypothesisCollection &coll = iter->second;
81
+ coll.PruneToSize(m_manager);
82
+ }
83
+ }
84
+
85
+ /** Decoding at span level: fill chart cell with hypotheses
86
+ * (implementation of cube pruning)
87
+ * \param transOptList list of applicable rules to create hypotheses for the cell
88
+ * \param allChartCells entire chart - needed to look up underlying hypotheses
89
+ */
90
+ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
91
+ , const ChartCellCollection &allChartCells)
92
+ {
93
+ // priority queue for applicable rules with selected hypotheses
94
+ RuleCubeQueue queue(m_manager);
95
+
96
+ // add all trans opt into queue. using only 1st child node.
97
+ for (size_t i = 0; i < transOptList.GetSize(); ++i) {
98
+ const ChartTranslationOptions &transOpt = transOptList.Get(i);
99
+ RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
100
+ queue.Add(ruleCube);
101
+ }
102
+
103
+ // pluck things out of queue and add to hypo collection
104
+ const size_t popLimit = m_manager.options()->cube.pop_limit;
105
+ for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
106
+ ChartHypothesis *hypo = queue.Pop();
107
+ AddHypothesis(hypo);
108
+ }
109
+ }
110
+
111
+ //! call SortHypotheses() in each hypo collection in this cell
112
+ void ChartCell::SortHypotheses()
113
+ {
114
+ UTIL_THROW_IF2(!m_targetLabelSet.Empty(), "Already sorted");
115
+
116
+ MapType::iterator iter;
117
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
118
+ ChartHypothesisCollection &coll = iter->second;
119
+
120
+ if (coll.GetSize()) {
121
+ coll.SortHypotheses();
122
+ m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
123
+ }
124
+ }
125
+ }
126
+
127
+ /** Return the highest scoring hypothesis out of all the hypo collection in this cell */
128
+ const ChartHypothesis *ChartCell::GetBestHypothesis() const
129
+ {
130
+ const ChartHypothesis *ret = NULL;
131
+ float bestScore = -std::numeric_limits<float>::infinity();
132
+
133
+ MapType::const_iterator iter;
134
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
135
+ const HypoList &sortedList = iter->second.GetSortedHypotheses();
136
+ if (sortedList.size() > 0) {
137
+ const ChartHypothesis *hypo = sortedList[0];
138
+ if (hypo->GetFutureScore() > bestScore) {
139
+ bestScore = hypo->GetFutureScore();
140
+ ret = hypo;
141
+ }
142
+ }
143
+ }
144
+
145
+ return ret;
146
+ }
147
+
148
+ //! call CleanupArcList() in each hypo collection in this cell
149
+ void ChartCell::CleanupArcList()
150
+ {
151
+ // only necessary if n-best calculations are enabled
152
+ if (!m_nBestIsEnabled) return;
153
+
154
+ MapType::iterator iter;
155
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
156
+ ChartHypothesisCollection &coll = iter->second;
157
+ coll.CleanupArcList();
158
+ }
159
+ }
160
+
161
+ //! debug info - size of each hypo collection in this cell
162
+ void ChartCell::OutputSizes(std::ostream &out) const
163
+ {
164
+ MapType::const_iterator iter;
165
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
166
+ const Word &targetLHS = iter->first;
167
+ const ChartHypothesisCollection &coll = iter->second;
168
+
169
+ out << targetLHS << "=" << coll.GetSize() << " ";
170
+ }
171
+ }
172
+
173
+ //! debug info - total number of hypos in all hypo collection in this cell
174
+ size_t ChartCell::GetSize() const
175
+ {
176
+ size_t ret = 0;
177
+ MapType::const_iterator iter;
178
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
179
+ const ChartHypothesisCollection &coll = iter->second;
180
+
181
+ ret += coll.GetSize();
182
+ }
183
+
184
+ return ret;
185
+ }
186
+
187
+ const HypoList *ChartCell::GetAllSortedHypotheses() const
188
+ {
189
+ HypoList *ret = new HypoList();
190
+
191
+ MapType::const_iterator iter;
192
+ for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
193
+ const ChartHypothesisCollection &coll = iter->second;
194
+ const HypoList &list = coll.GetSortedHypotheses();
195
+ std::copy(list.begin(), list.end(), std::inserter(*ret, ret->end()));
196
+ }
197
+ return ret;
198
+ }
199
+
200
+ //! call WriteSearchGraph() for each hypo collection
201
+ void ChartCell::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
202
+ {
203
+ MapType::const_iterator iterOutside;
204
+ for (iterOutside = m_hypoColl.begin(); iterOutside != m_hypoColl.end(); ++iterOutside) {
205
+ const ChartHypothesisCollection &coll = iterOutside->second;
206
+ coll.WriteSearchGraph(writer, reachable);
207
+ }
208
+ }
209
+
210
+ std::ostream& operator<<(std::ostream &out, const ChartCell &cell)
211
+ {
212
+ ChartCell::MapType::const_iterator iterOutside;
213
+ for (iterOutside = cell.m_hypoColl.begin(); iterOutside != cell.m_hypoColl.end(); ++iterOutside) {
214
+ const Word &targetLHS = iterOutside->first;
215
+ cerr << targetLHS << ":" << endl;
216
+
217
+ const ChartHypothesisCollection &coll = iterOutside->second;
218
+ cerr << coll;
219
+ }
220
+
221
+ /*
222
+ ChartCell::HCType::const_iterator iter;
223
+ for (iter = cell.m_hypos.begin(); iter != cell.m_hypos.end(); ++iter)
224
+ {
225
+ const ChartHypothesis &hypo = **iter;
226
+ out << hypo << endl;
227
+ }
228
+ */
229
+
230
+ return out;
231
+ }
232
+
233
+ } // namespace
mosesdecoder/moses/ChartCellLabelSet.h ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "ChartCellLabel.h"
23
+ #include "NonTerminal.h"
24
+ #include "moses/FactorCollection.h"
25
+
26
+ #include <boost/functional/hash.hpp>
27
+ #include <boost/unordered_map.hpp>
28
+ #include <boost/version.hpp>
29
+
30
+ namespace Moses
31
+ {
32
+
33
+ class ChartHypothesisCollection;
34
+
35
+ /** @todo I have no idea what's in here
36
+ */
37
+ class ChartCellLabelSet
38
+ {
39
+ private:
40
+
41
+ typedef std::vector<ChartCellLabel*> MapType;
42
+
43
+ public:
44
+ typedef MapType::const_iterator const_iterator;
45
+ typedef MapType::iterator iterator;
46
+
47
+ ChartCellLabelSet(const Range &coverage)
48
+ : m_coverage(coverage)
49
+ , m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
50
+ , m_size(0) { }
51
+
52
+ ~ChartCellLabelSet() {
53
+ RemoveAllInColl(m_map);
54
+ }
55
+
56
+ // TODO: skip empty elements when iterating, or deprecate this
57
+ const_iterator begin() const {
58
+ return m_map.begin();
59
+ }
60
+ const_iterator end() const {
61
+ return m_map.end();
62
+ }
63
+
64
+ iterator mutable_begin() {
65
+ return m_map.begin();
66
+ }
67
+ iterator mutable_end() {
68
+ return m_map.end();
69
+ }
70
+
71
+ void AddWord(const Word &w) {
72
+ size_t idx = w[0]->GetId();
73
+ if (! ChartCellExists(idx)) {
74
+ m_size++;
75
+
76
+
77
+ m_map[idx] = new ChartCellLabel(m_coverage, w);
78
+ }
79
+ }
80
+
81
+ // Stack is a HypoList or whatever the search algorithm uses.
82
+ void AddConstituent(const Word &w, const HypoList *stack) {
83
+ size_t idx = w[0]->GetId();
84
+ if (ChartCellExists(idx)) {
85
+ ChartCellLabel::Stack & s = m_map[idx]->MutableStack();
86
+ s.cube = stack;
87
+ } else {
88
+ ChartCellLabel::Stack s;
89
+ s.cube = stack;
90
+ m_size++;
91
+ m_map[idx] = new ChartCellLabel(m_coverage, w, s);
92
+ }
93
+ }
94
+
95
+ // grow vector if necessary
96
+ bool ChartCellExists(size_t idx) {
97
+ try {
98
+ if (m_map.at(idx) != NULL) {
99
+ return true;
100
+ }
101
+ } catch (const std::out_of_range& oor) {
102
+ m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL);
103
+ }
104
+ return false;
105
+ }
106
+
107
+ bool Empty() const {
108
+ return m_size == 0;
109
+ }
110
+
111
+ size_t GetSize() const {
112
+ return m_size;
113
+ }
114
+
115
+ const ChartCellLabel *Find(const Word &w) const {
116
+ size_t idx = w[0]->GetId();
117
+ try {
118
+ return m_map.at(idx);
119
+ } catch (const std::out_of_range& oor) {
120
+ return NULL;
121
+ }
122
+ }
123
+
124
+ const ChartCellLabel *Find(size_t idx) const {
125
+ try {
126
+ return m_map.at(idx);
127
+ } catch (const std::out_of_range& oor) {
128
+ return NULL;
129
+ }
130
+ }
131
+
132
+ ChartCellLabel::Stack &FindOrInsert(const Word &w) {
133
+ size_t idx = w[0]->GetId();
134
+ if (! ChartCellExists(idx)) {
135
+ m_size++;
136
+ m_map[idx] = new ChartCellLabel(m_coverage, w);
137
+ }
138
+ return m_map[idx]->MutableStack();
139
+ }
140
+
141
+ private:
142
+ const Range &m_coverage;
143
+ MapType m_map;
144
+ size_t m_size;
145
+ };
146
+
147
+ }
mosesdecoder/moses/ChartHypothesisCollection.h ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+ #pragma once
22
+
23
+ #include <set>
24
+ #include "ChartHypothesis.h"
25
+ #include "RuleCube.h"
26
+
27
+
28
+ namespace Moses
29
+ {
30
+
31
+ class ChartSearchGraphWriter;
32
+ struct AllOptions;
33
+
34
+ //! functor to compare (chart) hypotheses by (descending) score
35
+ class ChartHypothesisScoreOrderer
36
+ {
37
+ public:
38
+ bool operator()(const ChartHypothesis* hypoA, const ChartHypothesis* hypoB) const {
39
+ return hypoA->GetFutureScore() > hypoB->GetFutureScore();
40
+ }
41
+ };
42
+
43
+ /** Contains a set of unique hypos that have the same HS non-term.
44
+ * ie. 1 of these for each target LHS in each cell
45
+ */
46
+ class ChartHypothesisCollection
47
+ {
48
+ friend std::ostream& operator<<(std::ostream&, const ChartHypothesisCollection&);
49
+
50
+ protected:
51
+ //typedef std::set<ChartHypothesis*, ChartHypothesisRecombinationOrderer> HCType;
52
+ typedef boost::unordered_set< ChartHypothesis*, UnorderedComparer<ChartHypothesis>, UnorderedComparer<ChartHypothesis> > HCType;
53
+ HCType m_hypos;
54
+ HypoList m_hyposOrdered;
55
+
56
+ float m_bestScore; /**< score of the best hypothesis in collection */
57
+ float m_beamWidth; /**< minimum score due to threashold pruning */
58
+ size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
59
+ bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
60
+
61
+ std::pair<HCType::iterator, bool> Add(ChartHypothesis *hypo, ChartManager &manager);
62
+
63
+ public:
64
+ typedef HCType::iterator iterator;
65
+ typedef HCType::const_iterator const_iterator;
66
+ //! iterators
67
+ const_iterator begin() const {
68
+ return m_hypos.begin();
69
+ }
70
+ const_iterator end() const {
71
+ return m_hypos.end();
72
+ }
73
+
74
+ ChartHypothesisCollection(AllOptions const& opts);
75
+ ~ChartHypothesisCollection();
76
+ bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);
77
+
78
+ void Detach(const HCType::iterator &iter);
79
+ void Remove(const HCType::iterator &iter);
80
+
81
+ void PruneToSize(ChartManager &manager);
82
+
83
+ size_t GetSize() const {
84
+ return m_hypos.size();
85
+ }
86
+ size_t GetHypo() const {
87
+ return m_hypos.size();
88
+ }
89
+
90
+ void SortHypotheses();
91
+ void CleanupArcList();
92
+
93
+ //! return vector of hypothesis that has been sorted by score
94
+ const HypoList &GetSortedHypotheses() const {
95
+ return m_hyposOrdered;
96
+ }
97
+
98
+ //! return the best total score of all hypos in this collection
99
+ float GetBestScore() const {
100
+ return m_bestScore;
101
+ }
102
+
103
+ void WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned,bool> &reachable) const;
104
+
105
+ };
106
+
107
+ } // namespace
108
+
mosesdecoder/moses/ChartManager.cpp ADDED
@@ -0,0 +1,867 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <cstdio>
23
+ #include "ChartManager.h"
24
+ #include "ChartCell.h"
25
+ #include "ChartHypothesis.h"
26
+ #include "ChartKBestExtractor.h"
27
+ #include "ChartTranslationOptions.h"
28
+ #include "HypergraphOutput.h"
29
+ #include "StaticData.h"
30
+ #include "DecodeStep.h"
31
+ #include "TreeInput.h"
32
+ #include "moses/FF/StatefulFeatureFunction.h"
33
+ #include "moses/FF/WordPenaltyProducer.h"
34
+ #include "moses/OutputCollector.h"
35
+ #include "moses/ChartKBestExtractor.h"
36
+ #include "moses/HypergraphOutput.h"
37
+ #include "moses/TranslationTask.h"
38
+
39
+ using namespace std;
40
+
41
+ namespace Moses
42
+ {
43
+
44
+ /* constructor. Initialize everything prior to decoding a particular sentence.
45
+ * \param source the sentence to be decoded
46
+ * \param system which particular set of models to use.
47
+ */
48
+ ChartManager::ChartManager(ttasksptr const& ttask)
49
+ : BaseManager(ttask)
50
+ , m_hypoStackColl(m_source, *this)
51
+ , m_start(clock())
52
+ , m_hypothesisId(0)
53
+ , m_parser(ttask, m_hypoStackColl)
54
+ , m_translationOptionList(ttask->options()->syntax.rule_limit, m_source)
55
+ { }
56
+
57
+ ChartManager::~ChartManager()
58
+ {
59
+ clock_t end = clock();
60
+ float et = (end - m_start);
61
+ et /= (float)CLOCKS_PER_SEC;
62
+ VERBOSE(1, "Translation took " << et << " seconds" << endl);
63
+
64
+ }
65
+
66
+ //! decode the sentence. This contains the main laps. Basically, the CKY++ algorithm
67
+ void ChartManager::Decode()
68
+ {
69
+
70
+ VERBOSE(1,"Translating: " << m_source << endl);
71
+
72
+ ResetSentenceStats(m_source);
73
+
74
+ VERBOSE(2,"Decoding: " << endl);
75
+ //ChartHypothesis::ResetHypoCount();
76
+
77
+ AddXmlChartOptions();
78
+
79
+ // MAIN LOOP
80
+ size_t size = m_source.GetSize();
81
+ for (int startPos = size-1; startPos >= 0; --startPos) {
82
+ for (size_t width = 1; width <= size-startPos; ++width) {
83
+ size_t endPos = startPos + width - 1;
84
+ Range range(startPos, endPos);
85
+
86
+ // create trans opt
87
+ m_translationOptionList.Clear();
88
+ m_parser.Create(range, m_translationOptionList);
89
+ m_translationOptionList.ApplyThreshold(options()->search.trans_opt_threshold);
90
+
91
+ const InputPath &inputPath = m_parser.GetInputPath(range);
92
+ m_translationOptionList.EvaluateWithSourceContext(m_source, inputPath);
93
+
94
+ // decode
95
+ ChartCell &cell = m_hypoStackColl.Get(range);
96
+ cell.Decode(m_translationOptionList, m_hypoStackColl);
97
+
98
+ m_translationOptionList.Clear();
99
+ cell.PruneToSize();
100
+ cell.CleanupArcList();
101
+ cell.SortHypotheses();
102
+ }
103
+ }
104
+
105
+ IFVERBOSE(1) {
106
+
107
+ for (size_t startPos = 0; startPos < size; ++startPos) {
108
+ cerr.width(3);
109
+ cerr << startPos << " ";
110
+ }
111
+ cerr << endl;
112
+ for (size_t width = 1; width <= size; width++) {
113
+ for( size_t space = 0; space < width-1; space++ ) {
114
+ cerr << " ";
115
+ }
116
+ for (size_t startPos = 0; startPos <= size-width; ++startPos) {
117
+ Range range(startPos, startPos+width-1);
118
+ cerr.width(3);
119
+ cerr << m_hypoStackColl.Get(range).GetSize() << " ";
120
+ }
121
+ cerr << endl;
122
+ }
123
+ }
124
+ }
125
+
126
+ /** add specific translation options and hypotheses according to the XML override translation scheme.
127
+ * Doesn't seem to do anything about walls and zones.
128
+ * @todo check walls & zones. Check that the implementation doesn't leak, xml options sometimes does if you're not careful
129
+ */
130
+ void ChartManager::AddXmlChartOptions()
131
+ {
132
+ const std::vector <ChartTranslationOptions*> xmlChartOptionsList
133
+ = m_source.GetXmlChartTranslationOptions();
134
+ IFVERBOSE(2) {
135
+ cerr << "AddXmlChartOptions " << xmlChartOptionsList.size() << endl;
136
+ }
137
+ if (xmlChartOptionsList.size() == 0) return;
138
+
139
+ typedef std::vector<ChartTranslationOptions*>::const_iterator citer;
140
+ for(citer i = xmlChartOptionsList.begin(); i != xmlChartOptionsList.end(); ++i) {
141
+ ChartTranslationOptions* opt = *i;
142
+
143
+ const Range &range = opt->GetSourceWordsRange();
144
+
145
+ RuleCubeItem* item = new RuleCubeItem( *opt, m_hypoStackColl );
146
+ ChartHypothesis* hypo = new ChartHypothesis(*opt, *item, *this);
147
+ hypo->EvaluateWhenApplied();
148
+
149
+
150
+ ChartCell &cell = m_hypoStackColl.Get(range);
151
+ cell.AddHypothesis(hypo);
152
+ }
153
+ }
154
+
155
+ //! get best complete translation from the top chart cell.
156
+ const ChartHypothesis *ChartManager::GetBestHypothesis() const
157
+ {
158
+ size_t size = m_source.GetSize();
159
+
160
+ if (size == 0) // empty source
161
+ return NULL;
162
+ else {
163
+ Range range(0, size-1);
164
+ const ChartCell &lastCell = m_hypoStackColl.Get(range);
165
+ return lastCell.GetBestHypothesis();
166
+ }
167
+ }
168
+
169
+ /** Calculate the n-best paths through the output hypergraph.
170
+ * Return the list of paths with the variable ret
171
+ * \param n how may paths to return
172
+ * \param ret return argument
173
+ * \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
174
+ */
175
+ void ChartManager::CalcNBest(
176
+ std::size_t n,
177
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > &nBestList,
178
+ bool onlyDistinct) const
179
+ {
180
+ nBestList.clear();
181
+ if (n == 0 || m_source.GetSize() == 0) {
182
+ return;
183
+ }
184
+
185
+ // Get the list of top-level hypotheses, sorted by score.
186
+ Range range(0, m_source.GetSize()-1);
187
+ const ChartCell &lastCell = m_hypoStackColl.Get(range);
188
+ boost::scoped_ptr<const std::vector<const ChartHypothesis*> > topLevelHypos(
189
+ lastCell.GetAllSortedHypotheses());
190
+ if (!topLevelHypos) {
191
+ return;
192
+ }
193
+
194
+ ChartKBestExtractor extractor;
195
+
196
+ if (!onlyDistinct) {
197
+ // Return the n-best list as is, including duplicate translations.
198
+ extractor.Extract(*topLevelHypos, n, nBestList);
199
+ return;
200
+ }
201
+
202
+ // Determine how many derivations to extract. If the n-best list is
203
+ // restricted to distinct translations then this limit should be bigger
204
+ // than n. The n-best factor determines how much bigger the limit should be,
205
+ // with 0 being 'unlimited.' This actually sets a large-ish limit in case
206
+ // too many translations are identical.
207
+ const std::size_t nBestFactor = options()->nbest.factor;
208
+ std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
209
+
210
+ // Extract the derivations.
211
+ ChartKBestExtractor::KBestVec bigList;
212
+ bigList.reserve(numDerivations);
213
+ extractor.Extract(*topLevelHypos, numDerivations, bigList);
214
+
215
+ // Copy derivations into nBestList, skipping ones with repeated translations.
216
+ std::set<Phrase> distinct;
217
+ for (ChartKBestExtractor::KBestVec::const_iterator p = bigList.begin();
218
+ nBestList.size() < n && p != bigList.end(); ++p) {
219
+ boost::shared_ptr<ChartKBestExtractor::Derivation> derivation = *p;
220
+ Phrase translation = ChartKBestExtractor::GetOutputPhrase(*derivation);
221
+ if (distinct.insert(translation).second) {
222
+ nBestList.push_back(derivation);
223
+ }
224
+ }
225
+ }
226
+
227
+ void ChartManager::WriteSearchGraph(const ChartSearchGraphWriter& writer) const
228
+ {
229
+
230
+ size_t size = m_source.GetSize();
231
+
232
+ // which hypotheses are reachable?
233
+ std::map<unsigned,bool> reachable;
234
+ Range fullRange(0, size-1);
235
+ const ChartCell &lastCell = m_hypoStackColl.Get(fullRange);
236
+ const ChartHypothesis *hypo = lastCell.GetBestHypothesis();
237
+
238
+ if (hypo == NULL) {
239
+ // no hypothesis
240
+ return;
241
+ }
242
+ size_t winners = 0;
243
+ size_t losers = 0;
244
+
245
+ FindReachableHypotheses( hypo, reachable, &winners, &losers);
246
+ writer.WriteHeader(winners, losers);
247
+
248
+ for (size_t width = 1; width <= size; ++width) {
249
+ for (size_t startPos = 0; startPos <= size-width; ++startPos) {
250
+ size_t endPos = startPos + width - 1;
251
+ Range range(startPos, endPos);
252
+ TRACE_ERR(" " << range << "=");
253
+
254
+ const ChartCell &cell = m_hypoStackColl.Get(range);
255
+ cell.WriteSearchGraph(writer, reachable);
256
+ }
257
+ }
258
+ }
259
+
260
+ void ChartManager::FindReachableHypotheses(
261
+ const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable, size_t* winners, size_t* losers) const
262
+ {
263
+ // do not recurse, if already visited
264
+ if (reachable.find(hypo->GetId()) != reachable.end()) {
265
+ return;
266
+ }
267
+
268
+ // recurse
269
+ reachable[ hypo->GetId() ] = true;
270
+ if (hypo->GetWinningHypothesis() == hypo) {
271
+ (*winners)++;
272
+ } else {
273
+ (*losers)++;
274
+ }
275
+ const std::vector<const ChartHypothesis*> &previous = hypo->GetPrevHypos();
276
+ for(std::vector<const ChartHypothesis*>::const_iterator i = previous.begin(); i != previous.end(); ++i) {
277
+ FindReachableHypotheses( *i, reachable, winners, losers );
278
+ }
279
+
280
+ // also loop over recombined hypotheses (arcs)
281
+ const ChartArcList *arcList = hypo->GetArcList();
282
+ if (arcList) {
283
+ ChartArcList::const_iterator iterArc;
284
+ for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
285
+ const ChartHypothesis &arc = **iterArc;
286
+ FindReachableHypotheses( &arc, reachable, winners, losers );
287
+ }
288
+ }
289
+ }
290
+
291
+ void
292
+ ChartManager::
293
+ OutputSearchGraphAsHypergraph(std::ostream& out) const
294
+ {
295
+ ChartSearchGraphWriterHypergraph writer(options(), &out);
296
+ WriteSearchGraph(writer);
297
+ }
298
+
299
+ void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) const
300
+ {
301
+ ChartSearchGraphWriterMoses writer(options(), &outputSearchGraphStream,
302
+ m_source.GetTranslationId());
303
+ WriteSearchGraph(writer);
304
+ }
305
+
306
+ void ChartManager::OutputBest(OutputCollector *collector) const
307
+ {
308
+ const ChartHypothesis *bestHypo = GetBestHypothesis();
309
+ if (collector && bestHypo) {
310
+ const size_t translationId = m_source.GetTranslationId();
311
+ const ChartHypothesis *bestHypo = GetBestHypothesis();
312
+ OutputBestHypo(collector, bestHypo, translationId);
313
+ }
314
+ }
315
+
316
+ void ChartManager::OutputNBest(OutputCollector *collector) const
317
+ {
318
+ size_t nBestSize = options()->nbest.nbest_size;
319
+ if (nBestSize > 0) {
320
+ const size_t translationId = m_source.GetTranslationId();
321
+
322
+ VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
323
+ << options()->nbest.output_file_path << endl);
324
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
325
+ CalcNBest(nBestSize, nBestList, options()->nbest.only_distinct);
326
+ OutputNBestList(collector, nBestList, translationId);
327
+ IFVERBOSE(2) {
328
+ PrintUserTime("N-Best Hypotheses Generation Time:");
329
+ }
330
+ }
331
+
332
+ }
333
+
334
+ void ChartManager::OutputNBestList(OutputCollector *collector,
335
+ const ChartKBestExtractor::KBestVec &nBestList,
336
+ long translationId) const
337
+ {
338
+ std::ostringstream out;
339
+
340
+ if (collector->OutputIsCout()) {
341
+ // Set precision only if we're writing the n-best list to cout. This is to
342
+ // preserve existing behaviour, but should probably be done either way.
343
+ FixPrecision(out);
344
+ }
345
+
346
+ NBestOptions const& nbo = options()->nbest;
347
+ bool includeWordAlignment = nbo.include_alignment_info;
348
+ bool PrintNBestTrees = nbo.print_trees;
349
+
350
+ for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
351
+ p != nBestList.end(); ++p) {
352
+ const ChartKBestExtractor::Derivation &derivation = **p;
353
+
354
+ // get the derivation's target-side yield
355
+ Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
356
+
357
+ // delete <s> and </s>
358
+ UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
359
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
360
+ outputPhrase.RemoveWord(0);
361
+ outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
362
+
363
+ // print the translation ID, surface factors, and scores
364
+ out << translationId << " ||| ";
365
+ OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
366
+ out << " ||| ";
367
+ boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
368
+ bool with_labels = options()->nbest.include_feature_labels;
369
+ scoreBreakdown->OutputAllFeatureScores(out, with_labels);
370
+ out << " ||| " << derivation.score;
371
+
372
+ // optionally, print word alignments
373
+ if (includeWordAlignment) {
374
+ out << " ||| ";
375
+ Alignments align;
376
+ OutputAlignmentNBest(align, derivation, 0);
377
+ for (Alignments::const_iterator q = align.begin(); q != align.end();
378
+ ++q) {
379
+ out << q->first << "-" << q->second << " ";
380
+ }
381
+ }
382
+
383
+ // optionally, print tree
384
+ if (PrintNBestTrees) {
385
+ TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
386
+ out << " ||| " << tree->GetString();
387
+ }
388
+
389
+ out << std::endl;
390
+ }
391
+
392
+ assert(collector);
393
+ collector->Write(translationId, out.str());
394
+ }
395
+
396
+ size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
397
+ {
398
+ size_t ret = hypo->GetCurrSourceRange().GetNumWordsCovered();
399
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
400
+ for (size_t i = 0; i < prevHypos.size(); ++i) {
401
+ size_t childSize = prevHypos[i]->GetCurrSourceRange().GetNumWordsCovered();
402
+ ret -= (childSize - 1);
403
+ }
404
+ return ret;
405
+ }
406
+
407
+ size_t ChartManager::OutputAlignmentNBest(
408
+ Alignments &retAlign,
409
+ const Moses::ChartKBestExtractor::Derivation &derivation,
410
+ size_t startTarget) const
411
+ {
412
+ const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
413
+
414
+ size_t totalTargetSize = 0;
415
+ size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
416
+
417
+ const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
418
+
419
+ size_t thisSourceSize = CalcSourceSize(&hypo);
420
+
421
+ // position of each terminal word in translation rule, irrespective of alignment
422
+ // if non-term, number is undefined
423
+ vector<size_t> sourceOffsets(thisSourceSize, 0);
424
+ vector<size_t> targetOffsets(tp.GetSize(), 0);
425
+
426
+ const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
427
+ vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
428
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
429
+
430
+ UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
431
+ "Error");
432
+
433
+ size_t targetInd = 0;
434
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
435
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
436
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
437
+ size_t sourceInd = targetPos2SourceInd[targetPos];
438
+ size_t sourcePos = sourceInd2pos[sourceInd];
439
+
440
+ const Moses::ChartKBestExtractor::Derivation &subderivation =
441
+ *derivation.subderivations[sourceInd];
442
+
443
+ // calc source size
444
+ size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
445
+ sourceOffsets[sourcePos] = sourceSize;
446
+
447
+ // calc target size.
448
+ // Recursively look thru child hypos
449
+ size_t currStartTarget = startTarget + totalTargetSize;
450
+ size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
451
+ currStartTarget);
452
+ targetOffsets[targetPos] = targetSize;
453
+
454
+ totalTargetSize += targetSize;
455
+ ++targetInd;
456
+ } else {
457
+ ++totalTargetSize;
458
+ }
459
+ }
460
+
461
+ // convert position within translation rule to absolute position within
462
+ // source sentence / output sentence
463
+ ShiftOffsets(sourceOffsets, startSource);
464
+ ShiftOffsets(targetOffsets, startTarget);
465
+
466
+ // get alignments from this hypo
467
+ const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();
468
+
469
+ // add to output arg, offsetting by source & target
470
+ AlignmentInfo::const_iterator iter;
471
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
472
+ const std::pair<size_t,size_t> &align = *iter;
473
+ size_t relSource = align.first;
474
+ size_t relTarget = align.second;
475
+ size_t absSource = sourceOffsets[relSource];
476
+ size_t absTarget = targetOffsets[relTarget];
477
+
478
+ pair<size_t, size_t> alignPoint(absSource, absTarget);
479
+ pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
480
+ UTIL_THROW_IF2(!ret.second, "Error");
481
+ }
482
+
483
+ return totalTargetSize;
484
+ }
485
+
486
+ void ChartManager::OutputAlignment(OutputCollector *collector) const
487
+ {
488
+ if (collector == NULL) {
489
+ return;
490
+ }
491
+
492
+ ostringstream out;
493
+
494
+ const ChartHypothesis *hypo = GetBestHypothesis();
495
+ if (hypo) {
496
+ Alignments retAlign;
497
+ OutputAlignment(retAlign, hypo, 0);
498
+
499
+ // output alignments
500
+ Alignments::const_iterator iter;
501
+ for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
502
+ const pair<size_t, size_t> &alignPoint = *iter;
503
+ out << alignPoint.first << "-" << alignPoint.second << " ";
504
+ }
505
+ }
506
+ out << endl;
507
+
508
+ collector->Write(m_source.GetTranslationId(), out.str());
509
+
510
+ }
511
+
512
+ size_t ChartManager::OutputAlignment(Alignments &retAlign,
513
+ const Moses::ChartHypothesis *hypo,
514
+ size_t startTarget) const
515
+ {
516
+ size_t totalTargetSize = 0;
517
+ size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
518
+
519
+ const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
520
+
521
+ size_t thisSourceSize = CalcSourceSize(hypo);
522
+
523
+ // position of each terminal word in translation rule, irrespective of alignment
524
+ // if non-term, number is undefined
525
+ vector<size_t> sourceOffsets(thisSourceSize, 0);
526
+ vector<size_t> targetOffsets(tp.GetSize(), 0);
527
+
528
+ const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
529
+
530
+ const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
531
+ vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
532
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
533
+
534
+ UTIL_THROW_IF2(sourceInd2pos.size() != prevHypos.size(), "Error");
535
+
536
+ size_t targetInd = 0;
537
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
538
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
539
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
540
+ size_t sourceInd = targetPos2SourceInd[targetPos];
541
+ size_t sourcePos = sourceInd2pos[sourceInd];
542
+
543
+ const ChartHypothesis *prevHypo = prevHypos[sourceInd];
544
+
545
+ // calc source size
546
+ size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
547
+ sourceOffsets[sourcePos] = sourceSize;
548
+
549
+ // calc target size.
550
+ // Recursively look thru child hypos
551
+ size_t currStartTarget = startTarget + totalTargetSize;
552
+ size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
553
+ targetOffsets[targetPos] = targetSize;
554
+
555
+ totalTargetSize += targetSize;
556
+ ++targetInd;
557
+ } else {
558
+ ++totalTargetSize;
559
+ }
560
+ }
561
+
562
+ // convert position within translation rule to absolute position within
563
+ // source sentence / output sentence
564
+ ShiftOffsets(sourceOffsets, startSource);
565
+ ShiftOffsets(targetOffsets, startTarget);
566
+
567
+ // get alignments from this hypo
568
+ const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
569
+
570
+ // add to output arg, offsetting by source & target
571
+ AlignmentInfo::const_iterator iter;
572
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
573
+ const std::pair<size_t,size_t> &align = *iter;
574
+ size_t relSource = align.first;
575
+ size_t relTarget = align.second;
576
+ size_t absSource = sourceOffsets[relSource];
577
+ size_t absTarget = targetOffsets[relTarget];
578
+
579
+ pair<size_t, size_t> alignPoint(absSource, absTarget);
580
+ pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
581
+ UTIL_THROW_IF2(!ret.second, "Error");
582
+
583
+ }
584
+
585
+ return totalTargetSize;
586
+ }
587
+
588
+ void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
589
+ {
590
+ if (collector) {
591
+ OutputDetailedTranslationReport(collector,
592
+ GetBestHypothesis(),
593
+ static_cast<const Sentence&>(m_source),
594
+ m_source.GetTranslationId());
595
+ }
596
+ }
597
+
598
+ void ChartManager::OutputDetailedTranslationReport(
599
+ OutputCollector *collector,
600
+ const ChartHypothesis *hypo,
601
+ const Sentence &sentence,
602
+ long translationId) const
603
+ {
604
+ if (hypo == NULL) {
605
+ return;
606
+ }
607
+ std::ostringstream out;
608
+ ApplicationContext applicationContext;
609
+
610
+ OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
611
+ collector->Write(translationId, out.str());
612
+
613
+ //DIMw
614
+ if (options()->output.detailed_all_transrep_filepath.size()) {
615
+ const Sentence &sentence = static_cast<const Sentence &>(m_source);
616
+ size_t nBestSize = options()->nbest.nbest_size;
617
+ std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
618
+ CalcNBest(nBestSize, nBestList, options()->nbest.only_distinct);
619
+ OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
620
+ }
621
+
622
+ }
623
+
624
+ void ChartManager::OutputTranslationOptions(std::ostream &out,
625
+ ApplicationContext &applicationContext,
626
+ const ChartHypothesis *hypo,
627
+ const Sentence &sentence,
628
+ long translationId) const
629
+ {
630
+ if (hypo != NULL) {
631
+ OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
632
+ out << std::endl;
633
+ }
634
+
635
+ // recursive
636
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
637
+ std::vector<const ChartHypothesis*>::const_iterator iter;
638
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
639
+ const ChartHypothesis *prevHypo = *iter;
640
+ OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
641
+ }
642
+ }
643
+
644
+ void ChartManager::OutputTranslationOption(std::ostream &out,
645
+ ApplicationContext &applicationContext,
646
+ const ChartHypothesis *hypo,
647
+ const Sentence &sentence,
648
+ long translationId) const
649
+ {
650
+ ReconstructApplicationContext(*hypo, sentence, applicationContext);
651
+ out << "Trans Opt " << translationId
652
+ << " " << hypo->GetCurrSourceRange()
653
+ << ": ";
654
+ WriteApplicationContext(out, applicationContext);
655
+ out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
656
+ << "->" << hypo->GetCurrTargetPhrase()
657
+ << " " << hypo->GetFutureScore() << hypo->GetScoreBreakdown();
658
+ }
659
+
660
+ // Given a hypothesis and sentence, reconstructs the 'application context' --
661
+ // the source RHS symbols of the SCFG rule that was applied, plus their spans.
662
+ void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
663
+ const Sentence &sentence,
664
+ ApplicationContext &context) const
665
+ {
666
+ context.clear();
667
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
668
+ std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
669
+ std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
670
+ const Range &span = hypo.GetCurrSourceRange();
671
+ size_t i = span.GetStartPos();
672
+ while (i <= span.GetEndPos()) {
673
+ if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
674
+ // Symbol is a terminal.
675
+ const Word &symbol = sentence.GetWord(i);
676
+ context.push_back(std::make_pair(symbol, Range(i, i)));
677
+ ++i;
678
+ } else {
679
+ // Symbol is a non-terminal.
680
+ const Word &symbol = (*p)->GetTargetLHS();
681
+ const Range &range = (*p)->GetCurrSourceRange();
682
+ context.push_back(std::make_pair(symbol, range));
683
+ i = range.GetEndPos()+1;
684
+ ++p;
685
+ }
686
+ }
687
+ }
688
+
689
+ void ChartManager::OutputUnknowns(OutputCollector *collector) const
690
+ {
691
+ if (collector) {
692
+ long translationId = m_source.GetTranslationId();
693
+ const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
694
+
695
+ std::ostringstream out;
696
+ for (std::vector<Phrase*>::const_iterator p = oovs.begin();
697
+ p != oovs.end(); ++p) {
698
+ out << **p;
699
+ }
700
+ out << std::endl;
701
+ collector->Write(translationId, out.str());
702
+ }
703
+
704
+ }
705
+
706
+ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
707
+ {
708
+ const ChartHypothesis *hypo = GetBestHypothesis();
709
+ if (collector == NULL || hypo == NULL) {
710
+ return;
711
+ }
712
+
713
+ std::ostringstream out;
714
+ ApplicationContext applicationContext;
715
+
716
+ const Sentence &sentence = static_cast<const Sentence &>(m_source);
717
+ const size_t translationId = m_source.GetTranslationId();
718
+
719
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
720
+
721
+ //Tree of full sentence
722
+ const StatefulFeatureFunction* treeStructure;
723
+ treeStructure = StaticData::Instance().GetTreeStructure();
724
+ if (treeStructure != NULL) {
725
+ const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
726
+ for( size_t i=0; i<sff.size(); i++ ) {
727
+ if (sff[i] == treeStructure) {
728
+ const TreeState* tree = static_cast<const TreeState*>(hypo->GetFFState(i));
729
+ out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
730
+ break;
731
+ }
732
+ }
733
+ }
734
+
735
+ collector->Write(translationId, out.str());
736
+
737
+ }
738
+
739
+ void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
740
+ ApplicationContext &applicationContext,
741
+ const ChartHypothesis *hypo,
742
+ const Sentence &sentence,
743
+ long translationId) const
744
+ {
745
+
746
+ if (hypo != NULL) {
747
+ OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
748
+
749
+ const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
750
+
751
+ out << " ||| ";
752
+ if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
753
+ out << " " << *property->GetValueString();
754
+ } else {
755
+ out << " " << "noTreeInfo";
756
+ }
757
+ out << std::endl;
758
+ }
759
+
760
+ // recursive
761
+ const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
762
+ std::vector<const ChartHypothesis*>::const_iterator iter;
763
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
764
+ const ChartHypothesis *prevHypo = *iter;
765
+ OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
766
+ }
767
+ }
768
+
769
+ void ChartManager::OutputSearchGraph(OutputCollector *collector) const
770
+ {
771
+ if (collector) {
772
+ long translationId = m_source.GetTranslationId();
773
+ std::ostringstream out;
774
+ OutputSearchGraphMoses( out);
775
+ collector->Write(translationId, out.str());
776
+ }
777
+ }
778
+
779
+ //DIMw
780
+ void ChartManager::OutputDetailedAllTranslationReport(
781
+ OutputCollector *collector,
782
+ const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
783
+ const Sentence &sentence,
784
+ long translationId) const
785
+ {
786
+ std::ostringstream out;
787
+ ApplicationContext applicationContext;
788
+
789
+ const ChartCellCollection& cells = GetChartCellCollection();
790
+ size_t size = GetSource().GetSize();
791
+ for (size_t width = 1; width <= size; ++width) {
792
+ for (size_t startPos = 0; startPos <= size-width; ++startPos) {
793
+ size_t endPos = startPos + width - 1;
794
+ Range range(startPos, endPos);
795
+ const ChartCell& cell = cells.Get(range);
796
+ const HypoList* hyps = cell.GetAllSortedHypotheses();
797
+ out << "Chart Cell [" << startPos << ".." << endPos << "]" << endl;
798
+ HypoList::const_iterator iter;
799
+ size_t c = 1;
800
+ for (iter = hyps->begin(); iter != hyps->end(); ++iter) {
801
+ out << "----------------Item " << c++ << " ---------------------"
802
+ << endl;
803
+ OutputTranslationOptions(out, applicationContext, *iter,
804
+ sentence, translationId);
805
+ }
806
+ }
807
+ }
808
+ collector->Write(translationId, out.str());
809
+ }
810
+
811
+ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
812
+ {
813
+ if (!collector)
814
+ return;
815
+ std::ostringstream out;
816
+ FixPrecision(out);
817
+ if (hypo != NULL) {
818
+ VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
819
+ VERBOSE(3,"Best path: ");
820
+ Backtrack(hypo);
821
+ VERBOSE(3,"0" << std::endl);
822
+
823
+ if (options()->output.ReportHypoScore) {
824
+ out << hypo->GetFutureScore() << " ";
825
+ }
826
+
827
+ if (options()->output.RecoverPath) {
828
+ out << "||| ";
829
+ }
830
+ Phrase outPhrase(ARRAY_SIZE_INCR);
831
+ hypo->GetOutputPhrase(outPhrase);
832
+
833
+ // delete 1st & last
834
+ UTIL_THROW_IF2(outPhrase.GetSize() < 2,
835
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
836
+
837
+ outPhrase.RemoveWord(0);
838
+ outPhrase.RemoveWord(outPhrase.GetSize() - 1);
839
+
840
+ string output = outPhrase.GetStringRep(options()->output.factor_order);
841
+ out << output << endl;
842
+ } else {
843
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
844
+
845
+ if (options()->output.ReportHypoScore) {
846
+ out << "0 ";
847
+ }
848
+
849
+ out << endl;
850
+ }
851
+ collector->Write(translationId, out.str());
852
+ }
853
+
854
+ void ChartManager::Backtrack(const ChartHypothesis *hypo) const
855
+ {
856
+ const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
857
+
858
+ vector<const ChartHypothesis*>::const_iterator iter;
859
+ for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
860
+ const ChartHypothesis *prevHypo = *iter;
861
+
862
+ VERBOSE(3,prevHypo->GetId() << " <= ");
863
+ Backtrack(prevHypo);
864
+ }
865
+ }
866
+
867
+ } // namespace Moses
mosesdecoder/moses/ChartParser.h ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ // $Id$
3
+ // vim:tabstop=2
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (C) 2010 Hieu Hoang
7
+
8
+ This library is free software; you can redistribute it and/or
9
+ modify it under the terms of the GNU Lesser General Public
10
+ License as published by the Free Software Foundation; either
11
+ version 2.1 of the License, or (at your option) any later version.
12
+
13
+ This library is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public
19
+ License along with this library; if not, write to the Free Software
20
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ ***********************************************************************/
22
+
23
+ #pragma once
24
+
25
+ #include <list>
26
+ #include <vector>
27
+ #include "Range.h"
28
+ #include "StackVec.h"
29
+ #include "InputPath.h"
30
+ #include "TargetPhraseCollection.h"
31
+ namespace Moses
32
+ {
33
+
34
+ class ChartParserCallback;
35
+ class ChartRuleLookupManager;
36
+ class InputType;
37
+ class Sentence;
38
+ class ChartCellCollectionBase;
39
+ class Word;
40
+ class Phrase;
41
+ // class TargetPhraseCollection;
42
+ class DecodeGraph;
43
+
44
+ class ChartParserUnknown
45
+ {
46
+ ttaskwptr m_ttask;
47
+ public:
48
+ ChartParserUnknown(ttasksptr const& ttask);
49
+ ~ChartParserUnknown();
50
+
51
+ void Process(const Word &sourceWord, const Range &range, ChartParserCallback &to);
52
+
53
+ const std::vector<Phrase*> &GetUnknownSources() const {
54
+ return m_unksrcs;
55
+ }
56
+
57
+ private:
58
+ std::vector<Phrase*> m_unksrcs;
59
+ std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection;
60
+ AllOptions::ptr const& options() const;
61
+ };
62
+
63
+ class ChartParser
64
+ {
65
+ ttaskwptr m_ttask;
66
+ public:
67
+ ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells);
68
+ ~ChartParser();
69
+
70
+ void Create(const Range &range, ChartParserCallback &to);
71
+
72
+ //! the sentence being decoded
73
+ //const Sentence &GetSentence() const;
74
+ long GetTranslationId() const;
75
+ size_t GetSize() const;
76
+ const InputPath &GetInputPath(size_t startPos, size_t endPos) const;
77
+ const InputPath &GetInputPath(const Range &range) const;
78
+ const std::vector<Phrase*> &GetUnknownSources() const {
79
+ return m_unknown.GetUnknownSources();
80
+ }
81
+
82
+ AllOptions::ptr const& options() const;
83
+
84
+ private:
85
+ ChartParserUnknown m_unknown;
86
+ std::vector <DecodeGraph*> m_decodeGraphList;
87
+ std::vector<ChartRuleLookupManager*> m_ruleLookupManagers;
88
+ InputType const& m_source; /**< source sentence to be translated */
89
+
90
+ typedef std::vector< std::vector<InputPath*> > InputPathMatrix;
91
+ InputPathMatrix m_inputPathMatrix;
92
+
93
+ void CreateInputPaths(const InputType &input);
94
+ InputPath &GetInputPath(size_t startPos, size_t endPos);
95
+
96
+ };
97
+
98
+ }
99
+
mosesdecoder/moses/ChartTranslationOptionList.h ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - factored phrase-based language decoder
3
+ Copyright (C) 2006 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include "ChartTranslationOptions.h"
23
+ #include "ChartParserCallback.h"
24
+ #include "StackVec.h"
25
+
26
+ #include <vector>
27
+
28
+ namespace Moses
29
+ {
30
+
31
+ class TargetPhraseCollection;
32
+ class Range;
33
+ class InputType;
34
+ class InputPath;
35
+ class ChartCellLabel;
36
+
37
+ //! a vector of translations options for a specific range, in a specific sentence
38
+ class ChartTranslationOptionList : public ChartParserCallback
39
+ {
40
+ friend std::ostream& operator<<(std::ostream&, const ChartTranslationOptionList&);
41
+
42
+ public:
43
+ ChartTranslationOptionList(size_t ruleLimit, const InputType &input);
44
+ ~ChartTranslationOptionList();
45
+
46
+ const ChartTranslationOptions &Get(size_t i) const {
47
+ return *m_collection[i];
48
+ }
49
+
50
+ //! number of translation options
51
+ size_t GetSize() const {
52
+ return m_size;
53
+ }
54
+
55
+ void Add(const TargetPhraseCollection &, const StackVec &,
56
+ const Range &);
57
+
58
+ void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection::shared_ptr > &waste_memory, const Range &range);
59
+
60
+ bool Empty() const {
61
+ return m_size == 0;
62
+ }
63
+
64
+ float GetBestScore(const ChartCellLabel *chartCell) const;
65
+
66
+ void Clear();
67
+ void ApplyThreshold(float threshold);
68
+ void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath);
69
+
70
+ private:
71
+ typedef std::vector<ChartTranslationOptions*> CollType;
72
+
73
+ struct ScoreThresholdPred {
74
+ ScoreThresholdPred(float threshold) : m_thresholdScore(threshold) {}
75
+ bool operator()(const ChartTranslationOptions *option) {
76
+ return option->GetEstimateOfBestScore() >= m_thresholdScore;
77
+ }
78
+ float m_thresholdScore;
79
+ };
80
+
81
+ void SwapTranslationOptions(size_t a, size_t b);
82
+
83
+ CollType m_collection;
84
+ size_t m_size;
85
+ float m_scoreThreshold;
86
+ const size_t m_ruleLimit;
87
+
88
+ };
89
+
90
+ }
mosesdecoder/moses/Factor.h ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #pragma once
23
+
24
+ #include <ostream>
25
+ #include <string>
26
+ #include "TypeDef.h"
27
+ #include "Util.h"
28
+ #include "util/string_piece.hh"
29
+
30
+ namespace Moses
31
+ {
32
+
33
+ struct FactorFriend;
34
+ class FactorCollection;
35
+
36
+ /** Represents a factor (word, POS, etc).
37
+ * A Factor has a contiguous identifier and string value.
38
+ */
39
+ class Factor
40
+ {
41
+ friend std::ostream& operator<<(std::ostream&, const Factor&);
42
+
43
+ // only these classes are allowed to instantiate this class
44
+ friend class FactorCollection;
45
+ friend struct FactorFriend;
46
+
47
+ // FactorCollection writes here.
48
+ // This is mutable so the pointer can be changed to pool-backed memory.
49
+ mutable StringPiece m_string;
50
+ size_t m_id;
51
+
52
+ //! protected constructor. only friend class, FactorCollection, is allowed to create Factor objects
53
+ Factor() {}
54
+
55
+ // Needed for STL containers. They'll delegate through FactorFriend, which is never exposed publicly.
56
+ Factor(const Factor &factor) : m_string(factor.m_string), m_id(factor.m_id) {}
57
+
58
+ // Not implemented. Shouldn't be called.
59
+ Factor &operator=(const Factor &factor);
60
+
61
+ public:
62
+ //! original string representation of the factor
63
+ StringPiece GetString() const {
64
+ return m_string;
65
+ }
66
+ //! contiguous ID
67
+ inline size_t GetId() const {
68
+ return m_id;
69
+ }
70
+
71
+ /** transitive comparison between 2 factors.
72
+ * -1 = less than
73
+ * +1 = more than
74
+ * 0 = same
75
+ */
76
+ inline int Compare(const Factor &compare) const {
77
+ if (this < &compare)
78
+ return -1;
79
+ if (this > &compare)
80
+ return 1;
81
+ return 0;
82
+ }
83
+ //! transitive comparison used for adding objects into FactorCollection
84
+ inline bool operator<(const Factor &compare) const {
85
+ return this < &compare;
86
+ }
87
+
88
+ // quick equality comparison. Not used
89
+ inline bool operator==(const Factor &compare) const {
90
+ return this == &compare;
91
+ }
92
+
93
+ TO_STRING();
94
+
95
+ };
96
+
97
+ size_t hash_value(const Factor &f);
98
+
99
+ }
100
+
mosesdecoder/moses/FactorCollection.cpp ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <boost/version.hpp>
23
+ #ifdef WITH_THREADS
24
+ #include <boost/thread/locks.hpp>
25
+ #endif
26
+ #include <ostream>
27
+ #include <string>
28
+ #include "FactorCollection.h"
29
+ #include "Util.h"
30
+ #include "util/pool.hh"
31
+
32
+ using namespace std;
33
+
34
+ namespace Moses
35
+ {
36
+ FactorCollection FactorCollection::s_instance;
37
+
38
+ const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool isNonTerminal)
39
+ {
40
+ FactorFriend to_ins;
41
+ to_ins.in.m_string = factorString;
42
+ to_ins.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
43
+ Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
44
+ // If we're threaded, hope a read-only lock is sufficient.
45
+ #ifdef WITH_THREADS
46
+ {
47
+ // read=lock scope
48
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
49
+ Set::const_iterator i = set.find(to_ins);
50
+ if (i != set.end()) return &i->in;
51
+ }
52
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
53
+ #endif // WITH_THREADS
54
+ std::pair<Set::iterator, bool> ret(set.insert(to_ins));
55
+ if (ret.second) {
56
+ ret.first->in.m_string.set(
57
+ memcpy(m_string_backing.Allocate(factorString.size()), factorString.data(), factorString.size()),
58
+ factorString.size());
59
+ if (isNonTerminal) {
60
+ m_factorIdNonTerminal++;
61
+ UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals, "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile");
62
+ } else {
63
+ m_factorId++;
64
+ }
65
+ }
66
+ return &ret.first->in;
67
+ }
68
+
69
+ const Factor *FactorCollection::GetFactor(const StringPiece &factorString, bool isNonTerminal)
70
+ {
71
+ FactorFriend to_find;
72
+ to_find.in.m_string = factorString;
73
+ to_find.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
74
+ Set & set = (isNonTerminal) ? m_set : m_setNonTerminal;
75
+ {
76
+ // read=lock scope
77
+ #ifdef WITH_THREADS
78
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
79
+ #endif // WITH_THREADS
80
+ Set::const_iterator i = set.find(to_find);
81
+ if (i != set.end()) return &i->in;
82
+ }
83
+ return NULL;
84
+ }
85
+
86
+
87
+ FactorCollection::~FactorCollection() {}
88
+
89
+ TO_STRING_BODY(FactorCollection);
90
+
91
+ // friend
92
+ ostream& operator<<(ostream& out, const FactorCollection& factorCollection)
93
+ {
94
+ #ifdef WITH_THREADS
95
+ boost::shared_lock<boost::shared_mutex> lock(factorCollection.m_accessLock);
96
+ #endif
97
+ for (FactorCollection::Set::const_iterator i = factorCollection.m_set.begin(); i != factorCollection.m_set.end(); ++i) {
98
+ out << i->in;
99
+ }
100
+ return out;
101
+ }
102
+
103
+ }
104
+
105
+
mosesdecoder/moses/FactorCollection.h ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_FactorCollection_h
23
+ #define moses_FactorCollection_h
24
+
25
+ // reserve space for non-terminal symbols (ensuring consecutive numbering, and allowing quick lookup by ID)
26
+ #ifndef moses_MaxNumNonterminals
27
+ #define moses_MaxNumNonterminals 10000
28
+ #endif
29
+
30
+ #ifdef WITH_THREADS
31
+ #include <boost/thread/shared_mutex.hpp>
32
+ #endif
33
+
34
+ #include "util/murmur_hash.hh"
35
+ #include <boost/unordered_set.hpp>
36
+
37
+ #include <functional>
38
+ #include <string>
39
+
40
+ #include "util/string_piece.hh"
41
+ #include "util/pool.hh"
42
+ #include "Factor.h"
43
+
44
+ class System;
45
+
46
+ namespace Moses
47
+ {
48
+
49
+ /** We don't want Factor to be copyable by anybody. But we also want to store
50
+ * it in an STL container. The solution is that Factor's copy constructor is
51
+ * private and friended to FactorFriend. The STL containers can delegate
52
+ * copying, so friending the container isn't sufficient. STL containers see
53
+ * FactorFriend's public copy constructor and everybody else sees Factor's
54
+ * private copy constructor.
55
+ */
56
+ struct FactorFriend {
57
+ Factor in;
58
+ };
59
+
60
+ /** collection of factors
61
+ *
62
+ * All Factors in moses are accessed and created by a FactorCollection.
63
+ * By enforcing this strict creation processes (ie, forbidding factors
64
+ * from being created on the stack, etc), their memory addresses can
65
+ * be used as keys to uniquely identify them.
66
+ * Only 1 FactorCollection object should be created.
67
+ */
68
+ class FactorCollection
69
+ {
70
+ friend std::ostream& operator<<(std::ostream&, const FactorCollection&);
71
+ friend class ::System;
72
+
73
+ struct HashFactor : public std::unary_function<const FactorFriend &, std::size_t> {
74
+ std::size_t operator()(const FactorFriend &factor) const {
75
+ return util::MurmurHashNative(factor.in.m_string.data(), factor.in.m_string.size());
76
+ }
77
+ };
78
+ struct EqualsFactor : public std::binary_function<const FactorFriend &, const FactorFriend &, bool> {
79
+ bool operator()(const FactorFriend &left, const FactorFriend &right) const {
80
+ return left.in.GetString() == right.in.GetString();
81
+ }
82
+ };
83
+ typedef boost::unordered_set<FactorFriend, HashFactor, EqualsFactor> Set;
84
+ Set m_set;
85
+ Set m_setNonTerminal;
86
+
87
+ util::Pool m_string_backing;
88
+
89
+ static FactorCollection s_instance;
90
+ #ifdef WITH_THREADS
91
+ //reader-writer lock
92
+ mutable boost::shared_mutex m_accessLock;
93
+ #endif
94
+
95
+ size_t m_factorIdNonTerminal; /**< unique, contiguous ids, starting from 0, for each non-terminal factor */
96
+ size_t m_factorId; /**< unique, contiguous ids, starting from moses_MaxNumNonterminals, for each terminal factor */
97
+
98
+ //! constructor. only the 1 static variable can be created
99
+ FactorCollection()
100
+ : m_factorIdNonTerminal(0)
101
+ , m_factorId(moses_MaxNumNonterminals) {
102
+ }
103
+
104
+ public:
105
+ static FactorCollection& Instance() {
106
+ return s_instance;
107
+ }
108
+
109
+ ~FactorCollection();
110
+
111
+ /** returns a factor with the same direction, factorType and factorString.
112
+ * If a factor already exist in the collection, return the existing factor, if not create a new 1
113
+ */
114
+ const Factor *AddFactor(const StringPiece &factorString, bool isNonTerminal = false);
115
+
116
+ size_t GetNumNonTerminals() {
117
+ return m_factorIdNonTerminal;
118
+ }
119
+
120
+ const Factor *GetFactor(const StringPiece &factorString, bool isNonTerminal = false);
121
+
122
+ // TODO: remove calls to this function, replacing them with the simpler AddFactor(factorString)
123
+ const Factor *AddFactor(FactorDirection /*direction*/, FactorType /*factorType*/, const StringPiece &factorString, bool isNonTerminal = false) {
124
+ return AddFactor(factorString, isNonTerminal);
125
+ }
126
+
127
+ TO_STRING();
128
+
129
+ };
130
+
131
+ }
132
+ #endif
mosesdecoder/moses/FactorTypeSet.cpp ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "FactorTypeSet.h"
23
+
24
+ using namespace std;
25
+
26
+ namespace Moses
27
+ {
28
+ FactorMask::FactorMask(const vector<FactorType> &factors)
29
+ {
30
+ vector<FactorType>::const_iterator iter;
31
+ for (iter = factors.begin() ; iter != factors.end() ; ++iter) {
32
+ this->set(*iter);
33
+ }
34
+ }
35
+
36
+ bool FactorMask::IsUseable(const FactorMask &other) const
37
+ {
38
+ for (size_t i = 0; i < other.size(); ++i) {
39
+ if (other[i]) {
40
+ if (!this->operator[](i) ) {
41
+ return false;
42
+ }
43
+ }
44
+ }
45
+
46
+ return true;
47
+ }
48
+
49
+ TO_STRING_BODY(FactorMask);
50
+
51
+ // friend
52
+ std::ostream& operator<<(std::ostream& out, const FactorMask& fm)
53
+ {
54
+ out << "FactorMask<";
55
+ bool first = true;
56
+ for (size_t currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
57
+ if (fm[currFactor]) {
58
+ if (first) {
59
+ first = false;
60
+ } else {
61
+ out << ",";
62
+ }
63
+ out << currFactor;
64
+ }
65
+ }
66
+ out << ">";
67
+
68
+ return out;
69
+ }
70
+
71
+ }
72
+
73
+
mosesdecoder/moses/FactorTypeSet.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_FactorTypeSet_h
23
+ #define moses_FactorTypeSet_h
24
+
25
+ #include <iostream>
26
+ #include <bitset>
27
+ #include <vector>
28
+ #include "TypeDef.h"
29
+ #include "Util.h"
30
+
31
+ namespace Moses
32
+ {
33
+
34
+ /** set of unique FactorTypes. Used to store what factor types are used in phrase tables etc
35
+ */
36
+ class FactorMask : public std::bitset<MAX_NUM_FACTORS>
37
+ {
38
+ friend std::ostream& operator<<(std::ostream&, const FactorMask&);
39
+
40
+ public:
41
+ //! construct object from list of FactorType.
42
+ explicit FactorMask(const std::vector<FactorType> &factors);
43
+ //! default constructor
44
+ inline FactorMask() {}
45
+ //! copy constructor
46
+ FactorMask(const std::bitset<MAX_NUM_FACTORS>& rhs) : std::bitset<MAX_NUM_FACTORS>(rhs) { }
47
+
48
+ bool IsUseable(const FactorMask &other) const;
49
+
50
+ TO_STRING();
51
+ };
52
+
53
+ }
54
+ #endif
mosesdecoder/moses/FilePtr.h ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /* ---------------------------------------------------------------- */
4
+ /* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
5
+ /* Richard Zens */
6
+ /* ---------------------------------------------------------------- */
7
+
8
+ #ifndef moses_FilePtr_h
9
+ #define moses_FilePtr_h
10
+
11
+ #include "File.h"
12
+
13
+ namespace Moses
14
+ {
15
+
16
+ /** smart pointer for on-demand loading from file
17
+ * requirement: T has a constructor T(FILE*)
18
+ */
19
+ template<typename T> class FilePtr
20
+ {
21
+ public:
22
+ typedef T* Ptr;
23
+ private:
24
+ FILE* f;
25
+ OFF_T pos;
26
+ mutable Ptr t;
27
+ public:
28
+ FilePtr(FILE* f_=0,OFF_T p=0) : f(f_),pos(p),t(0) {}
29
+ ~FilePtr() {}
30
+
31
+ void set(FILE* f_,OFF_T p) {
32
+ f=f_;
33
+ pos=p;
34
+ }
35
+ void free() {
36
+ delete t;
37
+ t=0;
38
+ }
39
+
40
+ T& operator* () {
41
+ load();
42
+ return *t;
43
+ }
44
+ Ptr operator->() {
45
+ load();
46
+ return t;
47
+ }
48
+ operator Ptr () {
49
+ load();
50
+ return t;
51
+ }
52
+
53
+ const T& operator* () const {
54
+ load();
55
+ return *t;
56
+ }
57
+ Ptr operator->() const {
58
+ load();
59
+ return t;
60
+ }
61
+ operator Ptr () const {
62
+ load();
63
+ return t;
64
+ }
65
+
66
+ // direct access to pointer, use with care!
67
+ Ptr getPtr() {
68
+ return t;
69
+ }
70
+ Ptr getPtr() const {
71
+ return t;
72
+ }
73
+
74
+ operator bool() const {
75
+ return (f && pos!=InvalidOffT);
76
+ }
77
+
78
+ void load() const {
79
+ if(t) return;
80
+ if(f && pos!=InvalidOffT) {
81
+ fSeek(f,pos);
82
+ t=new T(f);
83
+ }
84
+ }
85
+ };
86
+
87
+ }
88
+
89
+ #endif
mosesdecoder/moses/HypergraphOutput.h ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (C) 2014- University of Edinburgh
7
+
8
+ This library is free software; you can redistribute it and/or
9
+ modify it under the terms of the GNU Lesser General Public
10
+ License as published by the Free Software Foundation; either
11
+ version 2.1 of the License, or (at your option) any later version.
12
+
13
+ This library is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ Lesser General Public License for more details.
17
+
18
+ You should have received a copy of the GNU Lesser General Public
19
+ License along with this library; if not, write to the Free Software
20
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ ***********************************************************************/
22
+
23
+ #ifndef moses_Hypergraph_Output_h
24
+ #define moses_Hypergraph_Output_h
25
+
26
+ #include <ostream>
27
+ #include "moses/parameters/AllOptions.h"
28
+
29
+ /**
30
+ * Manage the output of hypergraphs.
31
+ **/
32
+
33
+ namespace Moses
34
+ {
35
+
36
+ class ChartHypothesisCollection;
37
+
38
/**
 * Writes the hypergraph of a manager of type M to file.
 *
 * Only the declarations are visible here; the descriptions below repeat
 * what the original comments state.
 */
template<class M>
class HypergraphOutput
{
public:
  /**
   * Initialise the output directory and create the weights file.
   * @param precision stored in m_precision
   */
  HypergraphOutput(size_t precision);

  /**
   * Write the hypergraph held by the given manager to file.
   * @param manager source of the hypergraph
   */
  void Write(const M& manager) const;

private:
  size_t m_precision;
  std::string m_hypergraphDir;
  std::string m_compression;
  bool m_appendSuffix;
};
55
+
56
+
57
+ /**
58
+ * ABC for different types of search graph output for chart Moses.
59
+ **/
60
+ class ChartSearchGraphWriter
61
+ {
62
+ protected:
63
+ AllOptions::ptr m_options;
64
+ ChartSearchGraphWriter(AllOptions::ptr const& opts) : m_options(opts) { }
65
+ public:
66
+ virtual void WriteHeader(size_t winners, size_t losers) const = 0;
67
+ virtual void WriteHypos(const ChartHypothesisCollection& hypos,
68
+ const std::map<unsigned, bool> &reachable) const = 0;
69
+
70
+ };
71
+
72
+ /** "Moses" format (osg style) */
73
+ class ChartSearchGraphWriterMoses : public virtual ChartSearchGraphWriter
74
+ {
75
+ public:
76
+ ChartSearchGraphWriterMoses(AllOptions::ptr const& opts,
77
+ std::ostream* out, size_t lineNumber)
78
+ : ChartSearchGraphWriter(opts), m_out(out), m_lineNumber(lineNumber) {}
79
+ virtual void WriteHeader(size_t, size_t) const {
80
+ /* do nothing */
81
+ }
82
+ virtual void WriteHypos(const ChartHypothesisCollection& hypos,
83
+ const std::map<unsigned, bool> &reachable) const;
84
+
85
+ private:
86
+ std::ostream* m_out;
87
+ size_t m_lineNumber;
88
+ };
89
+
90
+ /** Modified version of Kenneth's lazy hypergraph format */
91
+ class ChartSearchGraphWriterHypergraph : public virtual ChartSearchGraphWriter
92
+ {
93
+ public:
94
+ ChartSearchGraphWriterHypergraph(AllOptions::ptr const& opts, std::ostream* out)
95
+ : ChartSearchGraphWriter(opts), m_out(out), m_nodeId(0) { }
96
+ virtual void WriteHeader(size_t winners, size_t losers) const;
97
+ virtual void WriteHypos(const ChartHypothesisCollection& hypos,
98
+ const std::map<unsigned, bool> &reachable) const;
99
+
100
+ private:
101
+ std::ostream* m_out;
102
+ mutable size_t m_nodeId;
103
+ mutable std::map<size_t,size_t> m_hypoIdToNodeId;
104
+ };
105
+
106
+ }
107
+ #endif
mosesdecoder/moses/HypothesisStack.cpp ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #include "HypothesisStack.h"
3
+
4
+ namespace Moses
5
+ {
6
+ HypothesisStack::~HypothesisStack()
7
+ {
8
+ // delete all hypos
9
+ while (m_hypos.begin() != m_hypos.end()) {
10
+ Remove(m_hypos.begin());
11
+ }
12
+ }
13
+
14
+ /** Remove hypothesis pointed to by iterator but don't delete the object. */
15
+ void HypothesisStack::Detach(const HypothesisStack::iterator &iter)
16
+ {
17
+ m_hypos.erase(iter);
18
+ }
19
+
20
+
21
+ void HypothesisStack::Remove(const HypothesisStack::iterator &iter)
22
+ {
23
+ Hypothesis *h = *iter;
24
+ Detach(iter);
25
+ delete h;
26
+ }
27
+
28
+
29
+ }
30
+
mosesdecoder/moses/HypothesisStackNormal.cpp ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include <algorithm>
23
+ #include <set>
24
+ #include <queue>
25
+ #include "HypothesisStackNormal.h"
26
+ #include "TypeDef.h"
27
+ #include "Util.h"
28
+ #include "Manager.h"
29
+ #include "util/exception.hh"
30
+
31
+ using namespace std;
32
+
33
+ namespace Moses
34
+ {
35
+ HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
36
+ HypothesisStack(manager)
37
+ {
38
+ m_nBestIsEnabled = manager.options()->nbest.enabled;
39
+ m_bestScore = -std::numeric_limits<float>::infinity();
40
+ m_worstScore = -std::numeric_limits<float>::infinity();
41
+ }
42
+
43
+ /** remove all hypotheses from the collection */
44
+ void HypothesisStackNormal::RemoveAll()
45
+ {
46
+ while (m_hypos.begin() != m_hypos.end()) {
47
+ Remove(m_hypos.begin());
48
+ }
49
+ }
50
+
51
+ pair<HypothesisStackNormal::iterator, bool> HypothesisStackNormal::Add(Hypothesis *hypo)
52
+ {
53
+ std::pair<iterator, bool> ret = m_hypos.insert(hypo);
54
+ if (ret.second) {
55
+ // equiv hypo doesn't exists
56
+ VERBOSE(3,"added hyp to stack");
57
+
58
+ // Update best score, if this hypothesis is new best
59
+ if (hypo->GetFutureScore() > m_bestScore) {
60
+ VERBOSE(3,", best on stack");
61
+ m_bestScore = hypo->GetFutureScore();
62
+ // this may also affect the worst score
63
+ if ( m_bestScore + m_beamWidth > m_worstScore )
64
+ m_worstScore = m_bestScore + m_beamWidth;
65
+ }
66
+ // update best/worst score for stack diversity 1
67
+ if ( m_minHypoStackDiversity == 1 &&
68
+ hypo->GetFutureScore() > GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) {
69
+ SetWorstScoreForBitmap( hypo->GetWordsBitmap().GetID(), hypo->GetFutureScore() );
70
+ }
71
+
72
+ VERBOSE(3,", now size " << m_hypos.size());
73
+
74
+ // prune only if stack is twice as big as needed (lazy pruning)
75
+ size_t toleratedSize = 2*m_maxHypoStackSize-1;
76
+ // add in room for stack diversity
77
+ if (m_minHypoStackDiversity) {
78
+ // so what happens if maxdistortion is negative?
79
+ toleratedSize += m_minHypoStackDiversity
80
+ << m_manager.options()->reordering.max_distortion;
81
+ }
82
+
83
+ if (m_hypos.size() > toleratedSize) {
84
+ PruneToSize(m_maxHypoStackSize);
85
+ } else {
86
+ VERBOSE(3,std::endl);
87
+ }
88
+ }
89
+
90
+ return ret;
91
+ }
92
+
93
+ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
94
+ {
95
+ if (hypo->GetFutureScore() == - std::numeric_limits<float>::infinity()) {
96
+ m_manager.GetSentenceStats().AddDiscarded();
97
+ VERBOSE(3,"discarded, constraint" << std::endl);
98
+ delete hypo;
99
+ return false;
100
+ }
101
+
102
+ // too bad for stack. don't bother adding hypo into collection
103
+ if (m_manager.options()->search.disable_discarding == false
104
+ && hypo->GetFutureScore() < m_worstScore
105
+ && ! ( m_minHypoStackDiversity > 0
106
+ && hypo->GetFutureScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) ) {
107
+ m_manager.GetSentenceStats().AddDiscarded();
108
+ VERBOSE(3,"discarded, too bad for stack" << std::endl);
109
+ delete hypo;
110
+ return false;
111
+ }
112
+
113
+ // over threshold, try to add to collection
114
+ std::pair<iterator, bool> addRet = Add(hypo);
115
+ if (addRet.second) {
116
+ // nothing found. add to collection
117
+ return true;
118
+ }
119
+
120
+ // equiv hypo exists, recombine with other hypo
121
+ iterator &iterExisting = addRet.first;
122
+ Hypothesis *hypoExisting = *iterExisting;
123
+ assert(iterExisting != m_hypos.end());
124
+
125
+ m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
126
+
127
+ // found existing hypo with same target ending.
128
+ // keep the best 1
129
+ if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
130
+ // incoming hypo is better than the one we have
131
+ VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
132
+ if (m_nBestIsEnabled) {
133
+ hypo->AddArc(hypoExisting);
134
+ Detach(iterExisting);
135
+ } else {
136
+ Remove(iterExisting);
137
+ }
138
+
139
+ bool added = Add(hypo).second;
140
+ if (!added) {
141
+ iterExisting = m_hypos.find(hypo);
142
+ UTIL_THROW2("Offending hypo = " << **iterExisting);
143
+ }
144
+ return false;
145
+ } else {
146
+ // already storing the best hypo. discard current hypo
147
+ VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
148
+ if (m_nBestIsEnabled) {
149
+ hypoExisting->AddArc(hypo);
150
+ } else {
151
+ delete hypo;
152
+ }
153
+ return false;
154
+ }
155
+ }
156
+
157
+ void HypothesisStackNormal::PruneToSize(size_t newSize)
158
+ {
159
+ if ( newSize == 0) return; // no limit
160
+ if ( size() <= newSize ) return; // ok, if not over the limit
161
+
162
+ // we need to store a temporary list of hypotheses
163
+ vector< Hypothesis* > hypos = GetSortedListNOTCONST();
164
+ bool* included = (bool*) malloc(sizeof(bool) * hypos.size());
165
+ for(size_t i=0; i<hypos.size(); i++) included[i] = false;
166
+
167
+ // clear out original set
168
+ for( iterator iter = m_hypos.begin(); iter != m_hypos.end(); ) {
169
+ iterator removeHyp = iter++;
170
+ Detach(removeHyp);
171
+ }
172
+
173
+ // add best hyps for each coverage according to minStackDiversity
174
+ if ( m_minHypoStackDiversity > 0 ) {
175
+ map< WordsBitmapID, size_t > diversityCount;
176
+ for(size_t i=0; i<hypos.size(); i++) {
177
+ Hypothesis *hyp = hypos[i];
178
+ WordsBitmapID coverage = hyp->GetWordsBitmap().GetID();;
179
+ if (diversityCount.find( coverage ) == diversityCount.end())
180
+ diversityCount[ coverage ] = 0;
181
+
182
+ if (diversityCount[ coverage ] < m_minHypoStackDiversity) {
183
+ m_hypos.insert( hyp );
184
+ included[i] = true;
185
+ diversityCount[ coverage ]++;
186
+ if (diversityCount[ coverage ] == m_minHypoStackDiversity)
187
+ SetWorstScoreForBitmap( coverage, hyp->GetFutureScore());
188
+ }
189
+ }
190
+ }
191
+
192
+ // only add more if stack not full after satisfying minStackDiversity
193
+ if ( size() < newSize ) {
194
+
195
+ // add best remaining hypotheses
196
+ for(size_t i=0; i<hypos.size()
197
+ && size() < newSize
198
+ && hypos[i]->GetFutureScore() > m_bestScore+m_beamWidth; i++) {
199
+ if (! included[i]) {
200
+ m_hypos.insert( hypos[i] );
201
+ included[i] = true;
202
+ if (size() == newSize)
203
+ m_worstScore = hypos[i]->GetFutureScore();
204
+ }
205
+ }
206
+ }
207
+
208
+ // delete hypotheses that have not been included
209
+ for(size_t i=0; i<hypos.size(); i++) {
210
+ if (! included[i]) {
211
+ delete hypos[i];
212
+ m_manager.GetSentenceStats().AddPruning();
213
+ }
214
+ }
215
+ free(included);
216
+
217
+ // some reporting....
218
+ VERBOSE(3,", pruned to size " << size() << endl);
219
+ IFVERBOSE(3) {
220
+ TRACE_ERR("stack now contains: ");
221
+ for(iterator iter = m_hypos.begin(); iter != m_hypos.end(); iter++) {
222
+ Hypothesis *hypo = *iter;
223
+ TRACE_ERR( hypo->GetId() << " (" << hypo->GetFutureScore() << ") ");
224
+ }
225
+ TRACE_ERR( endl);
226
+ }
227
+ }
228
+
229
+ const Hypothesis *HypothesisStackNormal::GetBestHypothesis() const
230
+ {
231
+ if (!m_hypos.empty()) {
232
+ const_iterator iter = m_hypos.begin();
233
+ Hypothesis *bestHypo = *iter;
234
+ while (++iter != m_hypos.end()) {
235
+ Hypothesis *hypo = *iter;
236
+ if (hypo->GetFutureScore() > bestHypo->GetFutureScore())
237
+ bestHypo = hypo;
238
+ }
239
+ return bestHypo;
240
+ }
241
+ return NULL;
242
+ }
243
+
244
+ vector<const Hypothesis*> HypothesisStackNormal::GetSortedList() const
245
+ {
246
+ vector<const Hypothesis*> ret;
247
+ ret.reserve(m_hypos.size());
248
+ std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
249
+ sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
250
+
251
+ return ret;
252
+ }
253
+
254
+ vector<Hypothesis*> HypothesisStackNormal::GetSortedListNOTCONST()
255
+ {
256
+ vector<Hypothesis*> ret;
257
+ ret.reserve(m_hypos.size());
258
+ std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
259
+ sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
260
+
261
+ return ret;
262
+ }
263
+
264
+ void HypothesisStackNormal::CleanupArcList()
265
+ {
266
+ // only necessary if n-best calculations are enabled
267
+ if (!m_nBestIsEnabled) return;
268
+
269
+ iterator iter;
270
+ for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
271
+ Hypothesis *mainHypo = *iter;
272
+ mainHypo->CleanupArcList(this->m_manager.options()->nbest.nbest_size, this->m_manager.options()->NBestDistinct());
273
+ }
274
+ }
275
+
276
+ TO_STRING_BODY(HypothesisStackNormal);
277
+
278
+
279
+ // friend
280
+ std::ostream& operator<<(std::ostream& out, const HypothesisStackNormal& hypoColl)
281
+ {
282
+ HypothesisStackNormal::const_iterator iter;
283
+
284
+ for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter) {
285
+ const Hypothesis &hypo = **iter;
286
+ out << hypo << endl;
287
+
288
+ }
289
+ return out;
290
+ }
291
+
292
+
293
+ }
294
+
mosesdecoder/moses/IOWrapper.cpp ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (c) 2006 University of Edinburgh
6
+ All rights reserved.
7
+
8
+ Redistribution and use in source and binary forms, with or without modification,
9
+ are permitted provided that the following conditions are met:
10
+
11
+ * Redistributions of source code must retain the above copyright notice,
12
+ this list of conditions and the following disclaimer.
13
+ * Redistributions in binary form must reproduce the above copyright notice,
14
+ this list of conditions and the following disclaimer in the documentation
15
+ and/or other materials provided with the distribution.
16
+ * Neither the name of the University of Edinburgh nor the names of its contributors
17
+ may be used to endorse or promote products derived from this software
18
+ without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
24
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
28
+ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ POSSIBILITY OF SUCH DAMAGE.
31
+ ***********************************************************************/
32
+
33
+ #include <iostream>
34
+ #include <stack>
35
+ #include <boost/algorithm/string.hpp>
36
+ #include <boost/foreach.hpp>
37
+
38
+ #include "moses/Syntax/KBestExtractor.h"
39
+ #include "moses/Syntax/PVertex.h"
40
+ #include "moses/Syntax/S2T/DerivationWriter.h"
41
+
42
+ #include "moses/Hypothesis.h"
43
+ #include "moses/TrellisPathList.h"
44
+ #include "moses/StaticData.h"
45
+ #include "moses/InputFileStream.h"
46
+ #include "moses/FF/StatefulFeatureFunction.h"
47
+ #include "moses/TreeInput.h"
48
+ #include "moses/ForestInput.h"
49
+ #include "moses/ConfusionNet.h"
50
+ #include "moses/WordLattice.h"
51
+ #include "moses/ChartManager.h"
52
+
53
+ #include "IOWrapper.h"
54
+
55
+ #include <boost/filesystem.hpp>
56
+ #include <boost/iostreams/device/file.hpp>
57
+ #include <boost/iostreams/filter/bzip2.hpp>
58
+ #include <boost/iostreams/filter/gzip.hpp>
59
+ #include <boost/iostreams/filtering_stream.hpp>
60
+
61
+ using namespace std;
62
+
63
+ namespace Moses
64
+ {
65
+
66
+ IOWrapper::IOWrapper(AllOptions const& opts)
67
+ : m_options(new AllOptions(opts))
68
+ , m_nBestStream(NULL)
69
+ , m_surpressSingleBestOutput(false)
70
+ , m_look_ahead(0)
71
+ , m_look_back(0)
72
+ , m_buffered_ahead(0)
73
+ , spe_src(NULL)
74
+ , spe_trg(NULL)
75
+ , spe_aln(NULL)
76
+ {
77
+ const StaticData &staticData = StaticData::Instance();
78
+ Parameter const& P = staticData.GetParameter();
79
+
80
+ // context buffering for context-sensitive decoding
81
+ m_look_ahead = m_options->context.look_ahead;
82
+ m_look_back = m_options->context.look_back;
83
+ m_inputType = m_options->input.input_type;
84
+
85
+ UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput,
86
+ "Context-sensitive decoding currently works only with sentence input.");
87
+
88
+ m_currentLine = m_options->output.start_translation_id;
89
+ m_inputFactorOrder = &m_options->input.factor_order;
90
+
91
+ size_t nBestSize = m_options->nbest.nbest_size;
92
+ string nBestFilePath = m_options->nbest.output_file_path;
93
+
94
+ staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
95
+ if (m_inputFilePath.empty()) {
96
+ m_inputFile = NULL;
97
+ m_inputStream = &cin;
98
+ } else {
99
+ VERBOSE(2,"IO from File" << endl);
100
+ m_inputFile = new InputFileStream(m_inputFilePath);
101
+ m_inputStream = m_inputFile;
102
+ }
103
+
104
+ if (nBestSize > 0) {
105
+ m_nBestOutputCollector.reset(new Moses::OutputCollector(nBestFilePath));
106
+ if (m_nBestOutputCollector->OutputIsCout()) {
107
+ m_surpressSingleBestOutput = true;
108
+ }
109
+ }
110
+
111
+ std::string path;
112
+ P.SetParameter<std::string>(path, "output-search-graph-extended", "");
113
+ if (!path.size()) P.SetParameter<std::string>(path, "output-search-graph", "");
114
+ if (path.size()) m_searchGraphOutputCollector.reset(new OutputCollector(path));
115
+
116
+ P.SetParameter<std::string>(path, "output-unknowns", "");
117
+ if (path.size()) m_unknownsCollector.reset(new OutputCollector(path));
118
+
119
+ P.SetParameter<std::string>(path, "alignment-output-file", "");
120
+ if (path.size()) m_alignmentInfoCollector.reset(new OutputCollector(path));
121
+
122
+ P.SetParameter<string>(path, "translation-details", "");
123
+ if (path.size()) m_detailedTranslationCollector.reset(new OutputCollector(path));
124
+
125
+ P.SetParameter<string>(path, "tree-translation-details", "");
126
+ if (path.size()) m_detailTreeFragmentsOutputCollector.reset(new OutputCollector(path));
127
+
128
+ P.SetParameter<string>(path, "output-word-graph", "");
129
+ if (path.size()) m_wordGraphCollector.reset(new OutputCollector(path));
130
+
131
+ size_t latticeSamplesSize = staticData.options()->output.lattice_sample_size;
132
+ string latticeSamplesFile = staticData.options()->output.lattice_sample_filepath;
133
+ if (latticeSamplesSize) {
134
+ m_latticeSamplesCollector.reset(new OutputCollector(latticeSamplesFile));
135
+ if (m_latticeSamplesCollector->OutputIsCout()) {
136
+ m_surpressSingleBestOutput = true;
137
+ }
138
+ }
139
+
140
+ if (!m_surpressSingleBestOutput) {
141
+ m_singleBestOutputCollector.reset(new Moses::OutputCollector(&std::cout));
142
+ }
143
+
144
+ // setup file pattern for hypergraph output
145
+ char const* key = "output-search-graph-hypergraph";
146
+ PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
147
+ std::string& fmt = m_hypergraph_output_filepattern;
148
+ // first, determine the output directory
149
+ if (p && p->size() > 2) fmt = p->at(2);
150
+ else if (nBestFilePath.size() && nBestFilePath != "-" &&
151
+ ! boost::starts_with(nBestFilePath, "/dev/stdout")) {
152
+ fmt = boost::filesystem::path(nBestFilePath).parent_path().string();
153
+ if (fmt.empty()) fmt = ".";
154
+ } else fmt = boost::filesystem::current_path().string() + "/hypergraph";
155
+ if (*fmt.rbegin() != '/') fmt += "/";
156
+ std::string extension = (p && p->size() > 1 ? p->at(1) : std::string("txt"));
157
+ UTIL_THROW_IF2(extension != "txt" && extension != "gz" && extension != "bz2",
158
+ "Unknown compression type '" << extension
159
+ << "' for hypergraph output!");
160
+ fmt += string("%d.") + extension;
161
+
162
+ // input streams for simulated post-editing
163
+ if (staticData.GetParameter().GetParam("spe-src")) {
164
+ spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
165
+ spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
166
+ spe_aln = new ifstream(staticData.GetParameter().GetParam("spe-aln")->at(0).c_str());
167
+ }
168
+ }
169
+
170
+ IOWrapper::~IOWrapper()
171
+ {
172
+ if (m_inputFile != NULL)
173
+ delete m_inputFile;
174
+ // if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
175
+ // outputting n-best to file, rather than stdout. need to close file and delete obj
176
+ // delete m_nBestStream;
177
+ // }
178
+
179
+ // delete m_detailedTranslationReportingStream;
180
+ // delete m_alignmentInfoStream;
181
+ // delete m_unknownsStream;
182
+ // delete m_outputSearchGraphStream;
183
+ // delete m_outputWordGraphStream;
184
+ // delete m_latticeSamplesStream;
185
+ }
186
+
187
+ // InputType*
188
+ // IOWrapper::
189
+ // GetInput(InputType* inputType)
190
+ // {
191
+ // if(inputType->Read(*m_inputStream, *m_inputFactorOrder)) {
192
+ // return inputType;
193
+ // } else {
194
+ // delete inputType;
195
+ // return NULL;
196
+ // }
197
+ // }
198
+
199
+ boost::shared_ptr<InputType>
200
+ IOWrapper::
201
+ GetBufferedInput()
202
+ {
203
+ switch(m_inputType) {
204
+ case SentenceInput:
205
+ return BufferInput<Sentence>();
206
+ case ConfusionNetworkInput:
207
+ return BufferInput<ConfusionNet>();
208
+ case WordLatticeInput:
209
+ return BufferInput<WordLattice>();
210
+ case TreeInputType:
211
+ return BufferInput<TreeInput>();
212
+ case TabbedSentenceInput:
213
+ return BufferInput<TabbedSentence>();
214
+ case ForestInputType:
215
+ return BufferInput<ForestInput>();
216
+ default:
217
+ TRACE_ERR("Unknown input type: " << m_inputType << "\n");
218
+ return boost::shared_ptr<InputType>();
219
+ }
220
+
221
+ }
222
+
223
+ boost::shared_ptr<InputType>
224
+ IOWrapper::
225
+ ReadInput(boost::shared_ptr<std::vector<std::string> >* cw)
226
+ {
227
+ #ifdef WITH_THREADS
228
+ boost::lock_guard<boost::mutex> lock(m_lock);
229
+ #endif
230
+ boost::shared_ptr<InputType> source = GetBufferedInput();
231
+ if (source) {
232
+ source->SetTranslationId(m_currentLine++);
233
+
234
+ // when using a sliding context window, remove obsolete past input from buffer:
235
+ if (m_past_input.size() && m_look_back != std::numeric_limits<size_t>::max()) {
236
+ list<boost::shared_ptr<InputType> >::iterator m = m_past_input.end();
237
+ for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();)
238
+ cnt += (*m)->GetSize();
239
+ while (m_past_input.begin() != m) m_past_input.pop_front();
240
+ }
241
+
242
+ if (m_look_back)
243
+ m_past_input.push_back(source);
244
+ }
245
+ if (cw) *cw = GetCurrentContextWindow();
246
+ return source;
247
+ }
248
+
249
+ boost::shared_ptr<std::vector<std::string> >
250
+ IOWrapper::
251
+ GetCurrentContextWindow() const
252
+ {
253
+ boost::shared_ptr<std::vector<string> > context(new std::vector<string>);
254
+ BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_past_input)
255
+ context->push_back(i->ToString());
256
+ BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_future_input)
257
+ context->push_back(i->ToString());
258
+ return context;
259
+ }
260
+
261
+
262
+
263
+ std::string
264
+ IOWrapper::
265
+ GetHypergraphOutputFileName(size_t const id) const
266
+ {
267
+ return str(boost::format(m_hypergraph_output_filepattern) % id);
268
+ }
269
+
270
+
271
+ } // namespace
272
+
mosesdecoder/moses/InputFileStream.h ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_InputFileStream_h
23
+ #define moses_InputFileStream_h
24
+
25
+ #include <cstdlib>
26
+ #include <fstream>
27
+ #include <string>
28
+
29
+ namespace Moses
30
+ {
31
+
32
/** Input stream to be used in place of std::istream. According to the
 *  original note it can read zipped files when the path ends in .gz.
 *  Only the declarations are visible here.
 */
class InputFileStream : public std::istream
{
protected:
  std::streambuf *m_streambuf;

public:
  /** @param filePath path of the file to read */
  explicit InputFileStream(const std::string &filePath);
  ~InputFileStream();

  void Close();
};
45
+
46
+ }
47
+
48
+ #endif
mosesdecoder/moses/LatticeMBR.cpp ADDED
@@ -0,0 +1,680 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * LatticeMBR.cpp
3
+ * moses-cmd
4
+ *
5
+ * Created by Abhishek Arun on 26/01/2010.
6
+ * Copyright 2010 __MyCompanyName__. All rights reserved.
7
+ *
8
+ */
9
+
10
+ #include "LatticeMBR.h"
11
+ #include "moses/StaticData.h"
12
+ #include <algorithm>
13
+ #include <set>
14
+
15
+ using namespace std;
16
+
17
+ namespace Moses
18
+ {
19
+
20
+ size_t bleu_order = 4; // longest n-gram length generated by extract_ngrams()
21
+ float UNKNGRAMLOGPROB = -20; // score CalcScore() uses for an n-gram that is missing from the score table it is given
22
+ void GetOutputWords(const TrellisPath &path, vector <Word> &translation)
23
+ {
24
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
25
+
26
+ // print the surface factor of the translation
27
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
28
+ const Hypothesis &edge = *edges[currEdge];
29
+ const Phrase &phrase = edge.GetCurrTargetPhrase();
30
+ size_t size = phrase.GetSize();
31
+ for (size_t pos = 0 ; pos < size ; pos++) {
32
+ translation.push_back(phrase.GetWord(pos));
33
+ }
34
+ }
35
+ }
36
+
37
+
38
+ void extract_ngrams(const vector<Word >& sentence, map < Phrase, int > & allngrams)
39
+ {
40
+ for (int k = 0; k < (int)bleu_order; k++) {
41
+ for(int i =0; i < max((int)sentence.size()-k,0); i++) {
42
+ Phrase ngram( k+1);
43
+ for ( int j = i; j<= i+k; j++) {
44
+ ngram.AddWord(sentence[j]);
45
+ }
46
+ ++allngrams[ngram];
47
+ }
48
+ }
49
+ }
50
+
51
+
52
+
53
+ void NgramScores::addScore(const Hypothesis* node, const Phrase& ngram, float score)
54
+ {
55
+ set<Phrase>::const_iterator ngramIter = m_ngrams.find(ngram);
56
+ if (ngramIter == m_ngrams.end()) {
57
+ ngramIter = m_ngrams.insert(ngram).first;
58
+ }
59
+ map<const Phrase*,float>& ngramScores = m_scores[node];
60
+ map<const Phrase*,float>::iterator scoreIter = ngramScores.find(&(*ngramIter));
61
+ if (scoreIter == ngramScores.end()) {
62
+ ngramScores[&(*ngramIter)] = score;
63
+ } else {
64
+ ngramScores[&(*ngramIter)] = log_sum(score,scoreIter->second);
65
+ }
66
+ }
67
+
68
+ NgramScores::NodeScoreIterator NgramScores::nodeBegin(const Hypothesis* node)
69
+ {
70
+ return m_scores[node].begin();
71
+ }
72
+
73
+
74
+ NgramScores::NodeScoreIterator NgramScores::nodeEnd(const Hypothesis* node)
75
+ {
76
+ return m_scores[node].end();
77
+ }
78
+
79
+ LatticeMBRSolution::LatticeMBRSolution(const TrellisPath& path, bool isMap) :
80
+ m_score(0.0f)
81
+ {
82
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
83
+
84
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
85
+ const Hypothesis &edge = *edges[currEdge];
86
+ const Phrase &phrase = edge.GetCurrTargetPhrase();
87
+ size_t size = phrase.GetSize();
88
+ for (size_t pos = 0 ; pos < size ; pos++) {
89
+ m_words.push_back(phrase.GetWord(pos));
90
+ }
91
+ }
92
+ if (isMap) {
93
+ m_mapScore = path.GetFutureScore();
94
+ } else {
95
+ m_mapScore = 0;
96
+ }
97
+ }
98
+
99
+
100
+ void LatticeMBRSolution::CalcScore(map<Phrase, float>& finalNgramScores, const vector<float>& thetas, float mapWeight)
101
+ {
102
+ m_ngramScores.assign(thetas.size()-1, -10000);
103
+
104
+ map < Phrase, int > counts;
105
+ extract_ngrams(m_words,counts);
106
+
107
+ //Now score this translation
108
+ m_score = thetas[0] * m_words.size();
109
+
110
+ //Calculate the ngramScores, working in log space at first
111
+ for (map < Phrase, int >::iterator ngrams = counts.begin(); ngrams != counts.end(); ++ngrams) {
112
+ float ngramPosterior = UNKNGRAMLOGPROB;
113
+ map<Phrase,float>::const_iterator ngramPosteriorIt = finalNgramScores.find(ngrams->first);
114
+ if (ngramPosteriorIt != finalNgramScores.end()) {
115
+ ngramPosterior = ngramPosteriorIt->second;
116
+ }
117
+ size_t ngramSize = ngrams->first.GetSize();
118
+ m_ngramScores[ngramSize-1] = log_sum(log((float)ngrams->second) + ngramPosterior,m_ngramScores[ngramSize-1]);
119
+ }
120
+
121
+ //convert from log to probability and create weighted sum
122
+ for (size_t i = 0; i < m_ngramScores.size(); ++i) {
123
+ m_ngramScores[i] = exp(m_ngramScores[i]);
124
+ m_score += thetas[i+1] * m_ngramScores[i];
125
+ }
126
+
127
+
128
+ //The map score
129
+ m_score += m_mapScore*mapWeight;
130
+ }
131
+
132
+
133
+ void pruneLatticeFB(Lattice & connectedHyp, map < const Hypothesis*, set <const Hypothesis* > > & outgoingHyps, map<const Hypothesis*, vector<Edge> >& incomingEdges,
134
+ const vector< float> & estimatedScores, const Hypothesis* bestHypo, size_t edgeDensity, float scale)
135
+ {
136
+
137
+ //Need hyp 0 in connectedHyp - Find empty hypothesis
138
+ VERBOSE(2,"Pruning lattice to edge density " << edgeDensity << endl);
139
+ const Hypothesis* emptyHyp = connectedHyp.at(0);
140
+ while (emptyHyp->GetId() != 0) {
141
+ emptyHyp = emptyHyp->GetPrevHypo();
142
+ }
143
+ connectedHyp.push_back(emptyHyp); //Add it to list of hyps
144
+
145
+ //Need hyp 0's outgoing Hyps
146
+ for (size_t i = 0; i < connectedHyp.size(); ++i) {
147
+ if (connectedHyp[i]->GetId() > 0 && connectedHyp[i]->GetPrevHypo()->GetId() == 0)
148
+ outgoingHyps[emptyHyp].insert(connectedHyp[i]);
149
+ }
150
+
151
+ //sort hyps based on estimated scores - do so by copying to multimap
152
+ multimap<float, const Hypothesis*> sortHypsByVal;
153
+ for (size_t i =0; i < estimatedScores.size(); ++i) {
154
+ sortHypsByVal.insert(make_pair(estimatedScores[i], connectedHyp[i]));
155
+ }
156
+
157
+ multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end();
158
+ float bestScore = it->first;
159
+ //store best score as score of hyp 0
160
+ sortHypsByVal.insert(make_pair(bestScore, emptyHyp));
161
+
162
+
163
+ IFVERBOSE(3) {
164
+ for (multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end(); it != --sortHypsByVal.begin(); --it) {
165
+ const Hypothesis* currHyp = it->second;
166
+ cerr << "Hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl;
167
+ }
168
+ }
169
+
170
+
171
+ set <const Hypothesis*> survivingHyps; //store hyps that make the cut in this
172
+
173
+ VERBOSE(2, "BEST HYPO TARGET LENGTH : " << bestHypo->GetSize() << endl)
174
+ size_t numEdgesTotal = edgeDensity * bestHypo->GetSize(); //as per Shankar, aim for (density * target length of MAP solution) arcs
175
+ size_t numEdgesCreated = 0;
176
+ VERBOSE(2, "Target edge count: " << numEdgesTotal << endl);
177
+
178
+ float prevScore = -999999;
179
+
180
+ //now iterate over multimap
181
+ for (multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end(); it != --sortHypsByVal.begin(); --it) {
182
+ float currEstimatedScore = it->first;
183
+ const Hypothesis* currHyp = it->second;
184
+
185
+ if (numEdgesCreated >= numEdgesTotal && prevScore > currEstimatedScore) //if this hyp has equal estimated score to previous, include its edges too
186
+ break;
187
+
188
+ prevScore = currEstimatedScore;
189
+ VERBOSE(3, "Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)
190
+ VERBOSE(3, "Considering hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl)
191
+
192
+ survivingHyps.insert(currHyp); //CurrHyp made the cut
193
+
194
+ // is its best predecessor already included ?
195
+ if (survivingHyps.find(currHyp->GetPrevHypo()) != survivingHyps.end()) { //yes, then add an edge
196
+ vector <Edge>& edges = incomingEdges[currHyp];
197
+ Edge winningEdge(currHyp->GetPrevHypo(),currHyp,scale*(currHyp->GetScore() - currHyp->GetPrevHypo()->GetScore()),currHyp->GetCurrTargetPhrase());
198
+ edges.push_back(winningEdge);
199
+ ++numEdgesCreated;
200
+ }
201
+
202
+ //let's try the arcs too
203
+ const ArcList *arcList = currHyp->GetArcList();
204
+ if (arcList != NULL) {
205
+ ArcList::const_iterator iterArcList;
206
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
207
+ const Hypothesis *loserHypo = *iterArcList;
208
+ const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
209
+ if (survivingHyps.find(loserPrevHypo) != survivingHyps.end()) { //found it, add edge
210
+ double arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
211
+ Edge losingEdge(loserPrevHypo, currHyp, arcScore*scale, loserHypo->GetCurrTargetPhrase());
212
+ vector <Edge>& edges = incomingEdges[currHyp];
213
+ edges.push_back(losingEdge);
214
+ ++numEdgesCreated;
215
+ }
216
+ }
217
+ }
218
+
219
+ //Now if a successor node has already been visited, add an edge connecting the two
220
+ map < const Hypothesis*, set < const Hypothesis* > >::const_iterator outgoingIt = outgoingHyps.find(currHyp);
221
+
222
+ if (outgoingIt != outgoingHyps.end()) {//currHyp does have successors
223
+ const set<const Hypothesis*> & outHyps = outgoingIt->second; //the successors
224
+ for (set<const Hypothesis*>::const_iterator outHypIts = outHyps.begin(); outHypIts != outHyps.end(); ++outHypIts) {
225
+ const Hypothesis* succHyp = *outHypIts;
226
+
227
+ if (survivingHyps.find(succHyp) == survivingHyps.end()) //Have we encountered the successor yet?
228
+ continue; //No, move on to next
229
+
230
+ //Curr Hyp can be : a) the best predecessor of succ b) or an arc attached to succ
231
+ if (succHyp->GetPrevHypo() == currHyp) { //best predecessor
232
+ vector <Edge>& succEdges = incomingEdges[succHyp];
233
+ Edge succWinningEdge(currHyp, succHyp, scale*(succHyp->GetScore() - currHyp->GetScore()), succHyp->GetCurrTargetPhrase());
234
+ succEdges.push_back(succWinningEdge);
235
+ survivingHyps.insert(succHyp);
236
+ ++numEdgesCreated;
237
+ }
238
+
239
+ //now, let's find an arc
240
+ const ArcList *arcList = succHyp->GetArcList();
241
+ if (arcList != NULL) {
242
+ ArcList::const_iterator iterArcList;
243
+ //QUESTION: What happens if there's more than one loserPrevHypo?
244
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
245
+ const Hypothesis *loserHypo = *iterArcList;
246
+ const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
247
+ if (loserPrevHypo == currHyp) { //found it
248
+ vector <Edge>& succEdges = incomingEdges[succHyp];
249
+ double arcScore = loserHypo->GetScore() - currHyp->GetScore();
250
+ Edge losingEdge(currHyp, succHyp,scale* arcScore, loserHypo->GetCurrTargetPhrase());
251
+ succEdges.push_back(losingEdge);
252
+ ++numEdgesCreated;
253
+ }
254
+ }
255
+ }
256
+ }
257
+ }
258
+ }
259
+
260
+ connectedHyp.clear();
261
+ for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
262
+ connectedHyp.push_back(*it);
263
+ }
264
+
265
+ VERBOSE(2, "Done! Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)
266
+
267
+ IFVERBOSE(3) {
268
+ cerr << "Surviving hyps: " ;
269
+ for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
270
+ cerr << (*it)->GetId() << " ";
271
+ }
272
+ cerr << endl;
273
+ }
274
+
275
+
276
+ }
277
+
278
+ void calcNgramExpectations(Lattice & connectedHyp, map<const Hypothesis*, vector<Edge> >& incomingEdges,
279
+ map<Phrase, float>& finalNgramScores, bool posteriors)
280
+ {
281
+
282
+ sort(connectedHyp.begin(),connectedHyp.end(),ascendingCoverageCmp); //sort by increasing source word cov
283
+
284
+ /*cerr << "Lattice:" << endl;
285
+ for (Lattice::const_iterator i = connectedHyp.begin(); i != connectedHyp.end(); ++i) {
286
+ const Hypothesis* h = *i;
287
+ cerr << *h << endl;
288
+ const vector<Edge>& edges = incomingEdges[h];
289
+ for (size_t e = 0; e < edges.size(); ++e) {
290
+ cerr << edges[e];
291
+ }
292
+ }*/
293
+
294
+ map<const Hypothesis*, float> forwardScore;
295
+ forwardScore[connectedHyp[0]] = 0.0f; //forward score of hyp 0 is 1 (or 0 in logprob space)
296
+ set< const Hypothesis *> finalHyps; //store completed hyps
297
+
298
+ NgramScores ngramScores;//ngram scores for each hyp
299
+
300
+ for (size_t i = 1; i < connectedHyp.size(); ++i) {
301
+ const Hypothesis* currHyp = connectedHyp[i];
302
+ if (currHyp->GetWordsBitmap().IsComplete()) {
303
+ finalHyps.insert(currHyp);
304
+ }
305
+
306
+ VERBOSE(3, "Processing hyp: " << currHyp->GetId() << ", num words cov= " << currHyp->GetWordsBitmap().GetNumWordsCovered() << endl)
307
+
308
+ vector <Edge> & edges = incomingEdges[currHyp];
309
+ for (size_t e = 0; e < edges.size(); ++e) {
310
+ const Edge& edge = edges[e];
311
+ if (forwardScore.find(currHyp) == forwardScore.end()) {
312
+ forwardScore[currHyp] = forwardScore[edge.GetTailNode()] + edge.GetScore();
313
+ VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] = fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
314
+ } else {
315
+ forwardScore[currHyp] = log_sum(forwardScore[currHyp], forwardScore[edge.GetTailNode()] + edge.GetScore());
316
+ VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] += fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
317
+ }
318
+ }
319
+
320
+ //Process ngrams now
321
+ for (size_t j =0 ; j < edges.size(); ++j) {
322
+ Edge& edge = edges[j];
323
+ const NgramHistory & incomingPhrases = edge.GetNgrams(incomingEdges);
324
+
325
+ //let's first score ngrams introduced by this edge
326
+ for (NgramHistory::const_iterator it = incomingPhrases.begin(); it != incomingPhrases.end(); ++it) {
327
+ const Phrase& ngram = it->first;
328
+ const PathCounts& pathCounts = it->second;
329
+ VERBOSE(4, "Calculating score for: " << it->first << endl)
330
+
331
+ for (PathCounts::const_iterator pathCountIt = pathCounts.begin(); pathCountIt != pathCounts.end(); ++pathCountIt) {
332
+ //Score of an n-gram is forward score of head node of leftmost edge + all edge scores
333
+ const Path& path = pathCountIt->first;
334
+ //cerr << "path count for " << ngram << " is " << pathCountIt->second << endl;
335
+ float score = forwardScore[path[0]->GetTailNode()];
336
+ for (size_t i = 0; i < path.size(); ++i) {
337
+ score += path[i]->GetScore();
338
+ }
339
+ //if we're doing expectations, then the number of times the ngram
340
+ //appears on the path is relevant.
341
+ size_t count = posteriors ? 1 : pathCountIt->second;
342
+ for (size_t k = 0; k < count; ++k) {
343
+ ngramScores.addScore(currHyp,ngram,score);
344
+ }
345
+ }
346
+ }
347
+
348
+ //Now score ngrams that are just being propagated from the history
349
+ for (NgramScores::NodeScoreIterator it = ngramScores.nodeBegin(edge.GetTailNode());
350
+ it != ngramScores.nodeEnd(edge.GetTailNode()); ++it) {
351
+ const Phrase & currNgram = *(it->first);
352
+ float currNgramScore = it->second;
353
+ VERBOSE(4, "Calculating score for: " << currNgram << endl)
354
+
355
+ // For posteriors, don't double count ngrams
356
+ if (!posteriors || incomingPhrases.find(currNgram) == incomingPhrases.end()) {
357
+ float score = edge.GetScore() + currNgramScore;
358
+ ngramScores.addScore(currHyp,currNgram,score);
359
+ }
360
+ }
361
+
362
+ }
363
+ }
364
+
365
+ float Z = 9999999; //the total score of the lattice
366
+
367
+ //Done - Print out ngram posteriors for final hyps
368
+ for (set< const Hypothesis *>::iterator finalHyp = finalHyps.begin(); finalHyp != finalHyps.end(); ++finalHyp) {
369
+ const Hypothesis* hyp = *finalHyp;
370
+
371
+ for (NgramScores::NodeScoreIterator it = ngramScores.nodeBegin(hyp); it != ngramScores.nodeEnd(hyp); ++it) {
372
+ const Phrase& ngram = *(it->first);
373
+ if (finalNgramScores.find(ngram) == finalNgramScores.end()) {
374
+ finalNgramScores[ngram] = it->second;
375
+ } else {
376
+ finalNgramScores[ngram] = log_sum(it->second, finalNgramScores[ngram]);
377
+ }
378
+ }
379
+
380
+ if (Z == 9999999) {
381
+ Z = forwardScore[hyp];
382
+ } else {
383
+ Z = log_sum(Z, forwardScore[hyp]);
384
+ }
385
+ }
386
+
387
+ //Z *= scale; //scale the score
388
+
389
+ for (map<Phrase, float>::iterator finalScoresIt = finalNgramScores.begin(); finalScoresIt != finalNgramScores.end(); ++finalScoresIt) {
390
+ finalScoresIt->second = finalScoresIt->second - Z;
391
+ IFVERBOSE(2) {
392
+ VERBOSE(2,finalScoresIt->first << " [" << finalScoresIt->second << "]" << endl);
393
+ }
394
+ }
395
+
396
+ }
397
+
398
+ const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & incomingEdges)
399
+ {
400
+
401
+ if (m_ngrams.size() > 0)
402
+ return m_ngrams;
403
+
404
+ const Phrase& currPhrase = GetWords();
405
+ //Extract the n-grams local to this edge
406
+ for (size_t start = 0; start < currPhrase.GetSize(); ++start) {
407
+ for (size_t end = start; end < start + bleu_order; ++end) {
408
+ if (end < currPhrase.GetSize()) {
409
+ Phrase edgeNgram(end-start+1);
410
+ for (size_t index = start; index <= end; ++index) {
411
+ edgeNgram.AddWord(currPhrase.GetWord(index));
412
+ }
413
+ //cout << "Inserting Phrase : " << edgeNgram << endl;
414
+ vector<const Edge*> edgeHistory;
415
+ edgeHistory.push_back(this);
416
+ storeNgramHistory(edgeNgram, edgeHistory);
417
+ } else {
418
+ break;
419
+ }
420
+ }
421
+ }
422
+
423
+ map<const Hypothesis*, vector<Edge> >::iterator it = incomingEdges.find(m_tailNode);
424
+ if (it != incomingEdges.end()) { //node has incoming edges
425
+ vector<Edge> & inEdges = it->second;
426
+
427
+ for (vector<Edge>::iterator edge = inEdges.begin(); edge != inEdges.end(); ++edge) {//add the ngrams straddling prev and curr edge
428
+ const NgramHistory & edgeIncomingNgrams = edge->GetNgrams(incomingEdges);
429
+ for (NgramHistory::const_iterator edgeInNgramHist = edgeIncomingNgrams.begin(); edgeInNgramHist != edgeIncomingNgrams.end(); ++edgeInNgramHist) {
430
+ const Phrase& edgeIncomingNgram = edgeInNgramHist->first;
431
+ const PathCounts & edgeIncomingNgramPaths = edgeInNgramHist->second;
432
+ size_t back = min(edgeIncomingNgram.GetSize(), edge->GetWordsSize());
433
+ const Phrase& edgeWords = edge->GetWords();
434
+ IFVERBOSE(3) {
435
+ cerr << "Edge: "<< *edge <<endl;
436
+ cerr << "edgeWords: " << edgeWords << endl;
437
+ cerr << "edgeInNgram: " << edgeIncomingNgram << endl;
438
+ }
439
+
440
+ Phrase edgeSuffix(ARRAY_SIZE_INCR);
441
+ Phrase ngramSuffix(ARRAY_SIZE_INCR);
442
+ GetPhraseSuffix(edgeWords,back,edgeSuffix);
443
+ GetPhraseSuffix(edgeIncomingNgram,back,ngramSuffix);
444
+
445
+ if (ngramSuffix == edgeSuffix) { //we've got the suffix of previous edge
446
+ size_t edgeInNgramSize = edgeIncomingNgram.GetSize();
447
+
448
+ for (size_t i = 0; i < GetWordsSize() && i + edgeInNgramSize < bleu_order ; ++i) {
449
+ Phrase newNgram(edgeIncomingNgram);
450
+ for (size_t j = 0; j <= i ; ++j) {
451
+ newNgram.AddWord(GetWords().GetWord(j));
452
+ }
453
+ VERBOSE(3, "Inserting New Phrase : " << newNgram << endl)
454
+
455
+ for (PathCounts::const_iterator pathIt = edgeIncomingNgramPaths.begin(); pathIt != edgeIncomingNgramPaths.end(); ++pathIt) {
456
+ Path newNgramPath = pathIt->first;
457
+ newNgramPath.push_back(this);
458
+ storeNgramHistory(newNgram, newNgramPath, pathIt->second);
459
+ }
460
+ }
461
+ }
462
+ }
463
+ }
464
+ }
465
+ return m_ngrams;
466
+ }
467
+
468
+ //Add the last lastN words of origPhrase to targetPhrase
469
+ void Edge::GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const
470
+ {
471
+ size_t origSize = origPhrase.GetSize();
472
+ size_t startIndex = origSize - lastN;
473
+ for (size_t index = startIndex; index < origPhrase.GetSize(); ++index) {
474
+ targetPhrase.AddWord(origPhrase.GetWord(index));
475
+ }
476
+ }
477
+
478
+ bool Edge::operator< (const Edge& compare ) const
479
+ {
480
+ if (m_headNode->GetId() < compare.m_headNode->GetId())
481
+ return true;
482
+ if (compare.m_headNode->GetId() < m_headNode->GetId())
483
+ return false;
484
+ if (m_tailNode->GetId() < compare.m_tailNode->GetId())
485
+ return true;
486
+ if (compare.m_tailNode->GetId() < m_tailNode->GetId())
487
+ return false;
488
+ return GetScore() < compare.GetScore();
489
+ }
490
+
491
+ ostream& operator<< (ostream& out, const Edge& edge)
492
+ {
493
+ out << "Head: " << edge.m_headNode->GetId()
494
+ << ", Tail: " << edge.m_tailNode->GetId()
495
+ << ", Score: " << edge.m_score
496
+ << ", Phrase: " << edge.m_targetPhrase << endl;
497
+ return out;
498
+ }
499
+
500
+ bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b)
501
+ {
502
+ return (a->GetWordsBitmap().GetNumWordsCovered()
503
+ <
504
+ b->GetWordsBitmap().GetNumWordsCovered());
505
+ }
506
+
507
+ void getLatticeMBRNBest(const Manager& manager, const TrellisPathList& nBestList,
508
+ vector<LatticeMBRSolution>& solutions, size_t n)
509
+ {
510
+ std::map < int, bool > connected;
511
+ std::vector< const Hypothesis *> connectedList;
512
+ map<Phrase, float> ngramPosteriors;
513
+ std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
514
+ map<const Hypothesis*, vector<Edge> > incomingEdges;
515
+ vector< float> estimatedScores;
516
+ manager.GetForwardBackwardSearchGraph(&connected, &connectedList,
517
+ &outgoingHyps, &estimatedScores);
518
+ LMBR_Options const& lmbr = manager.options()->lmbr;
519
+ MBR_Options const& mbr = manager.options()->mbr;
520
+ pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores,
521
+ manager.GetBestHypothesis(), lmbr.pruning_factor, mbr.scale);
522
+ calcNgramExpectations(connectedList, incomingEdges, ngramPosteriors,true);
523
+
524
+ vector<float> mbrThetas = lmbr.theta;
525
+ float p = lmbr.precision;
526
+ float r = lmbr.ratio;
527
+ float mapWeight = lmbr.map_weight;
528
+ if (mbrThetas.size() == 0) {
529
+ // thetas were not specified on the command line, so use p and r instead
530
+ mbrThetas.push_back(-1); //Theta 0
531
+ mbrThetas.push_back(1/(bleu_order*p));
532
+ for (size_t i = 2; i <= bleu_order; ++i) {
533
+ mbrThetas.push_back(mbrThetas[i-1] / r);
534
+ }
535
+ }
536
+ IFVERBOSE(2) {
537
+ VERBOSE(2,"Thetas: ");
538
+ for (size_t i = 0; i < mbrThetas.size(); ++i) {
539
+ VERBOSE(2,mbrThetas[i] << " ");
540
+ }
541
+ VERBOSE(2,endl);
542
+ }
543
+ TrellisPathList::const_iterator iter;
544
+ size_t ctr = 0;
545
+ LatticeMBRSolutionComparator comparator;
546
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter, ++ctr) {
547
+ const TrellisPath &path = **iter;
548
+ solutions.push_back(LatticeMBRSolution(path,iter==nBestList.begin()));
549
+ solutions.back().CalcScore(ngramPosteriors, mbrThetas, mapWeight);
550
+ sort(solutions.begin(), solutions.end(), comparator);
551
+ while (solutions.size() > n) {
552
+ solutions.pop_back();
553
+ }
554
+ }
555
+ VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl);
556
+ }
557
+
558
+ vector<Word> doLatticeMBR(const Manager& manager, const TrellisPathList& nBestList)
559
+ {
560
+
561
+ vector<LatticeMBRSolution> solutions;
562
+ getLatticeMBRNBest(manager, nBestList, solutions,1);
563
+ return solutions.at(0).GetWords();
564
+ }
565
+
566
+ const TrellisPath doConsensusDecoding(const Manager& manager, const TrellisPathList& nBestList)
567
+ {
568
+ static const int BLEU_ORDER = 4;
569
+ static const float SMOOTH = 1;
570
+
571
+ //calculate the ngram expectations
572
+ const StaticData& staticData = StaticData::Instance();
573
+ std::map < int, bool > connected;
574
+ std::vector< const Hypothesis *> connectedList;
575
+ map<Phrase, float> ngramExpectations;
576
+ std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
577
+ map<const Hypothesis*, vector<Edge> > incomingEdges;
578
+ vector< float> estimatedScores;
579
+ manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores);
580
+ LMBR_Options const& lmbr = manager.options()->lmbr;
581
+ MBR_Options const& mbr = manager.options()->mbr;
582
+ pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores,
583
+ manager.GetBestHypothesis(), lmbr.pruning_factor, mbr.scale);
584
+ calcNgramExpectations(connectedList, incomingEdges, ngramExpectations,false);
585
+
586
+ //expected length is sum of expected unigram counts
587
+ //cerr << "Thread " << pthread_self() << " Ngram expectations size: " << ngramExpectations.size() << endl;
588
+ float ref_length = 0.0f;
589
+ for (map<Phrase,float>::const_iterator ref_iter = ngramExpectations.begin();
590
+ ref_iter != ngramExpectations.end(); ++ref_iter) {
591
+ //cerr << "Ngram: " << ref_iter->first << " score: " <<
592
+ // ref_iter->second << endl;
593
+ if (ref_iter->first.GetSize() == 1) {
594
+ ref_length += exp(ref_iter->second);
595
+ // cerr << "Expected for " << ref_iter->first << " is " << exp(ref_iter->second) << endl;
596
+ }
597
+ }
598
+
599
+ VERBOSE(2,"REF Length: " << ref_length << endl);
600
+
601
+ //use the ngram expectations to rescore the nbest list.
602
+ TrellisPathList::const_iterator iter;
603
+ TrellisPathList::const_iterator best = nBestList.end();
604
+ float bestScore = -100000;
605
+ //cerr << "nbest list size: " << nBestList.GetSize() << endl;
606
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
607
+ const TrellisPath &path = **iter;
608
+ vector<Word> words;
609
+ map<Phrase,int> ngrams;
610
+ GetOutputWords(path,words);
611
+ /*for (size_t i = 0; i < words.size(); ++i) {
612
+ cerr << words[i].GetFactor(0)->GetString() << " ";
613
+ }
614
+ cerr << endl;
615
+ */
616
+ extract_ngrams(words,ngrams);
617
+
618
+ vector<float> comps(2*BLEU_ORDER+1);
619
+ float logbleu = 0.0;
620
+ float brevity = 0.0;
621
+ int hyp_length = words.size();
622
+ for (int i = 0; i < BLEU_ORDER; ++i) {
623
+ comps[2*i] = 0.0;
624
+ comps[2*i+1] = max(hyp_length-i,0);
625
+ }
626
+
627
+ for (map<Phrase,int>::const_iterator hyp_iter = ngrams.begin();
628
+ hyp_iter != ngrams.end(); ++hyp_iter) {
629
+ map<Phrase,float>::const_iterator ref_iter = ngramExpectations.find(hyp_iter->first);
630
+ if (ref_iter != ngramExpectations.end()) {
631
+ comps[2*(hyp_iter->first.GetSize()-1)] += min(exp(ref_iter->second), (float)(hyp_iter->second));
632
+ }
633
+
634
+ }
635
+ comps[comps.size()-1] = ref_length;
636
+ /*for (size_t i = 0; i < comps.size(); ++i) {
637
+ cerr << comps[i] << " ";
638
+ }
639
+ cerr << endl;
640
+ */
641
+
642
+ float score = 0.0f;
643
+ if (comps[0] != 0) {
644
+ for (int i=0; i<BLEU_ORDER; i++) {
645
+ if ( i > 0 ) {
646
+ logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH);
647
+ } else {
648
+ logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]);
649
+ }
650
+ }
651
+ logbleu /= BLEU_ORDER;
652
+ brevity = 1.0-(float)comps[comps.size()-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
653
+ if (brevity < 0.0) {
654
+ logbleu += brevity;
655
+ }
656
+ score = exp(logbleu);
657
+ }
658
+
659
+ //cerr << "score: " << score << " bestScore: " << bestScore << endl;
660
+ if (score > bestScore) {
661
+ bestScore = score;
662
+ best = iter;
663
+ VERBOSE(2,"NEW BEST: " << score << endl);
664
+ //for (size_t i = 0; i < comps.size(); ++i) {
665
+ // cerr << comps[i] << " ";
666
+ //}
667
+ //cerr << endl;
668
+ }
669
+ }
670
+
671
+ assert (best != nBestList.end());
672
+ return **best;
673
+ //vector<Word> bestWords;
674
+ //GetOutputWords(**best,bestWords);
675
+ //return bestWords;
676
+ }
677
+
678
+ }
679
+
680
+
mosesdecoder/moses/NonTerminal.cpp ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #include "NonTerminal.h"
3
+
4
+ using namespace std;
5
+
6
+ namespace Moses
7
+ {
8
+ std::ostream& operator<<(std::ostream &out, const NonTerminalSet &obj)
9
+ {
10
+ NonTerminalSet::const_iterator iter;
11
+ for (iter = obj.begin(); iter != obj.end(); ++iter) {
12
+ const Word &word = *iter;
13
+ out << word << " ";
14
+ }
15
+
16
+
17
+ return out;
18
+ }
19
+
20
+
21
+ }
mosesdecoder/moses/PartialTranslOptColl.cpp ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "PartialTranslOptColl.h"
23
+ #include <algorithm>
24
+ #include <iostream>
25
+
26
+ using namespace std;
27
+
28
+ namespace Moses
29
+ {
30
+ /** constructor, intializes counters and thresholds */
31
+ PartialTranslOptColl::PartialTranslOptColl(size_t const maxSize)
32
+ {
33
+ m_bestScore = -std::numeric_limits<float>::infinity();
34
+ m_worstScore = -std::numeric_limits<float>::infinity();
35
+ m_maxSize = maxSize; // StaticData::Instance().GetMaxNoPartTransOpt();
36
+ m_totalPruned = 0;
37
+ }
38
+
39
+
40
+ /** add a partial translation option to the collection (without pruning) */
41
+ void PartialTranslOptColl::AddNoPrune(TranslationOption *partialTranslOpt)
42
+ {
43
+ if (partialTranslOpt->GetFutureScore() >= m_worstScore) {
44
+ m_list.push_back(partialTranslOpt);
45
+ if (partialTranslOpt->GetFutureScore() > m_bestScore)
46
+ m_bestScore = partialTranslOpt->GetFutureScore();
47
+ } else {
48
+ m_totalPruned++;
49
+ delete partialTranslOpt;
50
+ }
51
+ }
52
+
53
+ /** add a partial translation option to the collection, prune if necessary.
54
+ * This is done similar to the Prune() in TranslationOptionCollection */
55
+
56
+ void PartialTranslOptColl::Add(TranslationOption *partialTranslOpt)
57
+ {
58
+ // add
59
+ AddNoPrune(partialTranslOpt );
60
+
61
+ // done if not too large (lazy pruning, only if twice as large as max)
62
+ if ( m_list.size() > 2 * m_maxSize ) {
63
+ Prune();
64
+ }
65
+ }
66
+
67
+
68
+ /** helper, used by pruning */
69
+ bool ComparePartialTranslationOption(const TranslationOption *a, const TranslationOption *b)
70
+ {
71
+ return a->GetFutureScore() > b->GetFutureScore();
72
+ }
73
+
74
+ /** pruning, remove partial translation options, if list too big */
75
+ void PartialTranslOptColl::Prune()
76
+ {
77
+ // done if not too big
78
+ if ( m_list.size() <= m_maxSize ) {
79
+ return;
80
+ }
81
+
82
+ // TRACE_ERR( "pruning partial translation options from size " << m_list.size() << std::endl);
83
+
84
+ // find nth element
85
+ NTH_ELEMENT4(m_list.begin(),
86
+ m_list.begin() + m_maxSize,
87
+ m_list.end(),
88
+ ComparePartialTranslationOption);
89
+
90
+ m_worstScore = m_list[ m_maxSize-1 ]->GetFutureScore();
91
+ // delete the rest
92
+ for (size_t i = m_maxSize ; i < m_list.size() ; ++i) {
93
+ delete m_list[i];
94
+ m_totalPruned++;
95
+ }
96
+ m_list.resize(m_maxSize);
97
+ // TRACE_ERR( "pruned to size " << m_list.size() << ", total pruned: " << m_totalPruned << std::endl);
98
+ }
99
+
100
+ // friend
101
+ ostream& operator<<(ostream& out, const PartialTranslOptColl& possibleTranslation)
102
+ {
103
+ for (size_t i = 0; i < possibleTranslation.m_list.size(); ++i) {
104
+ const TranslationOption &transOpt = *possibleTranslation.m_list[i];
105
+ out << transOpt << endl;
106
+ }
107
+ return out;
108
+ }
109
+
110
+ }
111
+
112
+
mosesdecoder/moses/RuleCubeQueue.cpp ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #include "RuleCubeQueue.h"
23
+
24
+ #include "RuleCubeItem.h"
25
+ #include "StaticData.h"
26
+ #include "ChartManager.h"
27
+
28
+ namespace Moses
29
+ {
30
+
31
+ RuleCubeQueue::~RuleCubeQueue()
32
+ {
33
+ while (!m_queue.empty()) {
34
+ RuleCube *cube = m_queue.top();
35
+ m_queue.pop();
36
+ delete cube;
37
+ }
38
+ }
39
+
40
+ void RuleCubeQueue::Add(RuleCube *ruleCube)
41
+ {
42
+ m_queue.push(ruleCube);
43
+ }
44
+
45
+ ChartHypothesis *RuleCubeQueue::Pop()
46
+ {
47
+ // pop the most promising rule cube
48
+ RuleCube *cube = m_queue.top();
49
+ m_queue.pop();
50
+
51
+ // pop the most promising item from the cube and get the corresponding
52
+ // hypothesis
53
+ RuleCubeItem *item = cube->Pop(m_manager);
54
+ if (m_manager.options()->cube.lazy_scoring) {
55
+ item->CreateHypothesis(cube->GetTranslationOption(), m_manager);
56
+ }
57
+ ChartHypothesis *hypo = item->ReleaseHypothesis();
58
+
59
+ // if the cube contains more items then push it back onto the queue
60
+ if (!cube->IsEmpty()) {
61
+ m_queue.push(cube);
62
+ } else {
63
+ delete cube;
64
+ }
65
+
66
+ return hypo;
67
+ }
68
+
69
+ }
mosesdecoder/moses/RuleCubeQueue.h ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+ // vim:tabstop=2
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2010 Hieu Hoang
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #pragma once
23
+
24
+ #include "RuleCube.h"
25
+
26
+ #include <queue>
27
+ #include <vector>
28
+
29
+ namespace Moses
30
+ {
31
+
32
+ class ChartManager;
33
+
34
+ /** Define an ordering between RuleCube based on their best item scores. This
35
+ * is used to order items in the priority queue.
36
+ */
37
+ class RuleCubeOrderer
38
+ {
39
+ public:
40
+ bool operator()(const RuleCube *p, const RuleCube *q) const {
41
+ return p->GetTopScore() < q->GetTopScore();
42
+ }
43
+ };
44
+
45
+ /** @todo how is this used */
46
+ class RuleCubeQueue
47
+ {
48
+ public:
49
+ RuleCubeQueue(ChartManager &manager) : m_manager(manager) {}
50
+ ~RuleCubeQueue();
51
+
52
+ void Add(RuleCube *);
53
+ ChartHypothesis *Pop();
54
+ bool IsEmpty() const {
55
+ return m_queue.empty();
56
+ }
57
+
58
+ private:
59
+ typedef std::priority_queue<RuleCube*, std::vector<RuleCube*>,
60
+ RuleCubeOrderer > Queue;
61
+
62
+ Queue m_queue;
63
+ ChartManager &m_manager;
64
+ };
65
+
66
+ }
mosesdecoder/moses/SquareMatrix.h ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_SquareMatrix_h
23
+ #define moses_SquareMatrix_h
24
+
25
+ #include <iostream>
26
+ #include "TypeDef.h"
27
+ #include "Util.h"
28
+ #include "Bitmap.h"
29
+
30
+ namespace Moses
31
+ {
32
+
33
+ //! A square array of floats to store future costs in the phrase-based decoder
34
+ class SquareMatrix
35
+ {
36
+ friend std::ostream& operator<<(std::ostream &out, const SquareMatrix &matrix);
37
+ protected:
38
+ const size_t m_size; /**< length of the square (sentence length) */
39
+ float *m_array; /**< two-dimensional array to store floats */
40
+
41
+ SquareMatrix(); // not implemented
42
+ SquareMatrix(const SquareMatrix &copy); // not implemented
43
+
44
+ public:
45
+ SquareMatrix(size_t size)
46
+ :m_size(size) {
47
+ m_array = (float*) malloc(sizeof(float) * size * size);
48
+ }
49
+ ~SquareMatrix() {
50
+ free(m_array);
51
+ }
52
+
53
+ // set upper triangle
54
+ void InitTriangle(float val);
55
+
56
+ /** Returns length of the square: typically the sentence length */
57
+ inline size_t GetSize() const {
58
+ return m_size;
59
+ }
60
+ /** Get a future cost score for a span */
61
+ inline float GetScore(size_t startPos, size_t endPos) const {
62
+ return m_array[startPos * m_size + endPos];
63
+ }
64
+ /** Set a future cost score for a span */
65
+ inline void SetScore(size_t startPos, size_t endPos, float value) {
66
+ m_array[startPos * m_size + endPos] = value;
67
+ }
68
+ float CalcEstimatedScore( Bitmap const& ) const;
69
+ float CalcEstimatedScore( Bitmap const&, size_t startPos, size_t endPos ) const;
70
+
71
+ TO_STRING();
72
+ };
73
+
74
+ inline std::ostream& operator<<(std::ostream &out, const SquareMatrix &matrix)
75
+ {
76
+ for (size_t endPos = 0 ; endPos < matrix.GetSize() ; endPos++) {
77
+ for (size_t startPos = 0 ; startPos < matrix.GetSize() ; startPos++)
78
+ out << matrix.GetScore(startPos, endPos) << " ";
79
+ out << std::endl;
80
+ }
81
+
82
+ return out;
83
+ }
84
+
85
+ }
86
+ #endif
mosesdecoder/moses/StackVec.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2012 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include <vector>
23
+
24
+ namespace Moses
25
+ {
26
+
27
class ChartCellLabel;

// A vector of (non-owned, read-only) ChartCellLabel pointers. No members are
// added on top of std::vector.
class StackVec : public std::vector<const ChartCellLabel*>
{
};
33
+
34
+ }
mosesdecoder/moses/SyntacticLanguageModelFiles.h ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+
3
+ #ifndef moses_SyntacticLanguageModelFiles_h
4
+ #define moses_SyntacticLanguageModelFiles_h
5
+
6
+ #include "nl-iomacros.h"
7
+ #include "nl-string.h"
8
+
9
+ namespace Moses
10
+ {
11
+
12
// Owns the hidden model (MH) and observed model (MO) of a syntactic language
// model. Both are allocated in the constructor, filled from the given model
// files, and deleted in the destructor.
template <class MH, class MO>
class SyntacticLanguageModelFiles
{

public:

  // Allocates both models and reads every file listed in filePaths.
  SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths);
  ~SyntacticLanguageModelFiles();

  // Accessors; the returned pointers remain owned by this object.
  MH* getHiddenModel();
  MO* getObservedModel();

private:
  // Copying is not allowed: both pointers below are deleted in the
  // destructor, so a member-wise copy would delete them twice.
  // Declared but intentionally not implemented.
  SyntacticLanguageModelFiles(const SyntacticLanguageModelFiles&);
  SyntacticLanguageModelFiles& operator=(const SyntacticLanguageModelFiles&);

  MH* hiddenModel;
  MO* observedModel;

};
29
+
30
+
31
+ template <class MH, class MO>
32
+ SyntacticLanguageModelFiles<MH,MO>::SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths)
33
+ {
34
+
35
+ this->hiddenModel = new MH();
36
+ this->observedModel = new MO();
37
+
38
+ //// I. LOAD MODELS...
39
+ std::cerr << "Reading syntactic language model files...\n";
40
+ // For each model file...
41
+ for ( int a=0, n=filePaths.size(); a<n; a++ ) { // read models
42
+ FILE* pf = fopen(filePaths[a].c_str(),"r"); // Read model file
43
+ if(!pf) {
44
+ std::cerr << "Error loading model file " << filePaths[a] << std::endl;
45
+ return;
46
+ }
47
+ std::cerr << "Loading model \'" << filePaths[a] << "\'...\n";
48
+ int c=' ';
49
+ int i=0;
50
+ int line=1;
51
+ String sBuff(1000); // Lookahead/ctrs/buffers
52
+ CONSUME_ALL ( pf, c, WHITESPACE(c), line); // Get to first record
53
+ while ( c!=-1 && c!='\0' && c!='\5' ) { // For each record
54
+ CONSUME_STR ( pf, c, (c!='\n' && c!='\0' && c!='\5'), sBuff, i, line ); // Consume line
55
+ StringInput si(sBuff.c_array());
56
+ if ( !( sBuff[0]=='#' // Accept comments/fields
57
+ || si>>*(this->hiddenModel)>>"\0"!=NULL
58
+ || si>>*(this->observedModel)>>"\0"!=NULL
59
+ ))
60
+ std::cerr<<"\nERROR: can't parse \'"<<sBuff<<"\' in line "<<line<<"\n\n";
61
+ CONSUME_ALL ( pf, c, WHITESPACE(c), line); // Consume whitespace
62
+ if ( line%100000==0 ) std::cerr<<" "<<line<<" lines read...\n"; // Progress for big models
63
+ }
64
+ std::cerr << "Model \'" << filePaths[a] << "\' loaded.\n";
65
+ }
66
+
67
+ std::cerr << "...reading syntactic language model files completed\n";
68
+
69
+
70
+ }
71
+
72
+
73
+ template <class MH, class MO>
74
+ SyntacticLanguageModelFiles<MH,MO>::~SyntacticLanguageModelFiles()
75
+ {
76
+
77
+ VERBOSE(3,"Destructing syntactic language model files" << std::endl);
78
+ delete hiddenModel;
79
+ delete observedModel;
80
+
81
+ }
82
+
83
+
84
+ template <class MH, class MO>
85
+ MH* SyntacticLanguageModelFiles<MH,MO>::getHiddenModel()
86
+ {
87
+
88
+ return this->hiddenModel;
89
+
90
+ }
91
+
92
+ template <class MH, class MO>
93
+ MO* SyntacticLanguageModelFiles<MH,MO>::getObservedModel()
94
+ {
95
+
96
+ return this->observedModel;
97
+
98
+ }
99
+
100
+
101
+ }
102
+
103
+ #endif
mosesdecoder/moses/SyntacticLanguageModelState.h ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+
3
+ #ifndef moses_SyntacticLanguageModelState_h
4
+ #define moses_SyntacticLanguageModelState_h
5
+
6
+ #include "nl-iomacros.h"
7
+ #include "nl-cpt.h"
8
+ #include "nl-hmm.h"
9
+
10
+ #include "SyntacticLanguageModelFiles.h"
11
+ #include "moses/FF/FFState.h"
12
+ #include <string>
13
+
14
+ namespace Moses
15
+ {
16
+
17
+ template <class MY, class MX, class YS=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> >
18
+ class SyntacticLanguageModelState : public FFState
19
+ {
20
+ public:
21
+
22
+ // Initialize an empty LM state
23
+ SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize );
24
+
25
+ // Get the next LM state from an existing LM state and the next word
26
+ SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word );
27
+
28
+
29
+ ~SyntacticLanguageModelState() {
30
+ VERBOSE(3,"Destructing SyntacticLanguageModelState" << std::endl);
31
+ delete randomVariableStore;
32
+ }
33
+
34
+ virtual int Compare(const FFState& other) const;
35
+
36
+ // Get the LM score from this LM state
37
+ double getScore() const;
38
+
39
+ double getProb() const;
40
+
41
+ private:
42
+
43
+ void setScore(double score);
44
+ void printRV();
45
+
46
+ SafeArray1D<Id<int>,pair<YS,LogProb> >* randomVariableStore;
47
+ double prob;
48
+ double score;
49
+ int beamSize;
50
+ SyntacticLanguageModelFiles<MY,MX>* modelData;
51
+ bool sentenceStart;
52
+ };
53
+
54
+
55
+ ////////////////////////////////////////////////////////////////////////////////
56
+
57
+
58
+ template <class MY, class MX, class YS, class B>
59
+ void SyntacticLanguageModelState<MY,MX,YS,B>::printRV()
60
+ {
61
+
62
+ cerr << "*********** BEGIN printRV() ******************" << endl;
63
+ int size=randomVariableStore->getSize();
64
+ cerr << "randomVariableStore->getSize() == " << size << endl;
65
+
66
+ for (int depth=0; depth<size; depth+=1) {
67
+
68
+
69
+ const pair<YS,LogProb> *data = &(randomVariableStore->get(depth));
70
+ std::cerr << "randomVariableStore[" << depth << "]\t" << data->first << "\tprob = " << data->second.toProb() << "\tlogProb = " << double(data->second.toInt())/100 << std::endl;
71
+
72
+ }
73
+ cerr << "*********** END printRV() ******************" << endl;
74
+
75
+ }
76
+
77
+ // Initialize an empty LM state from grammar files
78
+ //
79
+ // nArgs is the number of model files
80
+ // argv is the list of model file names
81
+ //
82
+ template <class MY, class MX, class YS, class B>
83
+ SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize )
84
+ {
85
+
86
+ this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
87
+ this->modelData = modelData;
88
+ this->beamSize = beamSize;
89
+
90
+ // Initialize an empty random variable value
91
+ YS xBEG;
92
+ StringInput(String(BEG_STATE).c_array())>>xBEG>>"\0";
93
+ cerr<<xBEG<<"\n";
94
+
95
+ // cout << "Examining RV store just before RV init" << endl;
96
+ //printRV();
97
+
98
+ // Initialize the random variable store
99
+ this->randomVariableStore->init(1,pair<YS,LogProb>(xBEG,0));
100
+
101
+ this->sentenceStart = true;
102
+
103
+ IFVERBOSE(3) {
104
+ VERBOSE(3,"Examining RV store just after RV init" << endl);
105
+ printRV();
106
+ }
107
+
108
+ // Get score of final frame in HHMM
109
+ LogProb l(1.0);
110
+ //score = l.toDouble();
111
+ setScore(l.toDouble());
112
+ // MY::F_ROOT_OBS = true;
113
+ // this->modelData->getHiddenModel()->setRootObs(true);
114
+
115
+
116
+ }
117
+
118
+
119
+ template <class MY, class MX, class YS, class B>
120
+ int SyntacticLanguageModelState<MY,MX,YS,B>::Compare(const FFState& other) const
121
+ {
122
+ /*
123
+ const SyntacticLanguageModelState<MY,MX,YS,B>& o =
124
+ static_cast<const SyntacticLanguageModelState<MY,MX,YS,B>&>(other);
125
+
126
+ if (o.score > score) return 1;
127
+ else if (o.score < score) return -1;
128
+ else return 0;
129
+ */
130
+ return 0;
131
+ }
132
+
133
+
134
+ template <class MY, class MX, class YS, class B>
135
+ SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word )
136
+ {
137
+
138
+ // Initialize member variables
139
+ this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
140
+ this->modelData = prev->modelData;
141
+ this->beamSize = prev->beamSize;
142
+ this->randomVariableStore->init(this->beamSize);
143
+ this->sentenceStart=false;
144
+
145
+ YS ysEND;
146
+ StringInput(String(END_STATE).c_array())>>ysEND>>"\0";
147
+
148
+ // Get HHMM model files
149
+ MY& mH = *(modelData->getHiddenModel());
150
+ MX& mO = *(modelData->getObservedModel());
151
+
152
+ // Initialize HHMM
153
+ HMM<MY,MX,YS,B> hmm(mH,mO);
154
+ int MAX_WORDS = 2;
155
+ hmm.init(MAX_WORDS,this->beamSize,prev->randomVariableStore);
156
+ typename MX::RandVarType x(word.c_str());
157
+ // cout << "Examining HHMM just after hmm.init" << endl;
158
+ // hmm.debugPrint();
159
+
160
+
161
+ /* cerr << "*********** BEGIN writeCurr() ******************" << endl;
162
+ hmm.writeCurr(cout,0);
163
+ hmm.writeCurr(cout,1);
164
+ cerr << "*********** END writeCurr() ******************" << endl;
165
+ */
166
+ /*
167
+ {
168
+
169
+ int wnum=1;
170
+ list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list
171
+ for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame
172
+ cout << "HYPOTH " << wnum
173
+ << " " << i->getBackData()
174
+ << " " << x
175
+ << " " << i->getId()
176
+ << " (" << i->getLogProb() << ")"
177
+ << endl; // print RV val
178
+ }
179
+ }
180
+ */
181
+
182
+
183
+ /*
184
+ cerr << "Writing hmm.writeCurr" << endl;
185
+ hmm.writeCurr(cerr,0);
186
+ hmm.writeCurr(cerr,1);
187
+ cerr << "...done writing hmm.writeCurr" << endl;
188
+ */
189
+ hmm.getCurrSum();
190
+
191
+
192
+
193
+ // Initialize observed variable
194
+ // typename MX::RandVarType ov;
195
+ // ov.set(word.c_str(),mO);
196
+ // MY::WORD = ov.getW();
197
+ //bool endOfSentence = prev->sentenceStart;//true;
198
+
199
+ // std::cerr << "About to give HHMM a word of input:\t" << word << std::endl;
200
+
201
+ hmm.updateRanked(x, prev->sentenceStart);
202
+
203
+ // cout << "Examining HHMM just after hmm.updateRanked(" << x << "," << prev->sentenceStart << ")" << endl;
204
+ // hmm.debugPrint();
205
+ /*
206
+ cerr << "*********** BEGIN writeCurr() ******************" << endl;
207
+ hmm.writeCurr(cout,0);
208
+ hmm.writeCurr(cout,1);
209
+ cerr << "*********** END writeCurr() ******************" << endl;
210
+ */
211
+ /*
212
+ {
213
+
214
+ int wnum=1;
215
+ list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list
216
+ for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame
217
+ cout << "HYPOTH " << wnum
218
+ << " " << i->getBackData()
219
+ << " " << x
220
+ << " " << i->getId()
221
+ << " (" << i->getLogProb() << ")"
222
+ << endl; // print RV val
223
+ }
224
+ }
225
+ */
226
+ // X ov(word.c_str());
227
+ //mH.setWord(ov);
228
+ // MY::WORD = ov;//ov.getW();
229
+
230
+ // Update HHMM based on observed variable
231
+ //hmm.updateRanked(ov);
232
+ //mH.setRootObs(true);
233
+ //MY::F_ROOT_OBS = false;
234
+
235
+ // Get the current score
236
+ double currSum = hmm.getCurrSum();
237
+ //VERBOSE(3,"Setting score using currSum for " << scientific << x << " = " << currSum << endl);
238
+ setScore(currSum);
239
+ // cout << "Examining RV store just before RV init via gatherElementsInBeam" << endl;
240
+ // printRV();
241
+
242
+ // Get new hidden random variable store from HHMM
243
+ hmm.gatherElementsInBeam(randomVariableStore);
244
+ // cout << "Examining RV store just after RV init via gatherElementsInBeam" << endl;
245
+ // printRV();
246
+ /*
247
+ cerr << "Writing hmm.writeCurr..." << endl;
248
+ hmm.writeCurr(cerr,0);
249
+ hmm.writeCurr(cerr,1);
250
+ cerr << "...done writing hmm.writeCurr" << endl;
251
+ */
252
+ }
253
+
254
+
255
+ template <class MY, class MX, class YS, class B>
256
+ double SyntacticLanguageModelState<MY,MX,YS,B>::getProb() const
257
+ {
258
+
259
+ return prob;
260
+ }
261
+
262
+ template <class MY, class MX, class YS, class B>
263
+ double SyntacticLanguageModelState<MY,MX,YS,B>::getScore() const
264
+ {
265
+
266
+ return score;
267
+ }
268
+
269
+
270
+ template <class MY, class MX, class YS, class B>
271
+ void SyntacticLanguageModelState<MY,MX,YS,B>::setScore(double score)
272
+ {
273
+
274
+
275
+
276
+
277
+ this->prob = score;
278
+
279
+ // We want values to range from -100 to 0
280
+ //
281
+ // If the minimum positive value for a double is min=4.94065645841246544e-324
282
+ // then to scale, we want a logarithmic base such that log_b(min)=-100
283
+ //
284
+ // -100 = log(min) / log(b)
285
+ //
286
+ // log(b) = log(min) / -100
287
+ //
288
+ // b = exp( log(min) / -100 )
289
+ //
290
+ // b = 7.44440071921381
291
+
292
+ // Check for score==0 to avoid causing -infinity with log(score)
293
+ if (score==0) {
294
+ this->score = -100;
295
+ } else {
296
+ double x = log(score) / 7.44440071921381;
297
+ if ( x >= -100) {
298
+ this->score = x;
299
+ } else {
300
+ this->score = -100;
301
+ }
302
+ }
303
+
304
+ VERBOSE(3,"\tSyntacticLanguageModelState has score=" << this->score << endl);
305
+
306
+ }
307
+
308
+
309
+ }
310
+
311
+ #endif
mosesdecoder/moses/Syntax/F2S/DerivationWriter.h ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <ostream>
4
+
5
+ #include "moses/Syntax/KBestExtractor.h"
6
+ #include "moses/Word.h"
7
+
8
+ namespace Moses
9
+ {
10
+ namespace Syntax
11
+ {
12
+ struct SHyperedge;
13
+
14
+ namespace F2S
15
+ {
16
+
17
+ // Writes a string representation of a derivation to a std::ostream. This is
18
+ // used by the -translation-details / -T option.
19
+ // TODO Merge this with S2T::DerivationWriter.
20
+ class DerivationWriter
21
+ {
22
+ public:
23
+ // 1-best version.
24
+ static void Write(const SHyperedge&, std::size_t, std::ostream &);
25
+
26
+ // k-best version.
27
+ static void Write(const KBestExtractor::Derivation &, std::size_t,
28
+ std::ostream &);
29
+ private:
30
+ static void WriteLine(const SHyperedge &, std::size_t, std::ostream &);
31
+ static void WriteSymbol(const Word &, std::ostream &);
32
+ };
33
+
34
+ } // namespace F2S
35
+ } // namespace Syntax
36
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/Forest.h ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "vector"
4
+
5
+ #include "moses/Syntax/PVertex.h"
6
+
7
+ namespace Moses
8
+ {
9
+ namespace Syntax
10
+ {
11
+ namespace F2S
12
+ {
13
+
14
+ class Forest
15
+ {
16
+ public:
17
+ struct Vertex;
18
+
19
+ struct Hyperedge {
20
+ Vertex *head;
21
+ std::vector<Vertex *> tail;
22
+ float weight;
23
+ };
24
+
25
+ struct Vertex {
26
+ Vertex(const PVertex &v) : pvertex(v) {}
27
+ ~Vertex(); // Deletes incoming hyperedges.
28
+ PVertex pvertex;
29
+ std::vector<Hyperedge *> incoming;
30
+ };
31
+
32
+ // Constructor.
33
+ Forest() {}
34
+
35
+ // Destructor (deletes vertices).
36
+ ~Forest();
37
+
38
+ // Delete all vertices.
39
+ void Clear();
40
+
41
+ std::vector<Vertex *> vertices;
42
+
43
+ private:
44
+ // Copying is not allowed.
45
+ Forest(const Forest &);
46
+ Forest &operator=(const Forest &);
47
+ };
48
+
49
+ } // namespace F2S
50
+ } // namespace Syntax
51
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.cpp ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "GlueRuleSynthesizer.h"
2
+
3
+ #include <sstream>
4
+
5
+ #include "moses/FF/UnknownWordPenaltyProducer.h"
6
+ #include "util/string_stream.hh"
7
+ #include "moses/parameters/AllOptions.h"
8
+ namespace Moses
9
+ {
10
+ namespace Syntax
11
+ {
12
+ namespace F2S
13
+ {
14
+
15
+ GlueRuleSynthesizer::
16
+ GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &trie)
17
+ : m_input_default_nonterminal(opts.syntax.input_default_non_terminal)
18
+ , m_output_default_nonterminal(opts.syntax.output_default_non_terminal)
19
+ , m_hyperTree(trie)
20
+ {
21
+ Word *lhs = NULL;
22
+ m_dummySourcePhrase.CreateFromString(Input, opts.input.factor_order, "hello", &lhs);
23
+ delete lhs;
24
+ }
25
+
26
+ void GlueRuleSynthesizer::SynthesizeRule(const Forest::Hyperedge &e)
27
+ {
28
+ HyperPath source;
29
+ SynthesizeHyperPath(e, source);
30
+ TargetPhrase *tp = SynthesizeTargetPhrase(e);
31
+ TargetPhraseCollection::shared_ptr tpc
32
+ = GetOrCreateTargetPhraseCollection(m_hyperTree, source);
33
+ tpc->Add(tp);
34
+ }
35
+
36
+ void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e,
37
+ HyperPath &path)
38
+ {
39
+ path.nodeSeqs.clear();
40
+ path.nodeSeqs.resize(2);
41
+ path.nodeSeqs[0].push_back(e.head->pvertex.symbol[0]->GetId());
42
+ for (std::vector<Forest::Vertex*>::const_iterator p = e.tail.begin();
43
+ p != e.tail.end(); ++p) {
44
+ const Forest::Vertex &child = **p;
45
+ path.nodeSeqs[1].push_back(child.pvertex.symbol[0]->GetId());
46
+ }
47
+ }
48
+
49
+ TargetPhrase*
50
+ GlueRuleSynthesizer::
51
+ SynthesizeTargetPhrase(const Forest::Hyperedge &e)
52
+ {
53
+ const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
54
+ UnknownWordPenaltyProducer::Instance();
55
+
56
+ TargetPhrase *targetPhrase = new TargetPhrase();
57
+
58
+ util::StringStream alignmentSS;
59
+ for (std::size_t i = 0; i < e.tail.size(); ++i) {
60
+ const Word &symbol = e.tail[i]->pvertex.symbol;
61
+ if (symbol.IsNonTerminal()) {
62
+ targetPhrase->AddWord(m_output_default_nonterminal);
63
+ } else {
64
+ // TODO Check this
65
+ Word &targetWord = targetPhrase->AddWord();
66
+ targetWord.CreateUnknownWord(symbol);
67
+ }
68
+ alignmentSS << i << "-" << i << " ";
69
+ }
70
+
71
+ // Assign the lowest possible score so that glue rules are only used when
72
+ // absolutely required.
73
+ float score = LOWEST_SCORE;
74
+ targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
75
+ targetPhrase->EvaluateInIsolation(m_dummySourcePhrase);
76
+ Word *targetLhs = new Word(m_output_default_nonterminal);
77
+ targetPhrase->SetTargetLHS(targetLhs);
78
+ targetPhrase->SetAlignmentInfo(alignmentSS.str());
79
+
80
+ return targetPhrase;
81
+ }
82
+
83
+ } // F2S
84
+ } // Syntax
85
+ } // Moses
mosesdecoder/moses/Syntax/F2S/GlueRuleSynthesizer.h ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "moses/Phrase.h"
4
+ #include "moses/TargetPhrase.h"
5
+
6
+ #include "HyperTree.h"
7
+ #include "HyperTreeCreator.h"
8
+ #include "Forest.h"
9
+
10
+ namespace Moses
11
+ {
12
+ class AllOptions;
13
+ namespace Syntax
14
+ {
15
+ namespace F2S
16
+ {
17
+
18
+ class GlueRuleSynthesizer : public HyperTreeCreator
19
+ {
20
+ Word m_input_default_nonterminal;
21
+ Word m_output_default_nonterminal;
22
+ public:
23
+ GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &);
24
+
25
+ // Synthesize the minimal, monotone rule that can be applied to the given
26
+ // hyperedge and add it to the rule trie.
27
+ void SynthesizeRule(const Forest::Hyperedge &);
28
+
29
+ private:
30
+ void SynthesizeHyperPath(const Forest::Hyperedge &, HyperPath &);
31
+
32
+ TargetPhrase *SynthesizeTargetPhrase(const Forest::Hyperedge &);
33
+
34
+ HyperTree &m_hyperTree;
35
+ Phrase m_dummySourcePhrase;
36
+ };
37
+
38
+ } // F2S
39
+ } // Syntax
40
+ } // Moses
mosesdecoder/moses/Syntax/F2S/HyperPath.cpp ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "HyperPath.h"
2
+
3
+ #include <limits>
4
+
5
+ namespace Moses
6
+ {
7
+ namespace Syntax
8
+ {
9
+ namespace F2S
10
+ {
11
+
12
+ const std::size_t HyperPath::kEpsilon =
13
+ std::numeric_limits<std::size_t>::max()-1;
14
+
15
+ const std::size_t HyperPath::kComma =
16
+ std::numeric_limits<std::size_t>::max()-2;
17
+
18
+ } // namespace F2S
19
+ } // namespace Syntax
20
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/HyperPath.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <vector>
4
+
5
+ #include "moses/Factor.h"
6
+
7
+ namespace Moses
8
+ {
9
+ namespace Syntax
10
+ {
11
+ namespace F2S
12
+ {
13
+
14
// HyperPath: the source-side tree fragment of a tree-to-string rule, stored
// as a sequence of node sequences. Described in:
//
//  Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
//  "Fast Translation Rule Matching for Syntax-based Statistical Machine
//   Translation"
//  In proceedings of EMNLP 2009
//
struct HyperPath {
public:
  // One sequence of node symbols.
  typedef std::vector<std::size_t> NodeSeq;

  // Reserved symbol values (defined out of line).
  static const std::size_t kEpsilon;
  static const std::size_t kComma;

  // The node sequences making up the path.
  std::vector<NodeSeq> nodeSeqs;
};
31
+
32
+ } // namespace F2S
33
+ } // namespace Syntax
34
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/HyperPathLoader.h ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <stack>
4
+ #include <vector>
5
+
6
+ #include "util/string_piece.hh"
7
+
8
+ #include "moses/FactorCollection.h"
9
+ #include "moses/TypeDef.h"
10
+
11
+ #include "HyperPath.h"
12
+ #include "TreeFragmentTokenizer.h"
13
+
14
+ namespace Moses
15
+ {
16
+ namespace Syntax
17
+ {
18
+ namespace F2S
19
+ {
20
+
21
+ // Parses a string representation of a tree fragment, adding the terminals
22
+ // and non-terminals to FactorCollection::Instance() and building a
23
+ // HyperPath object.
24
+ //
25
+ // This class is designed to be used during rule table loading. Since every
26
+ // rule has a tree fragment on the source-side, Load() may be called millions
27
+ // of times. The algorithm therefore sacrifices readability for speed and
28
+ // shoehorns everything into two passes over the input token sequence.
29
+ //
30
+ class HyperPathLoader
31
+ {
32
+ public:
33
+ void Load(const StringPiece &, HyperPath &);
34
+
35
+ private:
36
+ struct NodeTuple {
37
+ int index; // Preorder index of the node.
38
+ int parent; // Preorder index of the node's parent.
39
+ int depth; // Depth of the node.
40
+ std::size_t symbol; // Either the factor ID of a tree terminal/non-terminal
41
+ // or for virtual nodes, HyperPath::kEpsilon.
42
+ };
43
+
44
+ // Determine the height of the current tree fragment (stored in m_tokenSeq).
45
+ int DetermineHeight() const;
46
+
47
+ // Generate the preorder sequence of NodeTuples for the current tree fragment,
48
+ // including virtual nodes.
49
+ void GenerateNodeTupleSeq(int height);
50
+
51
+ const Factor *AddTerminalFactor(const StringPiece &s) {
52
+ return FactorCollection::Instance().AddFactor(s, false);
53
+ }
54
+
55
+ const Factor *AddNonTerminalFactor(const StringPiece &s) {
56
+ return FactorCollection::Instance().AddFactor(s, true);
57
+ }
58
+
59
+ std::vector<TreeFragmentToken> m_tokenSeq;
60
+ std::vector<NodeTuple> m_nodeTupleSeq;
61
+ std::stack<int> m_parentStack;
62
+ };
63
+
64
+ } // namespace F2S
65
+ } // namespace Syntax
66
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/HyperTree.cpp ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "HyperTree.h"
2
+
3
+ namespace Moses
4
+ {
5
+ namespace Syntax
6
+ {
7
+ namespace F2S
8
+ {
9
+
10
+ void HyperTree::Node::Prune(std::size_t tableLimit)
11
+ {
12
+ // Recusively prune child nodes.
13
+ for (Map::iterator p = m_map.begin(); p != m_map.end(); ++p) {
14
+ p->second.Prune(tableLimit);
15
+ }
16
+ // Prune TargetPhraseCollection at this node.
17
+ m_targetPhraseCollection->Prune(true, tableLimit);
18
+ }
19
+
20
+ void HyperTree::Node::Sort(std::size_t tableLimit)
21
+ {
22
+ // Recusively sort child nodes.
23
+ for (Map::iterator p = m_map.begin(); p != m_map.end(); ++p) {
24
+ p->second.Sort(tableLimit);
25
+ }
26
+ // Sort TargetPhraseCollection at this node.
27
+ m_targetPhraseCollection->Sort(true, tableLimit);
28
+ }
29
+
30
+ HyperTree::Node *HyperTree::Node::GetOrCreateChild(
31
+ const HyperPath::NodeSeq &nodeSeq)
32
+ {
33
+ return &m_map[nodeSeq];
34
+ }
35
+
36
+ const HyperTree::Node *HyperTree::Node::GetChild(
37
+ const HyperPath::NodeSeq &nodeSeq) const
38
+ {
39
+ Map::const_iterator p = m_map.find(nodeSeq);
40
+ return (p == m_map.end()) ? NULL : &p->second;
41
+ }
42
+
43
+ TargetPhraseCollection::shared_ptr HyperTree::GetOrCreateTargetPhraseCollection(
44
+ const HyperPath &hyperPath)
45
+ {
46
+ Node &node = GetOrCreateNode(hyperPath);
47
+ return node.GetTargetPhraseCollection();
48
+ }
49
+
50
+ HyperTree::Node &HyperTree::GetOrCreateNode(const HyperPath &hyperPath)
51
+ {
52
+ const std::size_t height = hyperPath.nodeSeqs.size();
53
+ Node *node = &m_root;
54
+ for (std::size_t i = 0; i < height; ++i) {
55
+ const HyperPath::NodeSeq &nodeSeq = hyperPath.nodeSeqs[i];
56
+ node = node->GetOrCreateChild(nodeSeq);
57
+ }
58
+ return *node;
59
+ }
60
+
61
+ void HyperTree::SortAndPrune(std::size_t tableLimit)
62
+ {
63
+ if (tableLimit) {
64
+ m_root.Sort(tableLimit);
65
+ }
66
+ }
67
+
68
+ } // namespace F2S
69
+ } // namespace Syntax
70
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/HyperTree.h ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <map>
4
+ #include <vector>
5
+
6
+ #include <boost/unordered_map.hpp>
7
+
8
+ #include "moses/Syntax/RuleTable.h"
9
+ #include "moses/TargetPhraseCollection.h"
10
+
11
+ #include "HyperPath.h"
12
+
13
+ namespace Moses
14
+ {
15
+ namespace Syntax
16
+ {
17
+ namespace F2S
18
+ {
19
+
20
+ // A HyperTree for representing a tree-to-string rule table. See this paper:
21
+ //
22
+ // Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
23
+ // "Fast Translation Rule Matching for Syntax-based Statistical Machine
24
+ // Translation"
25
+ // In proceedings of EMNLP 2009
26
+ //
27
+ class HyperTree : public RuleTable
28
+ {
29
+ public:
30
+ class Node
31
+ {
32
+ public:
33
+ typedef boost::unordered_map<HyperPath::NodeSeq, Node> Map;
34
+
35
+ bool IsLeaf() const {
36
+ return m_map.empty();
37
+ }
38
+
39
+ bool HasRules() const {
40
+ return !m_targetPhraseCollection->IsEmpty();
41
+ }
42
+
43
+ void Prune(std::size_t tableLimit);
44
+ void Sort(std::size_t tableLimit);
45
+
46
+ Node *GetOrCreateChild(const HyperPath::NodeSeq &);
47
+
48
+ const Node *GetChild(const HyperPath::NodeSeq &) const;
49
+
50
+ TargetPhraseCollection::shared_ptr
51
+ GetTargetPhraseCollection() const {
52
+ return m_targetPhraseCollection;
53
+ }
54
+
55
+ TargetPhraseCollection::shared_ptr
56
+ GetTargetPhraseCollection() {
57
+ return m_targetPhraseCollection;
58
+ }
59
+
60
+ const Map &GetMap() const {
61
+ return m_map;
62
+ }
63
+
64
+ Node() : m_targetPhraseCollection(new TargetPhraseCollection) { }
65
+
66
+ private:
67
+ Map m_map;
68
+ TargetPhraseCollection::shared_ptr m_targetPhraseCollection;
69
+ };
70
+
71
+ HyperTree(const RuleTableFF *ff) : RuleTable(ff) { }
72
+
73
+ const Node &GetRootNode() const {
74
+ return m_root;
75
+ }
76
+
77
+ private:
78
+ friend class HyperTreeCreator;
79
+
80
+ TargetPhraseCollection::shared_ptr
81
+ GetOrCreateTargetPhraseCollection(const HyperPath &);
82
+
83
+ Node &GetOrCreateNode(const HyperPath &);
84
+
85
+ void SortAndPrune(std::size_t);
86
+
87
+ Node m_root;
88
+ };
89
+
90
+ } // namespace F2S
91
+ } // namespace Syntax
92
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.cpp ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "HyperTreeLoader.h"
2
+
3
+ #include <sys/stat.h>
4
+
5
+ #include <cmath>
6
+ #include <cstdlib>
7
+ #include <fstream>
8
+ #include <string>
9
+ #include <iterator>
10
+ #include <algorithm>
11
+ #include <iostream>
12
+
13
+ #include "moses/FactorCollection.h"
14
+ #include "moses/Word.h"
15
+ #include "moses/Util.h"
16
+ #include "moses/Timer.h"
17
+ #include "moses/InputFileStream.h"
18
+ #include "moses/StaticData.h"
19
+ #include "moses/Range.h"
20
+ #include "moses/ChartTranslationOptionList.h"
21
+ #include "moses/FactorCollection.h"
22
+ #include "moses/Syntax/RuleTableFF.h"
23
+ #include "moses/parameters/AllOptions.h"
24
+ #include "util/file_piece.hh"
25
+ #include "util/string_piece.hh"
26
+ #include "util/tokenize_piece.hh"
27
+ #include "util/double-conversion/double-conversion.h"
28
+ #include "util/exception.hh"
29
+
30
+ #include "HyperPath.h"
31
+ #include "HyperPathLoader.h"
32
+ #include "HyperTree.h"
33
+
34
+ namespace Moses
35
+ {
36
+
37
+ namespace Syntax
38
+ {
39
+ namespace F2S
40
+ {
41
+
42
+ bool HyperTreeLoader::Load(AllOptions const& opts,
43
+ const std::vector<FactorType> &input,
44
+ const std::vector<FactorType> &output,
45
+ const std::string &inFile,
46
+ const RuleTableFF &ff,
47
+ HyperTree &trie,
48
+ boost::unordered_set<std::size_t> &sourceTermSet)
49
+ {
50
+ PrintUserTime(std::string("Start loading HyperTree"));
51
+
52
+ sourceTermSet.clear();
53
+
54
+ std::size_t count = 0;
55
+
56
+ std::ostream *progress = NULL;
57
+ IFVERBOSE(1) progress = &std::cerr;
58
+ util::FilePiece in(inFile.c_str(), progress);
59
+
60
+ // reused variables
61
+ std::vector<float> scoreVector;
62
+ StringPiece line;
63
+
64
+ double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
65
+
66
+ HyperPathLoader hyperPathLoader;
67
+
68
+ Phrase dummySourcePhrase;
69
+ {
70
+ Word *lhs = NULL;
71
+ dummySourcePhrase.CreateFromString(Input, input, "hello", &lhs);
72
+ delete lhs;
73
+ }
74
+
75
+ while(true) {
76
+ try {
77
+ line = in.ReadLine();
78
+ } catch (const util::EndOfFileException &e) {
79
+ break;
80
+ }
81
+
82
+ util::TokenIter<util::MultiCharacter> pipes(line, "|||");
83
+ StringPiece sourceString(*pipes);
84
+ StringPiece targetString(*++pipes);
85
+ StringPiece scoreString(*++pipes);
86
+
87
+ StringPiece alignString;
88
+ if (++pipes) {
89
+ StringPiece temp(*pipes);
90
+ alignString = temp;
91
+ }
92
+
93
+ ++pipes; // counts
94
+
95
+ scoreVector.clear();
96
+ for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
97
+ int processed;
98
+ float score = converter.StringToFloat(s->data(), s->length(), &processed);
99
+ UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count);
100
+ scoreVector.push_back(FloorScore(TransformScore(score)));
101
+ }
102
+ const std::size_t numScoreComponents = ff.GetNumScoreComponents();
103
+ if (scoreVector.size() != numScoreComponents) {
104
+ UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
105
+ << numScoreComponents << ") of score components on line " << count);
106
+ }
107
+
108
+ // Source-side
109
+ HyperPath sourceFragment;
110
+ hyperPathLoader.Load(sourceString, sourceFragment);
111
+ ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);
112
+
113
+ // Target-side
114
+ TargetPhrase *targetPhrase = new TargetPhrase(&ff);
115
+ Word *targetLHS = NULL;
116
+ targetPhrase->CreateFromString(Output, output, targetString, &targetLHS);
117
+ targetPhrase->SetTargetLHS(targetLHS);
118
+ targetPhrase->SetAlignmentInfo(alignString);
119
+
120
+ if (++pipes) {
121
+ StringPiece sparseString(*pipes);
122
+ targetPhrase->SetSparseScore(&ff, sparseString);
123
+ }
124
+
125
+ if (++pipes) {
126
+ StringPiece propertiesString(*pipes);
127
+ targetPhrase->SetProperties(propertiesString);
128
+ }
129
+
130
+ targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
131
+ targetPhrase->EvaluateInIsolation(dummySourcePhrase,
132
+ ff.GetFeaturesToApply());
133
+
134
+ // Add rule to trie.
135
+ TargetPhraseCollection::shared_ptr phraseColl
136
+ = GetOrCreateTargetPhraseCollection(trie, sourceFragment);
137
+ phraseColl->Add(targetPhrase);
138
+
139
+ count++;
140
+ }
141
+
142
+ // sort and prune each target phrase collection
143
+ if (ff.GetTableLimit()) {
144
+ SortAndPrune(trie, ff.GetTableLimit());
145
+ }
146
+
147
+ return true;
148
+ }
149
+
150
+ void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
151
+ const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
152
+ {
153
+ for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
154
+ p != hp.nodeSeqs.end(); ++p) {
155
+ for (std::vector<std::size_t>::const_iterator q = p->begin();
156
+ q != p->end(); ++q) {
157
+ const std::size_t factorId = *q;
158
+ if (factorId >= moses_MaxNumNonterminals &&
159
+ factorId != HyperPath::kComma &&
160
+ factorId != HyperPath::kEpsilon) {
161
+ sourceTerminalSet.insert(factorId);
162
+ }
163
+ }
164
+ }
165
+ }
166
+
167
+ } // namespace F2S
168
+ } // namespace Syntax
169
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/HyperTreeLoader.h ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <istream>
4
+ #include <vector>
5
+
6
+ #include <boost/unordered_set.hpp>
7
+
8
+ #include "moses/TypeDef.h"
9
+ #include "moses/Syntax/RuleTableFF.h"
10
+
11
+ #include "HyperPath.h"
12
+ #include "HyperTree.h"
13
+ #include "HyperTreeCreator.h"
14
+
15
+ namespace Moses
16
+ {
17
+ class AllOptions;
18
+ namespace Syntax
19
+ {
20
+ namespace F2S
21
+ {
22
+
23
+ class HyperTreeLoader : public HyperTreeCreator
24
+ {
25
+ public:
26
+ bool Load(AllOptions const& opts,
27
+ const std::vector<FactorType> &input,
28
+ const std::vector<FactorType> &output,
29
+ const std::string &inFile,
30
+ const RuleTableFF &,
31
+ HyperTree &,
32
+ boost::unordered_set<std::size_t> &);
33
+
34
+ private:
35
+ void ExtractSourceTerminalSetFromHyperPath(
36
+ const HyperPath &, boost::unordered_set<std::size_t> &);
37
+ };
38
+
39
+ } // namespace F2S
40
+ } // namespace Syntax
41
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/PHyperedgeToSHyperedgeBundle.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "moses/Syntax/PHyperedge.h"
4
+ #include "moses/Syntax/PVertex.h"
5
+ #include "moses/Syntax/SHyperedgeBundle.h"
6
+
7
+ #include "PVertexToStackMap.h"
8
+
9
+ namespace Moses
10
+ {
11
+ namespace Syntax
12
+ {
13
+ namespace F2S
14
+ {
15
+
16
+ // Given a PHyperedge object and SStackSet produces a SHyperedgeBundle object.
17
+ inline void PHyperedgeToSHyperedgeBundle(const PHyperedge &hyperedge,
18
+ const PVertexToStackMap &stackMap,
19
+ SHyperedgeBundle &bundle)
20
+ {
21
+ bundle.inputWeight = hyperedge.label.inputWeight;
22
+ bundle.translations = hyperedge.label.translations;
23
+ bundle.stacks.clear();
24
+ for (std::vector<PVertex*>::const_iterator p = hyperedge.tail.begin();
25
+ p != hyperedge.tail.end(); ++p) {
26
+ PVertexToStackMap::const_iterator q = stackMap.find(*p);
27
+ const SVertexStack &stack = q->second;
28
+ bundle.stacks.push_back(&stack);
29
+ }
30
+ }
31
+
32
+ } // F2S
33
+ } // Syntax
34
+ } // Moses
mosesdecoder/moses/Syntax/F2S/PVertexToStackMap.h ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <boost/unordered_map.hpp>
4
+
5
+ #include "moses/Syntax/PVertex.h"
6
+ #include "moses/Syntax/SVertexStack.h"
7
+
8
+
9
+ namespace Moses
10
+ {
11
+ namespace Syntax
12
+ {
13
+ namespace F2S
14
+ {
15
+
16
+ typedef boost::unordered_map<const PVertex *, SVertexStack> PVertexToStackMap; // Keyed by PVertex address; each vertex maps to its own SVertexStack.
17
+
18
+ } // namespace F2S
19
+ } // namespace Syntax
20
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/RuleMatcherCallback.h ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "moses/Syntax/BoundedPriorityContainer.h"
4
+ #include "moses/Syntax/PHyperedge.h"
5
+ #include "moses/Syntax/PVertex.h"
6
+ #include "moses/Syntax/SHyperedgeBundle.h"
7
+ #include "moses/Syntax/SHyperedgeBundleScorer.h"
8
+
9
+ #include "PHyperedgeToSHyperedgeBundle.h"
10
+ #include "PVertexToStackMap.h"
11
+
12
+ namespace Moses
13
+ {
14
+ namespace Syntax
15
+ {
16
+ namespace F2S
17
+ {
18
+
19
+ class RuleMatcherCallback
20
+ {
21
+ private:
22
+ typedef BoundedPriorityContainer<SHyperedgeBundle> Container;
23
+
24
+ public:
25
+ RuleMatcherCallback(const PVertexToStackMap &stackMap, std::size_t ruleLimit)
26
+ : m_stackMap(stackMap)
27
+ , m_container(ruleLimit) {}
28
+
29
+ void operator()(const PHyperedge &hyperedge) {
30
+ PHyperedgeToSHyperedgeBundle(hyperedge, m_stackMap, m_tmpBundle);
31
+ float score = SHyperedgeBundleScorer::Score(m_tmpBundle);
32
+ m_container.SwapIn(m_tmpBundle, score);
33
+ }
34
+
35
+ void ClearContainer() {
36
+ m_container.LazyClear();
37
+ }
38
+
39
+ const Container &GetContainer() {
40
+ return m_container;
41
+ }
42
+
43
+ private:
44
+ const PVertexToStackMap &m_stackMap;
45
+ SHyperedgeBundle m_tmpBundle;
46
+ BoundedPriorityContainer<SHyperedgeBundle> m_container;
47
+ };
48
+
49
+ } // F2S
50
+ } // Syntax
51
+ } // Moses
mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree-inl.h ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ namespace Moses
4
+ {
5
+ namespace Syntax
6
+ {
7
+ namespace F2S
8
+ {
9
+
10
+ template<typename Callback>
11
+ RuleMatcherHyperTree<Callback>::RuleMatcherHyperTree(const HyperTree &ruleTrie)
12
+ : m_ruleTrie(ruleTrie)
13
+ {
14
+ }
15
+
16
+ template<typename Callback>
17
+ void RuleMatcherHyperTree<Callback>::EnumerateHyperedges(
18
+ const Forest::Vertex &v, Callback &callback)
19
+ {
20
+ const HyperTree::Node &root = m_ruleTrie.GetRootNode();
21
+ HyperPath::NodeSeq nodeSeq(1, v.pvertex.symbol[0]->GetId());
22
+ const HyperTree::Node *child = root.GetChild(nodeSeq);
23
+ if (!child) {
24
+ return;
25
+ }
26
+
27
+ m_hyperedge.head = const_cast<PVertex*>(&v.pvertex);
28
+
29
+ // Initialize the queue.
30
+ MatchItem item;
31
+ item.annotatedFNS.fns = FNS(1, &v);
32
+ item.trieNode = child;
33
+ m_queue.push(item);
34
+
35
+ while (!m_queue.empty()) {
36
+ MatchItem item = m_queue.front();
37
+ m_queue.pop();
38
+ if (item.trieNode->HasRules()) {
39
+ const FNS &fns = item.annotatedFNS.fns;
40
+ // Set the output hyperedge's tail.
41
+ m_hyperedge.tail.clear();
42
+ for (FNS::const_iterator p = fns.begin(); p != fns.end(); ++p) {
43
+ const Forest::Vertex *v = *p;
44
+ m_hyperedge.tail.push_back(const_cast<PVertex *>(&(v->pvertex)));
45
+ }
46
+ // Set the output hyperedge label's input weight.
47
+ m_hyperedge.label.inputWeight = 0.0f;
48
+ for (std::vector<const Forest::Hyperedge *>::const_iterator
49
+ p = item.annotatedFNS.fragment.begin();
50
+ p != item.annotatedFNS.fragment.end(); ++p) {
51
+ m_hyperedge.label.inputWeight += (*p)->weight;
52
+ }
53
+ // Set the output hyperedge label's translation set pointer.
54
+ m_hyperedge.label.translations
55
+ = item.trieNode->GetTargetPhraseCollection();
56
+ // Pass the output hyperedge to the callback.
57
+ callback(m_hyperedge);
58
+ }
59
+ PropagateNextLexel(item);
60
+ }
61
+ }
62
+
63
+ template<typename Callback>
64
+ void RuleMatcherHyperTree<Callback>::PropagateNextLexel(const MatchItem &item)
65
+ {
66
+ std::vector<AnnotatedFNS> tfns;
67
+ std::vector<AnnotatedFNS> rfns;
68
+ std::vector<AnnotatedFNS> rfns2;
69
+
70
+ const HyperTree::Node &trieNode = *(item.trieNode);
71
+ const HyperTree::Node::Map &map = trieNode.GetMap();
72
+
73
+ for (HyperTree::Node::Map::const_iterator p = map.begin();
74
+ p != map.end(); ++p) {
75
+ const HyperPath::NodeSeq &edgeLabel = p->first;
76
+ const HyperTree::Node &child = p->second;
77
+
78
+ const int numSubSeqs = CountCommas(edgeLabel) + 1;
79
+
80
+ std::size_t pos = 0;
81
+ for (int i = 0; i < numSubSeqs; ++i) {
82
+ const FNS &fns = item.annotatedFNS.fns;
83
+ tfns.clear();
84
+ if (edgeLabel[pos] == HyperPath::kEpsilon) {
85
+ AnnotatedFNS x;
86
+ x.fns = FNS(1, fns[i]);
87
+ tfns.push_back(x);
88
+ pos += 2;
89
+ } else {
90
+ const int subSeqLength = SubSeqLength(edgeLabel, pos);
91
+ const std::vector<Forest::Hyperedge*> &incoming = fns[i]->incoming;
92
+ for (std::vector<Forest::Hyperedge *>::const_iterator q =
93
+ incoming.begin(); q != incoming.end(); ++q) {
94
+ const Forest::Hyperedge &edge = **q;
95
+ if (MatchChildren(edge.tail, edgeLabel, pos, subSeqLength)) {
96
+ tfns.resize(tfns.size()+1);
97
+ tfns.back().fns.assign(edge.tail.begin(), edge.tail.end());
98
+ tfns.back().fragment.push_back(&edge);
99
+ }
100
+ }
101
+ pos += subSeqLength + 1;
102
+ }
103
+ if (tfns.empty()) {
104
+ rfns.clear();
105
+ break;
106
+ } else if (i == 0) {
107
+ rfns.swap(tfns);
108
+ } else {
109
+ CartesianProduct(rfns, tfns, rfns2);
110
+ rfns.swap(rfns2);
111
+ }
112
+ }
113
+
114
+ for (typename std::vector<AnnotatedFNS>::const_iterator q = rfns.begin();
115
+ q != rfns.end(); ++q) {
116
+ MatchItem newItem;
117
+ newItem.annotatedFNS.fns = q->fns;
118
+ newItem.annotatedFNS.fragment = item.annotatedFNS.fragment;
119
+ newItem.annotatedFNS.fragment.insert(newItem.annotatedFNS.fragment.end(),
120
+ q->fragment.begin(),
121
+ q->fragment.end());
122
+ newItem.trieNode = &child;
123
+ m_queue.push(newItem);
124
+ }
125
+ }
126
+ }
127
+
128
+ template<typename Callback>
129
+ void RuleMatcherHyperTree<Callback>::CartesianProduct(
130
+ const std::vector<AnnotatedFNS> &x,
131
+ const std::vector<AnnotatedFNS> &y,
132
+ std::vector<AnnotatedFNS> &z)
133
+ {
134
+ z.clear();
135
+ z.reserve(x.size() * y.size());
136
+ for (typename std::vector<AnnotatedFNS>::const_iterator p = x.begin();
137
+ p != x.end(); ++p) {
138
+ const AnnotatedFNS &a = *p;
139
+ for (typename std::vector<AnnotatedFNS>::const_iterator q = y.begin();
140
+ q != y.end(); ++q) {
141
+ const AnnotatedFNS &b = *q;
142
+ // Create a new AnnotatedFNS.
143
+ z.resize(z.size()+1);
144
+ AnnotatedFNS &c = z.back();
145
+ // Combine frontier node sequences from a and b.
146
+ c.fns.reserve(a.fns.size() + b.fns.size());
147
+ c.fns.assign(a.fns.begin(), a.fns.end());
148
+ c.fns.insert(c.fns.end(), b.fns.begin(), b.fns.end());
149
+ // Combine tree fragments from a and b.
150
+ c.fragment.reserve(a.fragment.size() + b.fragment.size());
151
+ c.fragment.assign(a.fragment.begin(), a.fragment.end());
152
+ c.fragment.insert(c.fragment.end(), b.fragment.begin(), b.fragment.end());
153
+ }
154
+ }
155
+ }
156
+
157
+ template<typename Callback>
158
+ bool RuleMatcherHyperTree<Callback>::MatchChildren(
159
+ const std::vector<Forest::Vertex *> &children,
160
+ const HyperPath::NodeSeq &edgeLabel,
161
+ std::size_t pos,
162
+ std::size_t subSeqSize)
163
+ {
164
+ if (children.size() != subSeqSize) {
165
+ return false;
166
+ }
167
+ for (size_t i = 0; i < subSeqSize; ++i) {
168
+ if (edgeLabel[pos+i] != children[i]->pvertex.symbol[0]->GetId()) {
169
+ return false;
170
+ }
171
+ }
172
+ return true;
173
+ }
174
+
175
+ template<typename Callback>
176
+ int RuleMatcherHyperTree<Callback>::CountCommas(const HyperPath::NodeSeq &seq)
177
+ {
178
+ int count = 0;
179
+ for (std::vector<std::size_t>::const_iterator p = seq.begin();
180
+ p != seq.end(); ++p) {
181
+ if (*p == HyperPath::kComma) {
182
+ ++count;
183
+ }
184
+ }
185
+ return count;
186
+ }
187
+
188
+ template<typename Callback>
189
+ int RuleMatcherHyperTree<Callback>::SubSeqLength(const HyperPath::NodeSeq &seq,
190
+ int pos)
191
+ {
192
+ int length = 0;
193
+ HyperPath::NodeSeq::size_type curpos = pos;
194
+ while (curpos != seq.size() && seq[curpos] != HyperPath::kComma) {
195
+ ++curpos;
196
+ ++length;
197
+ }
198
+ return length;
199
+ }
200
+
201
+ } // namespace F2S
202
+ } // namespace Syntax
203
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/RuleMatcherHyperTree.h ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "moses/Syntax/PHyperedge.h"
4
+
5
+ #include "Forest.h"
6
+ #include "HyperTree.h"
7
+ #include "RuleMatcher.h"
8
+
9
+ namespace Moses
10
+ {
11
+ namespace Syntax
12
+ {
13
+ namespace F2S
14
+ {
15
+
16
+ // Rule matcher based on the algorithm from this paper:
17
+ //
18
+ // Hui Zhang, Min Zhang, Haizhou Li, and Chew Lim Tan
19
+ // "Fast Translation Rule Matching for Syntax-based Statistical Machine
20
+ // Translation"
21
+ // In proceedings of EMNLP 2009
22
+ //
23
+ template<typename Callback>
24
+ class RuleMatcherHyperTree : public RuleMatcher<Callback>
25
+ {
26
+ public:
27
+ RuleMatcherHyperTree(const HyperTree &);
28
+
29
+ ~RuleMatcherHyperTree() {}
30
+
31
+ void EnumerateHyperedges(const Forest::Vertex &, Callback &);
32
+
33
+ private:
34
+ // Frontier node sequence.
35
+ typedef std::vector<const Forest::Vertex *> FNS;
36
+
37
+ // An AnnotatedFNS is a FNS annotated with the set of forest hyperedges that
38
+ // constitute the tree fragment from which it was derived.
39
+ struct AnnotatedFNS {
40
+ FNS fns;
41
+ std::vector<const Forest::Hyperedge *> fragment;
42
+ };
43
+
44
+ // A MatchItem is like the FP structure in Zhang et al. (2009), but it also
45
+ // records the set of forest hyperedges that constitute the matched tree
46
+ // fragment.
47
+ struct MatchItem {
48
+ AnnotatedFNS annotatedFNS;
49
+ const HyperTree::Node *trieNode;
50
+ };
51
+
52
+ // Implements the Cartsian product operation from line 16 of Algorithm 4
53
+ // (Zhang et al., 2009), which in this implementation also involves
54
+ // combining the fragment information associated with the FNS objects.
55
+ void CartesianProduct(const std::vector<AnnotatedFNS> &,
56
+ const std::vector<AnnotatedFNS> &,
57
+ std::vector<AnnotatedFNS> &);
58
+
59
+ int CountCommas(const HyperPath::NodeSeq &);
60
+
61
+ bool MatchChildren(const std::vector<Forest::Vertex *> &,
62
+ const HyperPath::NodeSeq &, std::size_t, std::size_t);
63
+
64
+ void PropagateNextLexel(const MatchItem &);
65
+
66
+ int SubSeqLength(const HyperPath::NodeSeq &, int);
67
+
68
+ const HyperTree &m_ruleTrie;
69
+ PHyperedge m_hyperedge;
70
+ std::queue<MatchItem> m_queue; // Called "SFP" in Zhang et al. (2009)
71
+ };
72
+
73
+ } // namespace F2S
74
+ } // namespace Syntax
75
+ } // namespace Moses
76
+
77
+ // Implementation
78
+ #include "RuleMatcherHyperTree-inl.h"
mosesdecoder/moses/Syntax/F2S/TopologicalSorter.cpp ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "TopologicalSorter.h"
2
+
3
+ namespace Moses
4
+ {
5
+ namespace Syntax
6
+ {
7
+ namespace F2S
8
+ {
9
+
10
+ void TopologicalSorter::Sort(const Forest &forest,
11
+ std::vector<const Forest::Vertex *> &permutation)
12
+ {
13
+ permutation.clear();
14
+ BuildPredSets(forest);
15
+ m_visited.clear();
16
+ for (std::vector<Forest::Vertex *>::const_iterator
17
+ p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
18
+ if (m_visited.find(*p) == m_visited.end()) {
19
+ Visit(**p, permutation);
20
+ }
21
+ }
22
+ }
23
+
24
+ void TopologicalSorter::BuildPredSets(const Forest &forest)
25
+ {
26
+ m_predSets.clear();
27
+ for (std::vector<Forest::Vertex *>::const_iterator
28
+ p = forest.vertices.begin(); p != forest.vertices.end(); ++p) {
29
+ const Forest::Vertex *head = *p;
30
+ for (std::vector<Forest::Hyperedge *>::const_iterator
31
+ q = head->incoming.begin(); q != head->incoming.end(); ++q) {
32
+ for (std::vector<Forest::Vertex *>::const_iterator
33
+ r = (*q)->tail.begin(); r != (*q)->tail.end(); ++r) {
34
+ m_predSets[head].insert(*r);
35
+ }
36
+ }
37
+ }
38
+ }
39
+
40
+ void TopologicalSorter::Visit(const Forest::Vertex &v,
41
+ std::vector<const Forest::Vertex *> &permutation)
42
+ {
43
+ m_visited.insert(&v);
44
+ const VertexSet &predSet = m_predSets[&v];
45
+ for (VertexSet::const_iterator p = predSet.begin(); p != predSet.end(); ++p) {
46
+ if (m_visited.find(*p) == m_visited.end()) {
47
+ Visit(**p, permutation);
48
+ }
49
+ }
50
+ permutation.push_back(&v);
51
+ }
52
+
53
+ } // namespace F2S
54
+ } // namespace Syntax
55
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/TopologicalSorter.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <vector>
4
+
5
+ #include <boost/unordered_map.hpp>
6
+ #include <boost/unordered_set.hpp>
7
+
8
+ #include "Forest.h"
9
+
10
+ namespace Moses
11
+ {
12
+ namespace Syntax
13
+ {
14
+ namespace F2S
15
+ {
16
+
17
+ class TopologicalSorter
18
+ {
19
+ public:
20
+ void Sort(const Forest &, std::vector<const Forest::Vertex *> &);
21
+
22
+ private:
23
+ typedef boost::unordered_set<const Forest::Vertex *> VertexSet;
24
+
25
+ void BuildPredSets(const Forest &);
26
+ void Visit(const Forest::Vertex &, std::vector<const Forest::Vertex *> &);
27
+
28
+ boost::unordered_set<const Forest::Vertex *> m_visited;
29
+ boost::unordered_map<const Forest::Vertex *, VertexSet> m_predSets;
30
+ };
31
+
32
+ } // namespace F2S
33
+ } // namespace Syntax
34
+ } // namespace Moses
mosesdecoder/moses/Syntax/F2S/TreeFragmentTokenizer.cpp ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "TreeFragmentTokenizer.h"
2
+
3
+ #include <cctype>
4
+
5
+ namespace Moses
6
+ {
7
+ namespace Syntax
8
+ {
9
+ namespace F2S
10
+ {
11
+
12
+ TreeFragmentToken::TreeFragmentToken(TreeFragmentTokenType t,
13
+ StringPiece v, std::size_t p)
14
+ : type(t)
15
+ , value(v)
16
+ , pos(p)
17
+ {
18
+ }
19
+
20
+ TreeFragmentTokenizer::TreeFragmentTokenizer()
21
+ : value_(TreeFragmentToken_EOS, "", -1)
22
+ {
23
+ }
24
+
25
+ TreeFragmentTokenizer::TreeFragmentTokenizer(const StringPiece &s)
26
+ : str_(s)
27
+ , value_(TreeFragmentToken_EOS, "", -1)
28
+ , iter_(s.begin())
29
+ , end_(s.end())
30
+ , pos_(0)
31
+ {
32
+ ++(*this);
33
+ }
34
+
35
+ TreeFragmentTokenizer &TreeFragmentTokenizer::operator++()
36
+ {
37
+ while (iter_ != end_ && (*iter_ == ' ' || *iter_ == '\t')) {
38
+ ++iter_;
39
+ ++pos_;
40
+ }
41
+
42
+ if (iter_ == end_) {
43
+ value_ = TreeFragmentToken(TreeFragmentToken_EOS, "", pos_);
44
+ return *this;
45
+ }
46
+
47
+ if (*iter_ == '[') {
48
+ value_ = TreeFragmentToken(TreeFragmentToken_LSB, "[", pos_);
49
+ ++iter_;
50
+ ++pos_;
51
+ } else if (*iter_ == ']') {
52
+ value_ = TreeFragmentToken(TreeFragmentToken_RSB, "]", pos_);
53
+ ++iter_;
54
+ ++pos_;
55
+ } else {
56
+ std::size_t start = pos_;
57
+ while (true) {
58
+ ++iter_;
59
+ ++pos_;
60
+ if (iter_ == end_ || *iter_ == ' ' || *iter_ == '\t') {
61
+ break;
62
+ }
63
+ if (*iter_ == '[' || *iter_ == ']') {
64
+ break;
65
+ }
66
+ }
67
+ StringPiece word = str_.substr(start, pos_-start);
68
+ value_ = TreeFragmentToken(TreeFragmentToken_WORD, word, start);
69
+ }
70
+
71
+ return *this;
72
+ }
73
+
74
+ TreeFragmentTokenizer TreeFragmentTokenizer::operator++(int)
75
+ {
76
+ TreeFragmentTokenizer tmp(*this);
77
+ ++*this;
78
+ return tmp;
79
+ }
80
+
81
+ bool operator==(const TreeFragmentTokenizer &lhs,
82
+ const TreeFragmentTokenizer &rhs)
83
+ {
84
+ if (lhs.value_.type == TreeFragmentToken_EOS ||
85
+ rhs.value_.type == TreeFragmentToken_EOS) {
86
+ return lhs.value_.type == TreeFragmentToken_EOS &&
87
+ rhs.value_.type == TreeFragmentToken_EOS;
88
+ }
89
+ return lhs.iter_ == rhs.iter_;
90
+ }
91
+
92
+ bool operator!=(const TreeFragmentTokenizer &lhs,
93
+ const TreeFragmentTokenizer &rhs)
94
+ {
95
+ return !(lhs == rhs);
96
+ }
97
+
98
+ } // namespace F2S
99
+ } // namespace Syntax
100
+ } // namespace Moses
mosesdecoder/moses/Syntax/Manager.h ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <boost/unordered_set.hpp>
4
+ #include "moses/InputType.h"
5
+ #include "moses/BaseManager.h"
6
+
7
+ #include "KBestExtractor.h"
8
+
9
+ namespace Moses
10
+ {
11
+ namespace Syntax
12
+ {
13
+
14
+ // Common base class for Moses::Syntax managers.
15
+ class Manager : public BaseManager
16
+ {
17
+ public:
18
+ Manager(ttasksptr const& ttask);
19
+
20
+ // Virtual functions from Moses::BaseManager that are implemented the same
21
+ // way for all Syntax managers.
22
+ void OutputBest(OutputCollector *collector) const;
23
+ void OutputNBest(OutputCollector *collector) const;
24
+ void OutputUnknowns(OutputCollector *collector) const;
25
+
26
+ // Virtual functions from Moses::BaseManager that are no-ops for all Syntax
27
+ // managers.
28
+ void OutputAlignment(OutputCollector *collector) const {}
29
+ void OutputDetailedTreeFragmentsTranslationReport(
30
+ OutputCollector *collector) const {}
31
+ void OutputLatticeSamples(OutputCollector *collector) const {}
32
+ void OutputSearchGraph(OutputCollector *collector) const {}
33
+ // void OutputSearchGraphHypergraph() const {}
34
+
35
+ void
36
+ OutputSearchGraphAsHypergraph
37
+ ( std::string const& fname, size_t const precision ) const
38
+ { }
39
+
40
+ void OutputSearchGraphSLF() const {}
41
+ void OutputWordGraph(OutputCollector *collector) const {}
42
+ void OutputDetailedTranslationReport(OutputCollector *collector) const {}
43
+
44
+ void CalcDecoderStatistics() const {}
45
+
46
+ // Syntax-specific virtual functions that derived classes must implement.
47
+ virtual void ExtractKBest(
48
+ std::size_t k,
49
+ std::vector<boost::shared_ptr<KBestExtractor::Derivation> > &kBestList,
50
+ bool onlyDistinct=false) const = 0;
51
+ virtual const SHyperedge *GetBestSHyperedge() const = 0;
52
+
53
+ protected:
54
+ boost::unordered_set<Word> m_oovs;
55
+
56
+ private:
57
+ // Syntax-specific helper functions used to implement OutputNBest.
58
+ void OutputNBestList(OutputCollector *collector,
59
+ const KBestExtractor::KBestVec &nBestList,
60
+ long translationId) const;
61
+
62
+ std::size_t OutputAlignmentNBest(Alignments &retAlign,
63
+ const KBestExtractor::Derivation &d,
64
+ std::size_t startTarget) const;
65
+
66
+ std::size_t CalcSourceSize(const KBestExtractor::Derivation &d) const;
67
+ };
68
+
69
+ } // Syntax
70
+ } // Moses
mosesdecoder/moses/Syntax/PVertex.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "moses/Word.h"
4
+ #include "moses/Range.h"
5
+
6
+ namespace Moses
7
+ {
8
+ namespace Syntax
9
+ {
10
+
11
+ struct PVertex {
12
+ public:
13
+ PVertex(const Range &wr, const Word &w) : span(wr), symbol(w) {}
14
+
15
+ Range span;
16
+ Word symbol;
17
+ };
18
+
19
+ inline bool operator==(const PVertex &v, const PVertex &w)
20
+ {
21
+ return v.span == w.span && v.symbol == w.symbol;
22
+ }
23
+
24
+ } // Syntax
25
+ } // Moses
mosesdecoder/moses/Syntax/RuleTable.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ namespace Moses
4
+ {
5
+ namespace Syntax
6
+ {
7
+
8
+ class RuleTableFF;
9
+
10
+ // Base class for any data structure representing a synchronous
11
+ // grammar, like a trie (for S2T) or a DFA (for T2S).
12
+ class RuleTable
13
+ {
14
+ public:
15
+ RuleTable(const RuleTableFF *ff) : m_ff(ff) {}
16
+
17
+ virtual ~RuleTable() {}
18
+
19
+ protected:
20
+ const RuleTableFF *m_ff;
21
+ };
22
+
23
+ } // Syntax
24
+ } // Moses