suricodes committed on
Commit
91059e1
·
verified ·
1 Parent(s): 1d88c2e

Upload 235 files

Browse files
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +18 -0
  2. mosesdecoder/search/Jamfile +1 -0
  3. mosesdecoder/search/applied.hh +87 -0
  4. mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/edge_generator.o +0 -0
  5. mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/nbest.o +0 -0
  6. mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/rule.o +0 -0
  7. mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/vertex.o +0 -0
  8. mosesdecoder/search/config.hh +38 -0
  9. mosesdecoder/search/context.hh +49 -0
  10. mosesdecoder/search/edge.hh +54 -0
  11. mosesdecoder/search/edge_generator.cc +116 -0
  12. mosesdecoder/search/edge_generator.hh +56 -0
  13. mosesdecoder/search/header.hh +72 -0
  14. mosesdecoder/search/nbest.cc +106 -0
  15. mosesdecoder/search/nbest.hh +80 -0
  16. mosesdecoder/search/rule.cc +43 -0
  17. mosesdecoder/search/rule.hh +25 -0
  18. mosesdecoder/search/types.hh +31 -0
  19. mosesdecoder/search/vertex.cc +204 -0
  20. mosesdecoder/search/vertex.hh +196 -0
  21. mosesdecoder/search/vertex_generator.hh +75 -0
  22. mosesdecoder/symal/Jamfile +2 -0
  23. mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/cmd.o +0 -0
  24. mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal +0 -0
  25. mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal.o +0 -0
  26. mosesdecoder/symal/cmd.c +642 -0
  27. mosesdecoder/symal/cmd.h +35 -0
  28. mosesdecoder/symal/symal.cpp +518 -0
  29. mosesdecoder/symal/symal.vcproj +204 -0
  30. mosesdecoder/util/CMakeLists.txt +81 -0
  31. mosesdecoder/util/Jamfile +41 -0
  32. mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test +3 -0
  33. mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.output +8 -0
  34. mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.run +8 -0
  35. mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.test +1 -0
  36. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing.o +0 -0
  37. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test +3 -0
  38. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.o +0 -0
  39. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.passed +0 -0
  40. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed +0 -0
  41. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed_main.o +0 -0
  42. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/ersatz_progress.o +0 -0
  43. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/exception.o +0 -0
  44. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file.o +0 -0
  45. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece.o +0 -0
  46. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece_test.o +0 -0
  47. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/float_to_string.o +0 -0
  48. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string.o +0 -0
  49. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test +3 -0
  50. mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test.o +0 -0
.gitattributes CHANGED
@@ -126,3 +126,21 @@ mosesdecoder/phrase-extract/syntax-common/bin/gcc-9/release/link-static/threadin
126
  mosesdecoder/phrase-extract/syntax-common/bin/gcc-9/release/link-static/threading-multi/tree_test filter=lfs diff=lfs merge=lfs -text
127
  mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/CreateProbingPT filter=lfs diff=lfs merge=lfs -text
128
  mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/libprobingpt.a filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  mosesdecoder/phrase-extract/syntax-common/bin/gcc-9/release/link-static/threading-multi/tree_test filter=lfs diff=lfs merge=lfs -text
127
  mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/CreateProbingPT filter=lfs diff=lfs merge=lfs -text
128
  mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/libprobingpt.a filter=lfs diff=lfs merge=lfs -text
129
+ mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test filter=lfs diff=lfs merge=lfs -text
130
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test filter=lfs diff=lfs merge=lfs -text
131
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test filter=lfs diff=lfs merge=lfs -text
132
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/joint_sort_test filter=lfs diff=lfs merge=lfs -text
133
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/multi_intersection_test filter=lfs diff=lfs merge=lfs -text
134
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/pcqueue_test filter=lfs diff=lfs merge=lfs -text
135
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/probing_hash_table_test filter=lfs diff=lfs merge=lfs -text
136
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/random_test filter=lfs diff=lfs merge=lfs -text
137
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/sized_iterator_test filter=lfs diff=lfs merge=lfs -text
138
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/sorted_uniform_test filter=lfs diff=lfs merge=lfs -text
139
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/string_stream_test filter=lfs diff=lfs merge=lfs -text
140
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/tempfile_test filter=lfs diff=lfs merge=lfs -text
141
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/tokenize_piece_test filter=lfs diff=lfs merge=lfs -text
142
+ mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/tokenize_test filter=lfs diff=lfs merge=lfs -text
143
+ mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/io_test filter=lfs diff=lfs merge=lfs -text
144
+ mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/rewindable_stream_test filter=lfs diff=lfs merge=lfs -text
145
+ mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/sort_test filter=lfs diff=lfs merge=lfs -text
146
+ mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/stream_test filter=lfs diff=lfs merge=lfs -text
mosesdecoder/search/Jamfile ADDED
@@ -0,0 +1 @@
 
 
1
+ fakelib search : edge_generator.cc nbest.cc rule.cc vertex.cc ../lm//kenlm ../util//kenutil /top//boost_system : : : <include>.. ;
mosesdecoder/search/applied.hh ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_APPLIED__
2
+ #define SEARCH_APPLIED__
3
+
4
+ #include "search/edge.hh"
5
+ #include "search/header.hh"
6
+ #include "util/pool.hh"
7
+
8
+ #include <cmath>
9
+
10
+ namespace search {
11
+
12
+ // A full hypothesis: a score, arity of the rule, a pointer to the decoder's rule (Note), and pointers to non-terminals that were substituted.
13
+ template <class Below> class GenericApplied : public Header {
14
+ public:
15
+ GenericApplied() {}
16
+
17
+ GenericApplied(void *location, PartialEdge partial)
18
+ : Header(location) {
19
+ memcpy(Base(), partial.Base(), kHeaderSize);
20
+ Below *child_out = Children();
21
+ const PartialVertex *part = partial.NT();
22
+ const PartialVertex *const part_end_loop = part + partial.GetArity();
23
+ for (; part != part_end_loop; ++part, ++child_out)
24
+ *child_out = Below(part->End());
25
+ }
26
+
27
+ GenericApplied(void *location, Score score, Arity arity, Note note, Moses::Range range) : Header(location, arity) {
28
+ SetScore(score);
29
+ SetNote(note);
30
+ SetRange(range);
31
+ }
32
+
33
+ explicit GenericApplied(History from) : Header(from) {}
34
+
35
+
36
+ // These are arrays of length GetArity().
37
+ Below *Children() {
38
+ return reinterpret_cast<Below*>(After());
39
+ }
40
+ const Below *Children() const {
41
+ return reinterpret_cast<const Below*>(After());
42
+ }
43
+
44
+ static std::size_t Size(Arity arity) {
45
+ return kHeaderSize + arity * sizeof(const Below);
46
+ }
47
+ };
48
+
49
+ // Applied rule that references itself.
50
+ class Applied : public GenericApplied<Applied> {
51
+ private:
52
+ typedef GenericApplied<Applied> P;
53
+
54
+ public:
55
+ Applied() {}
56
+ Applied(void *location, PartialEdge partial) : P(location, partial) {}
57
+ Applied(History from) : P(from) {}
58
+ };
59
+
60
+ // How to build single-best hypotheses.
61
+ class SingleBest {
62
+ public:
63
+ typedef PartialEdge Combine;
64
+
65
+ void Add(PartialEdge &existing, PartialEdge add) const {
66
+ if (!existing.Valid() || existing.GetScore() < add.GetScore())
67
+ existing = add;
68
+ }
69
+
70
+ NBestComplete Complete(PartialEdge partial) {
71
+ if (!partial.Valid())
72
+ return NBestComplete(NULL, lm::ngram::ChartState(), -INFINITY);
73
+ void *place_final = pool_.Allocate(Applied::Size(partial.GetArity()));
74
+ Applied(place_final, partial);
75
+ return NBestComplete(
76
+ place_final,
77
+ partial.CompletedState(),
78
+ partial.GetScore());
79
+ }
80
+
81
+ private:
82
+ util::Pool pool_;
83
+ };
84
+
85
+ } // namespace search
86
+
87
+ #endif // SEARCH_APPLIED__
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/edge_generator.o ADDED
Binary file (44.9 kB). View file
 
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/nbest.o ADDED
Binary file (14.1 kB). View file
 
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/rule.o ADDED
Binary file (11 kB). View file
 
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/vertex.o ADDED
Binary file (24 kB). View file
 
mosesdecoder/search/config.hh ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_CONFIG__
2
+ #define SEARCH_CONFIG__
3
+
4
+ #include "search/types.hh"
5
+
6
+ namespace search {
7
+
8
+ struct NBestConfig {
9
+ explicit NBestConfig(unsigned int in_size) {
10
+ keep = in_size;
11
+ size = in_size;
12
+ }
13
+
14
+ unsigned int keep, size;
15
+ };
16
+
17
+ class Config {
18
+ public:
19
+ Config(Score lm_weight, unsigned int pop_limit, const NBestConfig &nbest) :
20
+ lm_weight_(lm_weight), pop_limit_(pop_limit), nbest_(nbest) {}
21
+
22
+ Score LMWeight() const { return lm_weight_; }
23
+
24
+ unsigned int PopLimit() const { return pop_limit_; }
25
+
26
+ const NBestConfig &GetNBest() const { return nbest_; }
27
+
28
+ private:
29
+ Score lm_weight_;
30
+
31
+ unsigned int pop_limit_;
32
+
33
+ NBestConfig nbest_;
34
+ };
35
+
36
+ } // namespace search
37
+
38
+ #endif // SEARCH_CONFIG__
mosesdecoder/search/context.hh ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_CONTEXT__
2
+ #define SEARCH_CONTEXT__
3
+
4
+ #include "search/config.hh"
5
+ #include "search/vertex.hh"
6
+
7
+ #include <boost/pool/object_pool.hpp>
8
+
9
+ namespace search {
10
+
11
+ class ContextBase {
12
+ public:
13
+ explicit ContextBase(const Config &config) : config_(config) {}
14
+
15
+ VertexNode *NewVertexNode() {
16
+ VertexNode *ret = vertex_node_pool_.construct();
17
+ assert(ret);
18
+ return ret;
19
+ }
20
+
21
+ void DeleteVertexNode(VertexNode *node) {
22
+ vertex_node_pool_.destroy(node);
23
+ }
24
+
25
+ unsigned int PopLimit() const { return config_.PopLimit(); }
26
+
27
+ Score LMWeight() const { return config_.LMWeight(); }
28
+
29
+ const Config &GetConfig() const { return config_; }
30
+
31
+ private:
32
+ boost::object_pool<VertexNode> vertex_node_pool_;
33
+
34
+ Config config_;
35
+ };
36
+
37
+ template <class Model> class Context : public ContextBase {
38
+ public:
39
+ Context(const Config &config, const Model &model) : ContextBase(config), model_(model) {}
40
+
41
+ const Model &LanguageModel() const { return model_; }
42
+
43
+ private:
44
+ const Model &model_;
45
+ };
46
+
47
+ } // namespace search
48
+
49
+ #endif // SEARCH_CONTEXT__
mosesdecoder/search/edge.hh ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_EDGE__
2
+ #define SEARCH_EDGE__
3
+
4
+ #include "lm/state.hh"
5
+ #include "search/header.hh"
6
+ #include "search/types.hh"
7
+ #include "search/vertex.hh"
8
+ #include "util/pool.hh"
9
+
10
+ #include <functional>
11
+
12
+ #include <stdint.h>
13
+
14
+ namespace search {
15
+
16
+ // Copyable, but the copy will be shallow.
17
+ class PartialEdge : public Header {
18
+ public:
19
+ // Allow default construction for STL.
20
+ PartialEdge() {}
21
+
22
+ PartialEdge(util::Pool &pool, Arity arity)
23
+ : Header(pool.Allocate(Size(arity, arity + 1)), arity) {}
24
+
25
+ PartialEdge(util::Pool &pool, Arity arity, Arity chart_states)
26
+ : Header(pool.Allocate(Size(arity, chart_states)), arity) {}
27
+
28
+ // Non-terminals
29
+ const PartialVertex *NT() const {
30
+ return reinterpret_cast<const PartialVertex*>(After());
31
+ }
32
+ PartialVertex *NT() {
33
+ return reinterpret_cast<PartialVertex*>(After());
34
+ }
35
+
36
+ const lm::ngram::ChartState &CompletedState() const {
37
+ return *Between();
38
+ }
39
+ const lm::ngram::ChartState *Between() const {
40
+ return reinterpret_cast<const lm::ngram::ChartState*>(After() + GetArity() * sizeof(PartialVertex));
41
+ }
42
+ lm::ngram::ChartState *Between() {
43
+ return reinterpret_cast<lm::ngram::ChartState*>(After() + GetArity() * sizeof(PartialVertex));
44
+ }
45
+
46
+ private:
47
+ static std::size_t Size(Arity arity, Arity chart_states) {
48
+ return kHeaderSize + arity * sizeof(PartialVertex) + chart_states * sizeof(lm::ngram::ChartState);
49
+ }
50
+ };
51
+
52
+
53
+ } // namespace search
54
+ #endif // SEARCH_EDGE__
mosesdecoder/search/edge_generator.cc ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "search/edge_generator.hh"
2
+
3
+ #include "lm/left.hh"
4
+ #include "lm/model.hh"
5
+ #include "lm/partial.hh"
6
+ #include "search/context.hh"
7
+ #include "search/vertex.hh"
8
+
9
+ #include <numeric>
10
+
11
+ namespace search {
12
+
13
+ namespace {
14
+
15
+ template <class Model> void FastScore(const Context<Model> &context, Arity victim, Arity before_idx, Arity incomplete, const PartialVertex &previous_vertex, PartialEdge update) {
16
+ lm::ngram::ChartState *between = update.Between();
17
+ lm::ngram::ChartState *before = &between[before_idx], *after = &between[before_idx + 1];
18
+
19
+ float adjustment = 0.0;
20
+ const lm::ngram::ChartState &previous_reveal = previous_vertex.State();
21
+ const PartialVertex &update_nt = update.NT()[victim];
22
+ const lm::ngram::ChartState &update_reveal = update_nt.State();
23
+ if ((update_reveal.left.length > previous_reveal.left.length) || (update_reveal.left.full && !previous_reveal.left.full)) {
24
+ adjustment += lm::ngram::RevealAfter(context.LanguageModel(), before->left, before->right, update_reveal.left, previous_reveal.left.length);
25
+ }
26
+ if ((update_reveal.right.length > previous_reveal.right.length) || (update_nt.RightFull() && !previous_vertex.RightFull())) {
27
+ adjustment += lm::ngram::RevealBefore(context.LanguageModel(), update_reveal.right, previous_reveal.right.length, update_nt.RightFull(), after->left, after->right);
28
+ }
29
+ if (update_nt.Complete()) {
30
+ if (update_reveal.left.full) {
31
+ before->left.full = true;
32
+ } else {
33
+ assert(update_reveal.left.length == update_reveal.right.length);
34
+ adjustment += lm::ngram::Subsume(context.LanguageModel(), before->left, before->right, after->left, after->right, update_reveal.left.length);
35
+ }
36
+ before->right = after->right;
37
+ // Shift the remaining states down by one slot, overwriting after.
38
+ for (lm::ngram::ChartState *cover = after; cover < between + incomplete; ++cover) {
39
+ *cover = *(cover + 1);
40
+ }
41
+ }
42
+ update.SetScore(update.GetScore() + adjustment * context.LMWeight());
43
+ }
44
+
45
+ } // namespace
46
+
47
+ template <class Model> PartialEdge EdgeGenerator::Pop(Context<Model> &context) {
48
+ assert(!generate_.empty());
49
+ PartialEdge top = generate_.top();
50
+ generate_.pop();
51
+ PartialVertex *const top_nt = top.NT();
52
+ const Arity arity = top.GetArity();
53
+
54
+ Arity victim = 0;
55
+ Arity victim_completed;
56
+ Arity incomplete;
57
+ unsigned char lowest_niceness = 255;
58
+ // Select victim or return if complete.
59
+ {
60
+ Arity completed = 0;
61
+ for (Arity i = 0; i != arity; ++i) {
62
+ if (top_nt[i].Complete()) {
63
+ ++completed;
64
+ } else if (top_nt[i].Niceness() < lowest_niceness) {
65
+ lowest_niceness = top_nt[i].Niceness();
66
+ victim = i;
67
+ victim_completed = completed;
68
+ }
69
+ }
70
+ if (lowest_niceness == 255) {
71
+ return top;
72
+ }
73
+ incomplete = arity - completed;
74
+ }
75
+
76
+ PartialVertex old_value(top_nt[victim]);
77
+ PartialVertex alternate_changed;
78
+ if (top_nt[victim].Split(alternate_changed)) {
79
+ PartialEdge alternate(partial_edge_pool_, arity, incomplete + 1);
80
+ alternate.SetScore(top.GetScore() + alternate_changed.Bound() - old_value.Bound());
81
+
82
+ alternate.SetNote(top.GetNote());
83
+ alternate.SetRange(top.GetRange());
84
+
85
+ PartialVertex *alternate_nt = alternate.NT();
86
+ for (Arity i = 0; i < victim; ++i) alternate_nt[i] = top_nt[i];
87
+ alternate_nt[victim] = alternate_changed;
88
+ for (Arity i = victim + 1; i < arity; ++i) alternate_nt[i] = top_nt[i];
89
+
90
+ memcpy(alternate.Between(), top.Between(), sizeof(lm::ngram::ChartState) * (incomplete + 1));
91
+
92
+ // TODO: dedupe?
93
+ generate_.push(alternate);
94
+ }
95
+
96
+ #ifndef NDEBUG
97
+ Score before = top.GetScore();
98
+ #endif
99
+ // top is now the continuation.
100
+ FastScore(context, victim, victim - victim_completed, incomplete, old_value, top);
101
+ // TODO: dedupe?
102
+ generate_.push(top);
103
+ assert(lowest_niceness != 254 || top.GetScore() == before);
104
+
105
+ // Invalid indicates no new hypothesis generated.
106
+ return PartialEdge();
107
+ }
108
+
109
+ template PartialEdge EdgeGenerator::Pop(Context<lm::ngram::RestProbingModel> &context);
110
+ template PartialEdge EdgeGenerator::Pop(Context<lm::ngram::ProbingModel> &context);
111
+ template PartialEdge EdgeGenerator::Pop(Context<lm::ngram::TrieModel> &context);
112
+ template PartialEdge EdgeGenerator::Pop(Context<lm::ngram::QuantTrieModel> &context);
113
+ template PartialEdge EdgeGenerator::Pop(Context<lm::ngram::ArrayTrieModel> &context);
114
+ template PartialEdge EdgeGenerator::Pop(Context<lm::ngram::QuantArrayTrieModel> &context);
115
+
116
+ } // namespace search
mosesdecoder/search/edge_generator.hh ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_EDGE_GENERATOR__
2
+ #define SEARCH_EDGE_GENERATOR__
3
+
4
+ #include "search/edge.hh"
5
+ #include "search/types.hh"
6
+
7
+ #include <queue>
8
+
9
+ namespace lm {
10
+ namespace ngram {
11
+ struct ChartState;
12
+ } // namespace ngram
13
+ } // namespace lm
14
+
15
+ namespace search {
16
+
17
+ template <class Model> class Context;
18
+
19
+ class EdgeGenerator {
20
+ public:
21
+ EdgeGenerator() {}
22
+
23
+ PartialEdge AllocateEdge(Arity arity) {
24
+ return PartialEdge(partial_edge_pool_, arity);
25
+ }
26
+
27
+ void AddEdge(PartialEdge edge) {
28
+ generate_.push(edge);
29
+ }
30
+
31
+ bool Empty() const { return generate_.empty(); }
32
+
33
+ // Pop. If there's a complete hypothesis, return it. Otherwise return an invalid PartialEdge.
34
+ template <class Model> PartialEdge Pop(Context<Model> &context);
35
+
36
+ template <class Model, class Output> void Search(Context<Model> &context, Output &output) {
37
+ unsigned to_pop = context.PopLimit();
38
+ while (to_pop > 0 && !generate_.empty()) {
39
+ PartialEdge got(Pop(context));
40
+ if (got.Valid()) {
41
+ output.NewHypothesis(got);
42
+ --to_pop;
43
+ }
44
+ }
45
+ output.FinishedSearch();
46
+ }
47
+
48
+ private:
49
+ util::Pool partial_edge_pool_;
50
+
51
+ typedef std::priority_queue<PartialEdge> Generate;
52
+ Generate generate_;
53
+ };
54
+
55
+ } // namespace search
56
+ #endif // SEARCH_EDGE_GENERATOR__
mosesdecoder/search/header.hh ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_HEADER__
2
+ #define SEARCH_HEADER__
3
+
4
+ // Header consisting of Score, Arity, Note and Range
5
+
6
+ #include "search/types.hh"
7
+ #include "moses/Range.h"
8
+
9
+ #include <stdint.h>
10
+
11
+ namespace search {
12
+
13
+ // Copying is shallow.
14
+ class Header {
15
+ public:
16
+ bool Valid() const { return base_; }
17
+
18
+ Score GetScore() const {
19
+ return *reinterpret_cast<const float*>(base_);
20
+ }
21
+ void SetScore(Score to) {
22
+ *reinterpret_cast<float*>(base_) = to;
23
+ }
24
+ bool operator<(const Header &other) const {
25
+ return GetScore() < other.GetScore();
26
+ }
27
+ bool operator>(const Header &other) const {
28
+ return GetScore() > other.GetScore();
29
+ }
30
+
31
+ Arity GetArity() const {
32
+ return *reinterpret_cast<const Arity*>(base_ + sizeof(Score));
33
+ }
34
+
35
+ Note GetNote() const {
36
+ return *reinterpret_cast<const Note*>(base_ + sizeof(Score) + sizeof(Arity));
37
+ }
38
+ void SetNote(Note to) {
39
+ *reinterpret_cast<Note*>(base_ + sizeof(Score) + sizeof(Arity)) = to;
40
+ }
41
+
42
+ Moses::Range GetRange() const {
43
+ return *reinterpret_cast<const Moses::Range*>(base_ + sizeof(Score) + sizeof(Arity) + sizeof(Note));
44
+ }
45
+ void SetRange(Moses::Range to) {
46
+ *reinterpret_cast<Moses::Range*>(base_ + sizeof(Score) + sizeof(Arity) + sizeof(Note)) = to;
47
+ }
48
+
49
+ uint8_t *Base() { return base_; }
50
+ const uint8_t *Base() const { return base_; }
51
+
52
+ protected:
53
+ Header() : base_(NULL) {}
54
+
55
+ explicit Header(void *base) : base_(static_cast<uint8_t*>(base)) {}
56
+
57
+ Header(void *base, Arity arity) : base_(static_cast<uint8_t*>(base)) {
58
+ *reinterpret_cast<Arity*>(base_ + sizeof(Score)) = arity;
59
+ }
60
+
61
+ static const std::size_t kHeaderSize = sizeof(Score) + sizeof(Arity) + sizeof(Note) + sizeof(Moses::Range);
62
+
63
+ uint8_t *After() { return base_ + kHeaderSize; }
64
+ const uint8_t *After() const { return base_ + kHeaderSize; }
65
+
66
+ private:
67
+ uint8_t *base_;
68
+ };
69
+
70
+ } // namespace search
71
+
72
+ #endif // SEARCH_HEADER__
mosesdecoder/search/nbest.cc ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "search/nbest.hh"
2
+
3
+ #include "util/pool.hh"
4
+ #include "moses/Util.h"
5
+
6
+ #include <algorithm>
7
+ #include <functional>
8
+ #include <queue>
9
+ #include <cassert>
10
+ #include <cmath>
11
+
12
+ namespace search {
13
+
14
+ NBestList::NBestList(std::vector<PartialEdge> &partials, util::Pool &entry_pool, std::size_t keep) {
15
+ assert(!partials.empty());
16
+ std::vector<PartialEdge>::iterator end;
17
+ if (partials.size() > keep) {
18
+ end = partials.begin() + keep;
19
+ NTH_ELEMENT4(partials.begin(), end, partials.end(), std::greater<PartialEdge>());
20
+ } else {
21
+ end = partials.end();
22
+ }
23
+ for (std::vector<PartialEdge>::const_iterator i(partials.begin()); i != end; ++i) {
24
+ queue_.push(QueueEntry(entry_pool.Allocate(QueueEntry::Size(i->GetArity())), *i));
25
+ }
26
+ }
27
+
28
+ Score NBestList::TopAfterConstructor() const {
29
+ assert(revealed_.empty());
30
+ return queue_.top().GetScore();
31
+ }
32
+
33
+ const std::vector<Applied> &NBestList::Extract(util::Pool &pool, std::size_t n) {
34
+ while (revealed_.size() < n && !queue_.empty()) {
35
+ MoveTop(pool);
36
+ }
37
+ return revealed_;
38
+ }
39
+
40
+ Score NBestList::Visit(util::Pool &pool, std::size_t index) {
41
+ if (index + 1 < revealed_.size())
42
+ return revealed_[index + 1].GetScore() - revealed_[index].GetScore();
43
+ if (queue_.empty())
44
+ return -INFINITY;
45
+ if (index + 1 == revealed_.size())
46
+ return queue_.top().GetScore() - revealed_[index].GetScore();
47
+ assert(index == revealed_.size());
48
+
49
+ MoveTop(pool);
50
+
51
+ if (queue_.empty()) return -INFINITY;
52
+ return queue_.top().GetScore() - revealed_[index].GetScore();
53
+ }
54
+
55
+ Applied NBestList::Get(util::Pool &pool, std::size_t index) {
56
+ assert(index <= revealed_.size());
57
+ if (index == revealed_.size()) MoveTop(pool);
58
+ return revealed_[index];
59
+ }
60
+
61
+ void NBestList::MoveTop(util::Pool &pool) {
62
+ assert(!queue_.empty());
63
+ QueueEntry entry(queue_.top());
64
+ queue_.pop();
65
+ RevealedRef *const children_begin = entry.Children();
66
+ RevealedRef *const children_end = children_begin + entry.GetArity();
67
+ Score basis = entry.GetScore();
68
+ for (RevealedRef *child = children_begin; child != children_end; ++child) {
69
+ Score change = child->in_->Visit(pool, child->index_);
70
+ if (change != -INFINITY) {
71
+ assert(change < 0.001);
72
+ QueueEntry new_entry(pool.Allocate(QueueEntry::Size(entry.GetArity())), basis + change, entry.GetArity(), entry.GetNote(), entry.GetRange());
73
+ std::copy(children_begin, child, new_entry.Children());
74
+ RevealedRef *update = new_entry.Children() + (child - children_begin);
75
+ update->in_ = child->in_;
76
+ update->index_ = child->index_ + 1;
77
+ std::copy(child + 1, children_end, update + 1);
78
+ queue_.push(new_entry);
79
+ }
80
+ // Gesmundo, A. and Henderson, J. Faster Cube Pruning, IWSLT 2010.
81
+ if (child->index_) break;
82
+ }
83
+
84
+ // Convert QueueEntry to Applied. This leaves some unused memory.
85
+ void *overwrite = entry.Children();
86
+ for (unsigned int i = 0; i < entry.GetArity(); ++i) {
87
+ RevealedRef from(*(static_cast<const RevealedRef*>(overwrite) + i));
88
+ *(static_cast<Applied*>(overwrite) + i) = from.in_->Get(pool, from.index_);
89
+ }
90
+ revealed_.push_back(Applied(entry.Base()));
91
+ }
92
+
93
+ NBestComplete NBest::Complete(std::vector<PartialEdge> &partials) {
94
+ assert(!partials.empty());
95
+ NBestList *list = list_pool_.construct(partials, entry_pool_, config_.keep);
96
+ return NBestComplete(
97
+ list,
98
+ partials.front().CompletedState(), // All partials have the same state
99
+ list->TopAfterConstructor());
100
+ }
101
+
102
+ const std::vector<Applied> &NBest::Extract(History history) {
103
+ return static_cast<NBestList*>(history)->Extract(entry_pool_, config_.size);
104
+ }
105
+
106
+ } // namespace search
mosesdecoder/search/nbest.hh ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_NBEST__
2
+ #define SEARCH_NBEST__
3
+
4
+ #include "search/applied.hh"
5
+ #include "search/config.hh"
6
+ #include "search/edge.hh"
7
+
8
+ #include <boost/pool/object_pool.hpp>
9
+
10
+ #include <cstddef>
11
+ #include <queue>
12
+ #include <vector>
13
+ #include <cassert>
14
+
15
+ namespace search {
16
+
17
+ class NBestList;
18
+
19
+ class NBestList {
20
+ private:
21
+ class RevealedRef {
22
+ public:
23
+ explicit RevealedRef(History history)
24
+ : in_(static_cast<NBestList*>(history)), index_(0) {}
25
+
26
+ private:
27
+ friend class NBestList;
28
+
29
+ NBestList *in_;
30
+ std::size_t index_;
31
+ };
32
+
33
+ typedef GenericApplied<RevealedRef> QueueEntry;
34
+
35
+ public:
36
+ NBestList(std::vector<PartialEdge> &existing, util::Pool &entry_pool, std::size_t keep);
37
+
38
+ Score TopAfterConstructor() const;
39
+
40
+ const std::vector<Applied> &Extract(util::Pool &pool, std::size_t n);
41
+
42
+ private:
43
+ Score Visit(util::Pool &pool, std::size_t index);
44
+
45
+ Applied Get(util::Pool &pool, std::size_t index);
46
+
47
+ void MoveTop(util::Pool &pool);
48
+
49
+ typedef std::vector<Applied> Revealed;
50
+ Revealed revealed_;
51
+
52
+ typedef std::priority_queue<QueueEntry> Queue;
53
+ Queue queue_;
54
+ };
55
+
56
+ class NBest {
57
+ public:
58
+ typedef std::vector<PartialEdge> Combine;
59
+
60
+ explicit NBest(const NBestConfig &config) : config_(config) {}
61
+
62
+ void Add(std::vector<PartialEdge> &existing, PartialEdge addition) const {
63
+ existing.push_back(addition);
64
+ }
65
+
66
+ NBestComplete Complete(std::vector<PartialEdge> &partials);
67
+
68
+ const std::vector<Applied> &Extract(History root);
69
+
70
+ private:
71
+ const NBestConfig config_;
72
+
73
+ boost::object_pool<NBestList> list_pool_;
74
+
75
+ util::Pool entry_pool_;
76
+ };
77
+
78
+ } // namespace search
79
+
80
+ #endif // SEARCH_NBEST__
mosesdecoder/search/rule.cc ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "search/rule.hh"
2
+
3
+ #include "lm/model.hh"
4
+ #include "search/context.hh"
5
+
6
+ #include <ostream>
7
+
8
+ #include <cmath>
9
+
10
+ namespace search {
11
+
12
+ template <class Model> ScoreRuleRet ScoreRule(const Model &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing) {
13
+ ScoreRuleRet ret;
14
+ ret.prob = 0.0;
15
+ ret.oov = 0;
16
+ const lm::WordIndex oov = model.GetVocabulary().NotFound(), bos = model.GetVocabulary().BeginSentence();
17
+ lm::ngram::RuleScore<Model> scorer(model, *(writing++));
18
+ std::vector<lm::WordIndex>::const_iterator word = words.begin();
19
+ if (word != words.end() && *word == bos) {
20
+ scorer.BeginSentence();
21
+ ++word;
22
+ }
23
+ for (; word != words.end(); ++word) {
24
+ if (*word == kNonTerminal) {
25
+ ret.prob += scorer.Finish();
26
+ scorer.Reset(*(writing++));
27
+ } else {
28
+ if (*word == oov) ++ret.oov;
29
+ scorer.Terminal(*word);
30
+ }
31
+ }
32
+ ret.prob += scorer.Finish();
33
+ return ret;
34
+ }
35
+
36
+ template ScoreRuleRet ScoreRule(const lm::ngram::RestProbingModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
37
+ template ScoreRuleRet ScoreRule(const lm::ngram::ProbingModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
38
+ template ScoreRuleRet ScoreRule(const lm::ngram::TrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
39
+ template ScoreRuleRet ScoreRule(const lm::ngram::QuantTrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
40
+ template ScoreRuleRet ScoreRule(const lm::ngram::ArrayTrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
41
+ template ScoreRuleRet ScoreRule(const lm::ngram::QuantArrayTrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
42
+
43
+ } // namespace search
mosesdecoder/search/rule.hh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_RULE__
2
+ #define SEARCH_RULE__
3
+
4
+ #include "lm/left.hh"
5
+ #include "lm/word_index.hh"
6
+ #include "search/types.hh"
7
+
8
+ #include <vector>
9
+
10
+ namespace search {
11
+
12
+ const lm::WordIndex kNonTerminal = lm::kMaxWordIndex;
13
+
14
+ struct ScoreRuleRet { // Value returned by ScoreRule().
15
+ Score prob; // Sum of the scorer's Finish() values over the rule's segments.
16
+ unsigned int oov; // Number of words equal to the vocabulary's NotFound() id.
17
+ };
18
+
19
+ // Pass <s> and </s> normally.
20
+ // Indicate non-terminals with kNonTerminal.
21
+ template <class Model> ScoreRuleRet ScoreRule(const Model &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *state_out);
22
+
23
+ } // namespace search
24
+
25
+ #endif // SEARCH_RULE__
mosesdecoder/search/types.hh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_TYPES__
2
+ #define SEARCH_TYPES__
3
+
4
+ #include <stdint.h>
5
+
6
+ namespace lm { namespace ngram { struct ChartState; } }
7
+
8
+ namespace search {
9
+
10
+ typedef float Score;
11
+
12
+ typedef uint32_t Arity;
13
+
14
+ union Note {
15
+ const void *vp;
16
+ };
17
+
18
+ typedef void *History;
19
+
20
+ struct NBestComplete { // (history, state, score) triple; VertexNode::AppendHypothesis copies it into a HypoState.
21
+ NBestComplete(History in_history, const lm::ngram::ChartState &in_state, Score in_score)
22
+ : history(in_history), state(&in_state), score(in_score) {} // Keeps the ADDRESS of in_state: the referenced object must outlive this struct.
23
+
24
+ History history; // Opaque pointer (History is void*), stored as given.
25
+ const lm::ngram::ChartState *state; // Not owned; points at the constructor's in_state argument.
26
+ Score score; // Score as given to the constructor.
27
+ };
28
+
29
+ } // namespace search
30
+
31
+ #endif // SEARCH_TYPES__
mosesdecoder/search/vertex.cc ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "search/vertex.hh"
2
+
3
+ #include "search/context.hh"
4
+
5
+ #include <boost/unordered_map.hpp>
6
+
7
+ #include <algorithm>
8
+ #include <functional>
9
+ #include <cassert>
10
+
11
+ namespace search {
12
+
13
+ namespace {
14
+
15
+ const uint64_t kCompleteAdd = static_cast<uint64_t>(-1);
16
+
17
+ class DivideLeft {
18
+ public:
19
+ explicit DivideLeft(unsigned char index)
20
+ : index_(index) {}
21
+
22
+ uint64_t operator()(const lm::ngram::ChartState &state) const {
23
+ return (index_ < state.left.length) ?
24
+ state.left.pointers[index_] :
25
+ (kCompleteAdd - state.left.full);
26
+ }
27
+
28
+ private:
29
+ unsigned char index_;
30
+ };
31
+
32
+ class DivideRight {
33
+ public:
34
+ explicit DivideRight(unsigned char index)
35
+ : index_(index) {}
36
+
37
+ uint64_t operator()(const lm::ngram::ChartState &state) const {
38
+ return (index_ < state.right.length) ?
39
+ static_cast<uint64_t>(state.right.words[index_]) :
40
+ (kCompleteAdd - state.left.full);
41
+ }
42
+
43
+ private:
44
+ unsigned char index_;
45
+ };
46
+
47
+ template <class Divider> void Split(const Divider &divider, const std::vector<HypoState> &hypos, std::vector<VertexNode> &extend) {
48
+ // Map from divider to index in extend.
49
+ typedef boost::unordered_map<uint64_t, std::size_t> Lookup;
50
+ Lookup lookup;
51
+ for (std::vector<HypoState>::const_iterator i = hypos.begin(); i != hypos.end(); ++i) {
52
+ uint64_t key = divider(i->state);
53
+ std::pair<Lookup::iterator, bool> res(lookup.insert(std::make_pair(key, extend.size())));
54
+ if (res.second) {
55
+ extend.resize(extend.size() + 1);
56
+ extend.back().AppendHypothesis(*i);
57
+ } else {
58
+ extend[res.first->second].AppendHypothesis(*i);
59
+ }
60
+ }
61
+ //assert((extend.size() != 1) || (hypos.size() == 1));
62
+ }
63
+
64
+ lm::WordIndex Identify(const lm::ngram::Right &right, unsigned char index) {
65
+ return right.words[index];
66
+ }
67
+
68
+ uint64_t Identify(const lm::ngram::Left &left, unsigned char index) {
69
+ return left.pointers[index];
70
+ }
71
+
72
// Tracks how long a prefix of one side (Left or Right) is shared between a
// reference side and every side passed to Consider().
template <class Side> class DetermineSame {
  public:
    // side:       reference side; only a reference is kept, so it must
    //             outlive this object.
    // guaranteed: number of leading positions that are not compared.
    DetermineSame(const Side &side, unsigned char guaranteed)
      : side_(side), guaranteed_(guaranteed), shared_(side.length), complete_(true) {}

    // Fold one more side into the comparison.
    void Consider(const Side &other) {
      if (other.length != shared_) {
        complete_ = false;
        if (other.length < shared_) shared_ = other.length;
      }
      unsigned char pos = guaranteed_;
      while (pos < shared_) {
        if (Identify(side_, pos) != Identify(other, pos)) {
          // First mismatch: the shared prefix ends here.
          shared_ = pos;
          complete_ = false;
          return;
        }
        ++pos;
      }
    }

    // Length of the prefix shared so far.
    unsigned char Shared() const { return shared_; }

    // True while every considered side had the same length and contents.
    bool Complete() const { return complete_; }

  private:
    const Side &side_;
    unsigned char guaranteed_, shared_;
    bool complete_;
};
101
+
102
+ // Custom enum to save memory: valid values of policy_.
103
+ // Alternate and there is still alternation to do.
104
+ const unsigned char kPolicyAlternate = 0;
105
+ // Branch based on left state only, because right ran out or this is a left tree.
106
+ const unsigned char kPolicyOneLeft = 1;
107
+ // Branch based on right state only.
108
+ const unsigned char kPolicyOneRight = 2;
109
+ // Reveal everything in the next branch. Used to terminate the left/right policies.
110
+ // static const unsigned char kPolicyEverything = 3;
111
+
112
+ } // namespace
113
+
114
+ namespace {
115
+ struct GreaterByScore : public std::binary_function<const HypoState &, const HypoState &, bool> {
116
+ bool operator()(const HypoState &first, const HypoState &second) const {
117
+ return first.score > second.score;
118
+ }
119
+ };
120
+ } // namespace
121
+
122
+ void VertexNode::FinishRoot() {
123
+ std::sort(hypos_.begin(), hypos_.end(), GreaterByScore());
124
+ extend_.clear();
125
+ // HACK: extend to one hypo so that root can be blank.
126
+ state_.left.full = false;
127
+ state_.left.length = 0;
128
+ state_.right.length = 0;
129
+ right_full_ = false;
130
+ niceness_ = 0;
131
+ policy_ = kPolicyAlternate;
132
+ if (hypos_.size() == 1) {
133
+ extend_.resize(1);
134
+ extend_.front().AppendHypothesis(hypos_.front());
135
+ extend_.front().FinishedAppending(0, 0);
136
+ }
137
+ if (hypos_.empty()) {
138
+ bound_ = -INFINITY;
139
+ } else {
140
+ bound_ = hypos_.front().score;
141
+ }
142
+ }
143
+
144
+ void VertexNode::FinishedAppending(const unsigned char common_left, const unsigned char common_right) {
145
+ assert(!hypos_.empty());
146
+ assert(extend_.empty());
147
+ bound_ = hypos_.front().score;
148
+ state_ = hypos_.front().state;
149
+ bool all_full = state_.left.full;
150
+ bool all_non_full = !state_.left.full;
151
+ DetermineSame<lm::ngram::Left> left(state_.left, common_left);
152
+ DetermineSame<lm::ngram::Right> right(state_.right, common_right);
153
+ for (std::vector<HypoState>::const_iterator i = hypos_.begin() + 1; i != hypos_.end(); ++i) {
154
+ all_full &= i->state.left.full;
155
+ all_non_full &= !i->state.left.full;
156
+ left.Consider(i->state.left);
157
+ right.Consider(i->state.right);
158
+ }
159
+ state_.left.full = all_full && left.Complete();
160
+ right_full_ = all_full && right.Complete();
161
+ state_.left.length = left.Shared();
162
+ state_.right.length = right.Shared();
163
+
164
+ if (!all_full && !all_non_full) {
165
+ policy_ = kPolicyAlternate;
166
+ } else if (left.Complete()) {
167
+ policy_ = kPolicyOneRight;
168
+ } else if (right.Complete()) {
169
+ policy_ = kPolicyOneLeft;
170
+ } else {
171
+ policy_ = kPolicyAlternate;
172
+ }
173
+ niceness_ = state_.left.length + state_.right.length;
174
+ }
175
+
176
+ void VertexNode::BuildExtend() {
177
+ // Already built.
178
+ if (!extend_.empty()) return;
179
+ // Nothing to build since this is a leaf.
180
+ if (hypos_.size() <= 1) return;
181
+ bool left_branch = true;
182
+ switch (policy_) {
183
+ case kPolicyAlternate:
184
+ left_branch = (state_.left.length <= state_.right.length);
185
+ break;
186
+ case kPolicyOneLeft:
187
+ left_branch = true;
188
+ break;
189
+ case kPolicyOneRight:
190
+ left_branch = false;
191
+ break;
192
+ }
193
+ if (left_branch) {
194
+ Split(DivideLeft(state_.left.length), hypos_, extend_);
195
+ } else {
196
+ Split(DivideRight(state_.right.length), hypos_, extend_);
197
+ }
198
+ for (std::vector<VertexNode>::iterator i = extend_.begin(); i != extend_.end(); ++i) {
199
+ // TODO: provide more here for branching?
200
+ i->FinishedAppending(state_.left.length, state_.right.length);
201
+ }
202
+ }
203
+
204
+ } // namespace search
mosesdecoder/search/vertex.hh ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_VERTEX__
2
+ #define SEARCH_VERTEX__
3
+
4
+ #include "lm/left.hh"
5
+ #include "search/types.hh"
6
+
7
+ #include <boost/unordered_set.hpp>
8
+
9
+ #include <queue>
10
+ #include <vector>
11
+ #include <cmath>
12
+ #include <stdint.h>
13
+
14
+ namespace search {
15
+
16
+ class ContextBase;
17
+
18
+ struct HypoState { // One hypothesis as stored inside a VertexNode.
19
+ History history; // Opaque pointer (History is void*); returned by VertexNode::End() for a leaf.
20
+ lm::ngram::ChartState state; // Stored by value (copied from *NBestComplete::state in AppendHypothesis).
21
+ Score score; // Used for sorting in FinishRoot and as the node bound.
22
+ };
23
+
24
+ class VertexNode {
25
+ public:
26
+ VertexNode() {}
27
+
28
+ void InitRoot() { hypos_.clear(); }
29
+
30
+ /* The steps of building a VertexNode:
31
+ * 1. Default construct.
32
+ * 2. AppendHypothesis at least once, possibly multiple times.
33
+ * 3. FinishAppending with the number of words on left and right guaranteed
34
+ * to be common.
35
+ * 4. If !Complete(), call BuildExtend to construct the extensions
36
+ */
37
+ // Must default construct, call AppendHypothesis 1 or more times then do FinishedAppending.
38
+ void AppendHypothesis(const NBestComplete &best) {
39
+ assert(hypos_.empty() || !(hypos_.front().state == *best.state));
40
+ HypoState hypo;
41
+ hypo.history = best.history;
42
+ hypo.state = *best.state;
43
+ hypo.score = best.score;
44
+ hypos_.push_back(hypo);
45
+ }
46
+ void AppendHypothesis(const HypoState &hypo) {
47
+ hypos_.push_back(hypo);
48
+ }
49
+
50
+ // Sort hypotheses for the root.
51
+ void FinishRoot();
52
+
53
+ void FinishedAppending(const unsigned char common_left, const unsigned char common_right);
54
+
55
+ void BuildExtend();
56
+
57
+ // Should only happen to a root node when the entire vertex is empty.
58
+ bool Empty() const {
59
+ return hypos_.empty() && extend_.empty();
60
+ }
61
+
62
+ bool Complete() const {
63
+ // HACK: prevent root from being complete. TODO: allow root to be complete.
64
+ return hypos_.size() == 1 && extend_.empty();
65
+ }
66
+
67
+ const lm::ngram::ChartState &State() const { return state_; }
68
+ bool RightFull() const { return right_full_; }
69
+
70
+ // Priority relative to other non-terminals. 0 is highest.
71
+ unsigned char Niceness() const { return niceness_; }
72
+
73
+ Score Bound() const {
74
+ return bound_;
75
+ }
76
+
77
+ // Will be invalid unless this is a leaf.
78
+ const History End() const {
79
+ assert(hypos_.size() == 1);
80
+ return hypos_.front().history;
81
+ }
82
+
83
+ VertexNode &operator[](size_t index) {
84
+ assert(!extend_.empty());
85
+ return extend_[index];
86
+ }
87
+
88
+ size_t Size() const {
89
+ return extend_.size();
90
+ }
91
+
92
+ private:
93
+ // Hypotheses to be split.
94
+ std::vector<HypoState> hypos_;
95
+
96
+ std::vector<VertexNode> extend_;
97
+
98
+ lm::ngram::ChartState state_;
99
+ bool right_full_;
100
+
101
+ unsigned char niceness_;
102
+
103
+ unsigned char policy_;
104
+
105
+ Score bound_;
106
+ };
107
+
108
+ class PartialVertex {
109
+ public:
110
+ PartialVertex() {}
111
+
112
+ explicit PartialVertex(VertexNode &back) : back_(&back), index_(0) {}
113
+
114
+ bool Empty() const { return back_->Empty(); }
115
+
116
+ bool Complete() const { return back_->Complete(); }
117
+
118
+ const lm::ngram::ChartState &State() const { return back_->State(); }
119
+ bool RightFull() const { return back_->RightFull(); }
120
+
121
+ Score Bound() const { return index_ ? (*back_)[index_].Bound() : back_->Bound(); }
122
+
123
+ unsigned char Niceness() const { return back_->Niceness(); }
124
+
125
+ // Split into continuation and alternative, rendering this the continuation.
126
+ bool Split(PartialVertex &alternative) {
127
+ assert(!Complete());
128
+ back_->BuildExtend();
129
+ bool ret;
130
+ if (index_ + 1 < back_->Size()) {
131
+ alternative.index_ = index_ + 1;
132
+ alternative.back_ = back_;
133
+ ret = true;
134
+ } else {
135
+ ret = false;
136
+ }
137
+ back_ = &((*back_)[index_]);
138
+ index_ = 0;
139
+ return ret;
140
+ }
141
+
142
+ const History End() const {
143
+ return back_->End();
144
+ }
145
+
146
+ private:
147
+ VertexNode *back_;
148
+ unsigned int index_;
149
+ };
150
+
151
+ template <class Output> class VertexGenerator;
152
+
153
+ class Vertex {
154
+ public:
155
+ Vertex() {}
156
+
157
+ //PartialVertex RootFirst() const { return PartialVertex(right_); }
158
+ PartialVertex RootAlternate() { return PartialVertex(root_); }
159
+ //PartialVertex RootLast() const { return PartialVertex(left_); }
160
+
161
+ bool Empty() const {
162
+ return root_.Empty();
163
+ }
164
+
165
+ Score Bound() const {
166
+ return root_.Bound();
167
+ }
168
+
169
+ const History BestChild() {
170
+ // left_ and right_ are not set at the root.
171
+ PartialVertex top(RootAlternate());
172
+ if (top.Empty()) {
173
+ return History();
174
+ } else {
175
+ PartialVertex continuation;
176
+ while (!top.Complete()) {
177
+ top.Split(continuation);
178
+ }
179
+ return top.End();
180
+ }
181
+ }
182
+
183
+ private:
184
+ template <class Output> friend class VertexGenerator;
185
+ template <class Output> friend class RootVertexGenerator;
186
+ VertexNode root_;
187
+
188
+ // These will not be set for the root vertex.
189
+ // Branches only on left state.
190
+ //VertexNode left_;
191
+ // Branches only on right state.
192
+ //VertexNode right_;
193
+ };
194
+
195
+ } // namespace search
196
+ #endif // SEARCH_VERTEX__
mosesdecoder/search/vertex_generator.hh ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef SEARCH_VERTEX_GENERATOR__
2
+ #define SEARCH_VERTEX_GENERATOR__
3
+
4
+ #include "search/edge.hh"
5
+ #include "search/types.hh"
6
+ #include "search/vertex.hh"
7
+
8
+ namespace lm {
9
+ namespace ngram {
10
+ struct ChartState;
11
+ } // namespace ngram
12
+ } // namespace lm
13
+
14
+ namespace search {
15
+
16
+ class ContextBase;
17
+
18
+ // Output makes the single-best or n-best list.
19
+ template <class Output> class VertexGenerator {
20
+ public:
21
+ VertexGenerator(ContextBase &context, Vertex &gen, Output &nbest) : context_(context), gen_(gen), nbest_(nbest) {}
22
+
23
+ void NewHypothesis(PartialEdge partial) {
24
+ nbest_.Add(existing_[hash_value(partial.CompletedState())], partial);
25
+ }
26
+
27
+ void FinishedSearch() {
28
+ gen_.root_.InitRoot();
29
+ for (typename Existing::iterator i(existing_.begin()); i != existing_.end(); ++i) {
30
+ gen_.root_.AppendHypothesis(nbest_.Complete(i->second));
31
+ }
32
+ existing_.clear();
33
+ gen_.root_.FinishRoot();
34
+ }
35
+
36
+ Vertex &Generating() { return gen_; }
37
+
38
+ private:
39
+ ContextBase &context_;
40
+
41
+ Vertex &gen_;
42
+
43
+ typedef boost::unordered_map<uint64_t, typename Output::Combine> Existing;
44
+ Existing existing_;
45
+
46
+ Output &nbest_;
47
+ };
48
+
49
+ // Special case for root vertex: everything should come together into the root
50
+ // node. In theory, this should happen naturally due to state collapsing with
51
+ // <s> and </s>. If that's the case, VertexGenerator is fine, though it will
52
+ // make one connection.
53
+ template <class Output> class RootVertexGenerator {
54
+ public:
55
+ RootVertexGenerator(Vertex &gen, Output &out) : gen_(gen), out_(out) {}
56
+
57
+ void NewHypothesis(PartialEdge partial) {
58
+ out_.Add(combine_, partial);
59
+ }
60
+
61
+ void FinishedSearch() {
62
+ gen_.root_.InitRoot();
63
+ gen_.root_.AppendHypothesis(out_.Complete(combine_));
64
+ gen_.root_.FinishRoot();
65
+ }
66
+
67
+ private:
68
+ Vertex &gen_;
69
+
70
+ typename Output::Combine combine_;
71
+ Output &out_;
72
+ };
73
+
74
+ } // namespace search
75
+ #endif // SEARCH_VERTEX_GENERATOR__
mosesdecoder/symal/Jamfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ exe symal : symal.cpp cmd.c ;
2
+
mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/cmd.o ADDED
Binary file (14.7 kB). View file
 
mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal ADDED
Binary file (200 kB). View file
 
mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal.o ADDED
Binary file (41.4 kB). View file
 
mosesdecoder/symal/cmd.c ADDED
@@ -0,0 +1,642 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // $Id$
3
+
4
+ #include <stdarg.h>
5
+ #include <stdio.h>
6
+ #include <stdlib.h>
7
+ #include <ctype.h>
8
+ #include <string.h>
9
+
10
+ #include "cmd.h"
11
+
12
+ #ifdef WIN32
13
+ # define popen _popen
14
+ # define pclose _pclose
15
+ #endif
16
+
17
+ typedef struct { // One declared parameter; instances live in the cmds[] table.
18
+ enum CommandType Type; // Kind of value; selects the parse and print routine.
19
+ const char *Name, // Parameter name as passed to DeclareParams (pointer stored, not copied).
20
+ *ArgStr; // strdup'ed copy of the value text last given to SetParam.
21
+ void *Val; // Caller-supplied destination the parsed value is written through.
22
+ const void *p; // Type-specific extra: Enum_T table, int bound(s), or separator string.
23
+ } Cmd_T;
24
+
25
+ static const Enum_T BoolEnum[] = { // Enum table that DeclareParams installs for CMDBOOLTYPE parameters.
26
+ { "FALSE", 0 },
27
+ { "TRUE", 1 },
28
+ { 0, 0 } // Terminator: table scans stop at the entry whose Name is null.
29
+ };
30
+
31
+ #ifdef NEEDSTRDUP
32
+ char *strdup();
33
+ #endif
34
+
35
+ #define FALSE 0
36
+ #define TRUE 1
37
+
38
+ #define LINSIZ 10240
39
+ #define MAXPARAM 256
40
+
41
+ static Cmd_T cmds[MAXPARAM+1];
42
+ static const char *SepString = " \t\n";
43
+
44
+ /// Return cmd->p, as an int.
45
+ static int get_p_int(const Cmd_T *cmd)
46
+ {
47
+ return *(const int *)cmd->p;
48
+ }
49
+
50
+ /// Return cmd->p, as a pointer to a null-terminated array of Enum_T.
51
+ static const Enum_T *get_p_enums(const Cmd_T *cmd)
52
+ {
53
+ return (const Enum_T *)cmd->p;
54
+ }
55
+
56
+ /// Return cmd->p, as a pointer to a string.
57
+ static const char *get_p_char(const Cmd_T *cmd)
58
+ {
59
+ return (const char *)cmd->p;
60
+ }
61
+
62
+ /// Return cmd->p, as a pointer to an array of two ints.
63
+ static const int *get_p_range(const Cmd_T *cmd)
64
+ {
65
+ return (const int *)cmd->p;
66
+ }
67
+
68
+ /// Return cmd->Val, as a pointer to int.
69
+ static int *get_val_int_ptr(const Cmd_T *cmd)
70
+ {
71
+ return (int *)cmd->Val;
72
+ }
73
+
74
+ /// Return the int at which cmd->Val points.
75
+ static int get_val_int(const Cmd_T *cmd)
76
+ {
77
+ return *get_val_int_ptr(cmd);
78
+ }
79
+
80
+ /// Update the int at which cmd->Val points.
81
+ static void update_val_int(const Cmd_T *cmd, int value)
82
+ {
83
+ *get_val_int_ptr(cmd) = value;
84
+ }
85
+
86
+ /// Return cmd->Val, as a pointer to double.
87
+ static double *get_val_double_ptr(const Cmd_T *cmd)
88
+ {
89
+ return (double *)cmd->Val;
90
+ }
91
+
92
+ /// Return the double at which cmd->Val points.
93
+ static double get_val_double(const Cmd_T *cmd)
94
+ {
95
+ return *get_val_double_ptr(cmd);
96
+ }
97
+
98
+ /// Return cmd->Val as a pointer to a string pointer.
99
+ static const char **get_val_char_ptr(const Cmd_T *cmd)
100
+ {
101
+ return (const char **)cmd->Val;
102
+ }
103
+
104
+ /// Return the string pointer at which cmd->Val points.
105
+ static const char *get_val_char(const Cmd_T *cmd)
106
+ {
107
+ return *get_val_char_ptr(cmd);
108
+ }
109
+
110
+ /// Update the string pointer at which cmd->Val points.
111
+ static void update_val_char(const Cmd_T *cmd, const char *s)
112
+ {
113
+ *get_val_char_ptr(cmd) = s;
114
+ }
115
+
116
+ int DeclareParams(const char *ParName, ...)
117
+ {
118
+ va_list args;
119
+ static int ParamN = 0;
120
+
121
+ va_start(args, ParName);
122
+ for(; ParName;) {
123
+ int c,
124
+ j = 0;
125
+ if(ParamN==MAXPARAM) {
126
+ fprintf(stderr, "Too many parameters !!\n");
127
+ break;
128
+ }
129
+ for(c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
130
+ ;
131
+ if(!c) {
132
+ fprintf(stderr,
133
+ "Warning: parameter \"%s\" declared twice.\n",
134
+ ParName);
135
+ }
136
+ for(c=ParamN; c>j; c--) {
137
+ cmds[c] = cmds[c-1];
138
+ }
139
+ cmds[j].Name = ParName;
140
+ cmds[j].Type = va_arg(args, enum CommandType);
141
+ cmds[j].Val = va_arg(args, void *);
142
+ switch(cmds[j].Type) {
143
+ case CMDENUMTYPE: /* get the pointer to Enum_T struct */
144
+ cmds[j].p = va_arg(args, void *);
145
+ break;
146
+ case CMDSUBRANGETYPE: { /* get the two extremes */
147
+ int *subrange = calloc(2, sizeof(int));
148
+ cmds[j].p = subrange;
149
+ subrange[0] = va_arg(args, int);
150
+ subrange[1] = va_arg(args, int);
151
+ }
152
+ break;
153
+ case CMDGTETYPE: /* get lower or upper bound */
154
+ case CMDLTETYPE: {
155
+ int *value = calloc(1, sizeof(int));
156
+ cmds[j].p = value;
157
+ value[0] = va_arg(args, int);
158
+ }
159
+ break;
160
+ case CMDSTRARRAYTYPE: { /* get the separators string */
161
+ const char *s = va_arg(args, const char *);
162
+ cmds[j].p = (s ? strdup(s) : NULL);
163
+ }
164
+ break;
165
+ case CMDBOOLTYPE:
166
+ cmds[j].Type = CMDENUMTYPE;
167
+ cmds[j].p = BoolEnum;
168
+ break;
169
+ case CMDDOUBLETYPE: /* nothing else is needed */
170
+ case CMDINTTYPE:
171
+ case CMDSTRINGTYPE:
172
+ break;
173
+ default:
174
+ fprintf(stderr, "%s: %s %d %s \"%s\"\n",
175
+ "DeclareParam()", "Unknown Type",
176
+ cmds[j].Type, "for parameter", cmds[j].Name);
177
+ exit(1);
178
+ }
179
+ ParamN++;
180
+ ParName = va_arg(args, const char *);
181
+ }
182
+ cmds[ParamN].Name = NULL;
183
+ va_end(args);
184
+ return 0;
185
+ }
186
+
187
/// Read the next logical line from fp into Line (capacity n bytes).
///
/// Lines whose first character is '#' and lines that are empty after
/// removing leading whitespace are skipped. Leading whitespace is removed.
/// A line ending in a backslash is joined with the following line, the
/// backslash being replaced by a single space. A trailing newline is
/// removed only when one is actually present, so a final line without a
/// newline keeps its last character.
/// Returns Line, or NULL when fgets fails (end of input or error).
static char *GetLine(FILE *fp, int n, char *Line)
{
  int offs=0;

  for(;;) {
    char *seg = Line+offs;
    size_t len,
           lead;

    /* No room left for another character: return what has been joined so
       far instead of calling fgets with an unusable size forever. */
    if(n-offs < 2) {
      break;
    }
    if(!fgets(seg, n-offs, fp)) {
      return NULL;
    }
    if(*seg=='#') continue;
    len = strlen(seg);
    if(len && seg[len-1]=='\n') {
      seg[--len] = 0;
    }
    /* <ctype.h> functions need an unsigned char value. */
    for(lead=0; seg[lead] && isspace((unsigned char)seg[lead]); lead++)
      ;
    len -= lead;
    if(len<1) continue;
    if(lead) {
      memmove(seg, seg+lead, len+1);
    }
    if(seg[len-1]=='\\') {
      /* Continuation: overwrite the backslash and append the next line. */
      seg[len-1] = ' ';
      offs += (int)len;
    } else {
      break;
    }
  }
  return Line;
}
218
+
219
+ static void EnumError(const Cmd_T *cmd, const char *s)
220
+ {
221
+ const Enum_T *en;
222
+
223
+ fprintf(stderr,
224
+ "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
225
+ fprintf(stderr, "Valid values are:\n");
226
+ for(en=get_p_enums(cmd); en->Name; en++) {
227
+ if(*en->Name) {
228
+ fprintf(stderr, " %s\n", en->Name);
229
+ }
230
+ }
231
+ fprintf(stderr, "\n");
232
+ exit(1);
233
+ }
234
+
235
+ static void GteError(const Cmd_T *cmd, int n)
236
+ {
237
+ fprintf(stderr,
238
+ "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
239
+ fprintf(stderr, "Valid values must be greater than or equal to %d\n",
240
+ get_p_int(cmd));
241
+ exit(1);
242
+ }
243
+
244
+ static void LteError(const Cmd_T *cmd, int n)
245
+ {
246
+ fprintf(stderr,
247
+ "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
248
+ fprintf(stderr, "Valid values must be less than or equal to %d\n",
249
+ get_p_int(cmd));
250
+ exit(1);
251
+ }
252
+
253
+ static void SubrangeError(const Cmd_T *cmd, int n)
254
+ {
255
+ const int *subrange = get_p_range(cmd);
256
+ fprintf(stderr,
257
+ "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
258
+ fprintf(stderr, "Valid values range from %d to %d\n",
259
+ subrange[0], subrange[1]);
260
+ exit(1);
261
+ }
262
+
263
+ static void SetEnum(Cmd_T *cmd, const char *s)
264
+ {
265
+ const Enum_T *en;
266
+
267
+ for(en=get_p_enums(cmd); en->Name; en++) {
268
+ if(*en->Name && !strcmp(s, en->Name)) {
269
+ update_val_int(cmd, en->Idx);
270
+ return;
271
+ }
272
+ }
273
+ EnumError(cmd, s);
274
+ }
275
+
276
+ static void SetSubrange(Cmd_T *cmd, const char *s)
277
+ {
278
+ int n;
279
+ const int *subrange = get_p_range(cmd);
280
+
281
+ if(sscanf(s, "%d", &n)!=1) {
282
+ fprintf(stderr,
283
+ "Integer value required for parameter \"%s\"\n",
284
+ cmd->Name);
285
+ exit(1);
286
+ }
287
+ if(n < subrange[0] || n > subrange[1]) {
288
+ SubrangeError(cmd, n);
289
+ }
290
+ update_val_int(cmd, n);
291
+ }
292
+
293
+ static void SetGte(Cmd_T *cmd, const char *s)
294
+ {
295
+ int n;
296
+
297
+ if(sscanf(s, "%d", &n)!=1) {
298
+ fprintf(stderr,
299
+ "Integer value required for parameter \"%s\"\n",
300
+ cmd->Name);
301
+ exit(1);
302
+ }
303
+ if(n<get_p_int(cmd)) {
304
+ GteError(cmd, n);
305
+ }
306
+ update_val_int(cmd, n);
307
+ }
308
+
309
+ static char **str2array(const char *s, const char *sep)
310
+ {
311
+ const char *p;
312
+ char **a;
313
+ int n = 0,
314
+ l;
315
+
316
+ if(!sep) sep = SepString;
317
+ p = s += strspn(s, sep);
318
+ while(*p) {
319
+ p += strcspn(p, sep);
320
+ p += strspn(p, sep);
321
+ ++n;
322
+ }
323
+ a = calloc(n+1, sizeof(char *));
324
+ p = s;
325
+ n = 0;
326
+ while(*p) {
327
+ l = strcspn(p, sep);
328
+ a[n] = malloc(l+1);
329
+ memcpy(a[n], p, l);
330
+ a[n][l] = 0;
331
+ ++n;
332
+ p += l;
333
+ p += strspn(p, sep);
334
+ }
335
+ return a;
336
+ }
337
+
338
+ static void SetStrArray(Cmd_T *cmd, const char *s)
339
+ {
340
+ *(char***)cmd->Val = str2array(s, get_p_char(cmd));
341
+ }
342
+
343
+ static void SetLte(Cmd_T *cmd, const char *s)
344
+ {
345
+ int n;
346
+
347
+ if(sscanf(s, "%d", &n)!=1) {
348
+ fprintf(stderr,
349
+ "Integer value required for parameter \"%s\"\n",
350
+ cmd->Name);
351
+ exit(1);
352
+ }
353
+ if(n > get_p_int(cmd)) {
354
+ LteError(cmd, n);
355
+ }
356
+ update_val_int(cmd, n);
357
+ }
358
+
359
+ static void SetParam(Cmd_T *cmd, const char *s)
360
+ {
361
+ if(!*s && cmd->Type != CMDSTRINGTYPE) {
362
+ fprintf(stderr,
363
+ "WARNING: No value specified for parameter \"%s\"\n",
364
+ cmd->Name);
365
+ return;
366
+ }
367
+ switch(cmd->Type) {
368
+ case CMDDOUBLETYPE:
369
+ if(sscanf(s, "%lf", get_val_double_ptr(cmd))!=1) {
370
+ fprintf(stderr,
371
+ "Float value required for parameter \"%s\"\n",
372
+ cmd->Name);
373
+ exit(1);
374
+ }
375
+ break;
376
+ case CMDENUMTYPE:
377
+ SetEnum(cmd, s);
378
+ break;
379
+ case CMDINTTYPE:
380
+ if(sscanf(s, "%d", get_val_int_ptr(cmd))!=1) {
381
+ fprintf(stderr,
382
+ "Integer value required for parameter \"%s\"\n",
383
+ cmd->Name);
384
+ exit(1);
385
+ }
386
+ break;
387
+ case CMDSTRINGTYPE:
388
+ update_val_char(cmd,
389
+ (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
390
+ ? strdup(s)
391
+ : 0);
392
+ break;
393
+ case CMDSTRARRAYTYPE:
394
+ SetStrArray(cmd, s);
395
+ break;
396
+ case CMDGTETYPE:
397
+ SetGte(cmd, s);
398
+ break;
399
+ case CMDLTETYPE:
400
+ SetLte(cmd, s);
401
+ break;
402
+ case CMDSUBRANGETYPE:
403
+ SetSubrange(cmd, s);
404
+ break;
405
+ default:
406
+ fprintf(stderr, "%s: %s %d %s \"%s\"\n",
407
+ "SetParam",
408
+ "Unknown Type",
409
+ cmd->Type,
410
+ "for parameter",
411
+ cmd->Name);
412
+ exit(1);
413
+ }
414
+ cmd->ArgStr = strdup(s);
415
+ }
416
+
417
+ static int Scan(const char *ProgName, Cmd_T *cmds, char *Line) // Parse one "[prog/]name value" line and apply it; returns non-zero only for an unknown name on a line qualified with this program's name. The cmds parameter shadows the file-level table.
418
+ {
419
+ char *q,
420
+ *p;
421
+ int i,
422
+ hl,
423
+ HasToMatch = FALSE,
424
+ c0,
425
+ c;
426
+
427
+ p = Line+strspn(Line, SepString); // skip leading separators
428
+ hl = strcspn(p, SepString); // length of the first token
429
+ if(!hl) {
430
+ return 0; // nothing on the line
431
+ }
432
+ q = strchr(p, '/');
433
+ if(q && q-p<hl) { // the first token contains a '/': text before it is a program name
434
+ *q = 0; // temporarily terminate the prefix so it can be compared
435
+ if(strcmp(p, ProgName)) {
436
+ *q = '/';
437
+ return 0; // line is addressed to a different program: ignore it
438
+ }
439
+ *q = '/';
440
+ HasToMatch=TRUE; // qualified with our name, so the parameter has to exist
441
+ p = q+1;
442
+ }
443
+ hl = strcspn(p, SepString); // length of the parameter name
444
+ if(!hl) {
445
+ return 0;
446
+ }
447
+ c0 = p[hl];
448
+ p[hl] = 0; // temporarily terminate the name for strcmp
449
+ for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++) // stops at the first entry that does not compare below the name; c==0 means found
450
+ ;
451
+ p[hl] = c0; // restore the character that was overwritten
452
+
453
+ if (c)
454
+ return HasToMatch && c; // not found: an error only when the line was qualified
455
+
456
+ SetParam(cmds+i, p+hl+strspn(p+hl, SepString)); // value = rest of the line after the separators
457
+ return 0;
458
+ }
459
+
460
+ static void PrintEnum(const Cmd_T *cmd, int ValFlag, FILE *fp)
461
+ {
462
+ const Enum_T *en;
463
+
464
+ fprintf(fp, "%s", cmd->Name);
465
+ if(ValFlag) {
466
+ for(en=get_p_enums(cmd); en->Name; en++) {
467
+ if(*en->Name && en->Idx==get_val_int(cmd)) {
468
+ fprintf(fp, ": %s", en->Name);
469
+ }
470
+ }
471
+ }
472
+ fprintf(fp, "\n");
473
+ }
474
+
475
+ static void PrintStrArray(const Cmd_T *cmd, int ValFlag, FILE *fp)
476
+ {
477
+ char *indent,
478
+ **s = *(char***)cmd->Val;
479
+ int l = 4+strlen(cmd->Name);
480
+
481
+ fprintf(fp, "%s", cmd->Name);
482
+ indent = malloc(l+2);
483
+ memset(indent, ' ', l+1);
484
+ indent[l+1] = 0;
485
+ if(ValFlag) {
486
+ fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
487
+ if(s) while(*s) {
488
+ fprintf(fp, "\n%s %s", indent, *s++);
489
+ }
490
+ }
491
+ free(indent);
492
+ fprintf(fp, "\n");
493
+ }
494
+
495
+ static void PrintParam(const Cmd_T *cmd, int ValFlag, FILE *fp)
496
+ {
497
+ fprintf(fp, "%4s", "");
498
+ switch(cmd->Type) {
499
+ case CMDDOUBLETYPE:
500
+ fprintf(fp, "%s", cmd->Name);
501
+ if(ValFlag) fprintf(fp, ": %22.15e", get_val_double(cmd));
502
+ fprintf(fp, "\n");
503
+ break;
504
+ case CMDENUMTYPE:
505
+ PrintEnum(cmd, ValFlag, fp);
506
+ break;
507
+ case CMDINTTYPE:
508
+ case CMDSUBRANGETYPE:
509
+ case CMDGTETYPE:
510
+ case CMDLTETYPE:
511
+ fprintf(fp, "%s", cmd->Name);
512
+ if(ValFlag) fprintf(fp, ": %d", get_val_int(cmd));
513
+ fprintf(fp, "\n");
514
+ break;
515
+ case CMDSTRINGTYPE:
516
+ fprintf(fp, "%s", cmd->Name);
517
+ if(ValFlag) {
518
+ const char *value = get_val_char(cmd);
519
+ if(value) {
520
+ fprintf(fp, ": \"%s\"", value);
521
+ } else {
522
+ fprintf(fp, ": %s", "NULL");
523
+ }
524
+ }
525
+ fprintf(fp, "\n");
526
+ break;
527
+ case CMDSTRARRAYTYPE:
528
+ PrintStrArray(cmd, ValFlag, fp);
529
+ break;
530
+ default:
531
+ fprintf(stderr, "%s: %s %d %s \"%s\"\n",
532
+ "PrintParam",
533
+ "Unknown Type",
534
+ cmd->Type,
535
+ "for parameter",
536
+ cmd->Name);
537
+ exit(1);
538
+ }
539
+ }
540
+
541
+ static void PrintParams(int ValFlag, FILE *fp)
542
+ {
543
+ int i;
544
+
545
+ fflush(fp);
546
+ if(ValFlag) {
547
+ fprintf(fp, "Parameters Values:\n");
548
+ } else {
549
+ fprintf(fp, "Parameters:\n");
550
+ }
551
+ for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
552
+ fprintf(fp, "\n");
553
+ fflush(fp);
554
+ }
555
+
556
+ static void CmdError(const char *opt)
557
+ {
558
+ fprintf(stderr, "Invalid option \"%s\"\n", opt);
559
+ fprintf(stderr, "This program expectes the following parameters:\n");
560
+ PrintParams(FALSE, stderr);
561
+ exit(0);
562
+ }
563
+
564
+ int GetParams(int *n, char ***a, const char *CmdFileName)
565
+ {
566
+ char *Line,
567
+ *ProgName;
568
+ int argc = *n;
569
+ char **argv = *a,
570
+ *s;
571
+ FILE *fp;
572
+ int IsPipe;
573
+
574
+ #ifdef MSDOS
575
+ #define PATHSEP '\\'
576
+ char *dot = NULL;
577
+ #else
578
+ #define PATHSEP '/'
579
+ #endif
580
+
581
+ if(!(Line=malloc(LINSIZ))) {
582
+ fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
583
+ LINSIZ);
584
+ exit(1);
585
+ }
586
+ if((ProgName=strrchr(*argv, PATHSEP))) {
587
+ ++ProgName;
588
+ } else {
589
+ ProgName = *argv;
590
+ }
591
+ #ifdef MSDOS
592
+ if(dot=strchr(ProgName, '.')) *dot = 0;
593
+ #endif
594
+ --argc;
595
+ ++argv;
596
+ for(;;) {
597
+ if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
598
+ CmdFileName = argv[0]+2;
599
+ ++argv;
600
+ --argc;
601
+ }
602
+ if(!CmdFileName) {
603
+ break;
604
+ }
605
+ IsPipe = !strncmp(CmdFileName, "@@", 2);
606
+ fp = IsPipe
607
+ ? popen(CmdFileName+2, "r")
608
+ : strcmp(CmdFileName, "-")
609
+ ? fopen(CmdFileName, "r")
610
+ : stdin;
611
+ if(!fp) {
612
+ fprintf(stderr, "Unable to open command file %s\n",
613
+ CmdFileName);
614
+ exit(1);
615
+ }
616
+ while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
617
+ if(Scan(ProgName, cmds, Line)) {
618
+ CmdError(Line);
619
+ }
620
+ }
621
+ if(fp!=stdin) {
622
+ if(IsPipe) pclose(fp);
623
+ else fclose(fp);
624
+ }
625
+ CmdFileName = NULL;
626
+ }
627
+ while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
628
+ *s = ' ';
629
+ sprintf(Line, "%s/%s", ProgName, *argv+1);
630
+ *s = '=';
631
+ if(Scan(ProgName, cmds, Line)) CmdError(*argv);
632
+ --argc;
633
+ ++argv;
634
+ }
635
+ *n = argc;
636
+ *a = argv;
637
+ #ifdef MSDOS
638
+ if(dot) *dot = '.';
639
+ #endif
640
+ free(Line);
641
+ return 0;
642
+ }
mosesdecoder/symal/cmd.h ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // $Id$
3
+
4
+ #if !defined(CMD_H) // include guard for the parameter-parsing interface
5
+
6
+ #define CMD_H
7
+
8
+ enum CommandType { // type tag that follows each parameter name in a DeclareParams() call
9
+ CMDDOUBLETYPE = 1,
10
+ CMDENUMTYPE, // followed by the target variable and an Enum_T table (see symal.cpp)
11
+ CMDINTTYPE,
12
+ CMDSTRINGTYPE, // followed by the address of a char* (see symal.cpp)
13
+ CMDSUBRANGETYPE,
14
+ CMDGTETYPE,
15
+ CMDLTETYPE,
16
+ CMDSTRARRAYTYPE,
17
+ CMDBOOLTYPE
18
+ };
19
+
20
+ typedef struct { // one entry of an enum table; callers end a table with a {0, 0} entry
21
+ const char *Name; // textual spelling accepted for the option value
22
+ int Idx; // presumably the value stored when Name is given - confirm in cmd.c
23
+ } Enum_T;
24
+
25
+ #ifdef __cplusplus
26
+ extern "C" { // C linkage so C++ callers can use the functions defined in cmd.c
27
+ #endif
28
+
29
+ int DeclareParams(const char *, ...); // variadic; callers terminate the argument list with NULL
30
+ int GetParams(int *n, char ***a, const char *CmdFileName); // consumes leading option arguments, updates *n and *a, returns 0
31
+
32
+ #ifdef __cplusplus
33
+ }
34
+ #endif
35
+ #endif // CMD_H
mosesdecoder/symal/symal.cpp ADDED
@@ -0,0 +1,518 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ #include <cassert>
4
+ #include <iomanip>
5
+ #include <iostream>
6
+ #include <fstream>
7
+ #include <sstream>
8
+ #include <stdexcept>
9
+ #include <string>
10
+ #include <list>
11
+ #include <vector>
12
+ #include <set>
13
+ #include <algorithm>
14
+ #include <cstring>
15
+ #include "cmd.h"
16
+
17
+ using namespace std;
18
+
19
+ const int MAX_WORD = 10000; // buffer size for a single source/target word
20
+ const int MAX_M = 400; // maximum length of source strings
21
+ const int MAX_N = 400; // maximum length of target strings
22
+
23
+ enum Alignment {
24
+ UNION = 1,
25
+ INTERSECT,
26
+ GROW,
27
+ SRCTOTGT,
28
+ TGTTOSRC,
29
+ };
30
+
31
+ const Enum_T END_ENUM = {0, 0};
32
+
33
+ namespace
34
+ {
35
+ Enum_T AlignEnum [] = {
36
+ { "union", UNION },
37
+ { "u", UNION },
38
+ { "intersect", INTERSECT},
39
+ { "i", INTERSECT},
40
+ { "grow", GROW },
41
+ { "g", GROW },
42
+ { "srctotgt", SRCTOTGT },
43
+ { "s2t", SRCTOTGT },
44
+ { "tgttosrc", TGTTOSRC },
45
+ { "t2s", TGTTOSRC },
46
+ END_ENUM
47
+ };
48
+
49
+ Enum_T BoolEnum [] = {
50
+ { "true", true },
51
+ { "yes", true },
52
+ { "y", true },
53
+ { "false", false },
54
+ { "no", false },
55
+ { "n", false },
56
+ END_ENUM
57
+ };
58
+
59
+ // global variables and constants
60
+
61
+ int* fa; //counters of covered foreign positions
62
+ int* ea; //counters of covered english positions
63
+ int** A; //alignment matrix with information symmetric/direct/inverse alignments
64
+
65
+ int verbose=0;
66
+
67
+ //read an alignment pair from the input stream.
68
+
69
+ int lc = 0;
70
+
71
+ int getals(istream& inp,int& m, int *a,int& n, int *b)
72
+ {
73
+ char w[MAX_WORD], dummy[10];
74
+ int i,j,freq;
75
+ if (inp >> freq) {
76
+ ++lc;
77
+ //target sentence
78
+ inp >> n;
79
+ assert(n<MAX_N);
80
+ for (i=1; i<=n; i++) {
81
+ inp >> setw(MAX_WORD) >> w;
82
+ if (strlen(w)>=MAX_WORD-1) {
83
+ cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="
84
+ << MAX_WORD-1 << endl;
85
+ assert(strlen(w)<MAX_WORD-1);
86
+ }
87
+ }
88
+
89
+ inp >> dummy; //# separator
90
+ // inverse alignment
91
+ for (i=1; i<=n; i++) inp >> b[i];
92
+
93
+ //source sentence
94
+ inp >> m;
95
+ assert(m<MAX_M);
96
+ for (j=1; j<=m; j++) {
97
+ inp >> setw(MAX_WORD) >> w;
98
+ if (strlen(w)>=MAX_WORD-1) {
99
+ cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="
100
+ << MAX_WORD-1 << endl;
101
+ assert(strlen(w)<MAX_WORD-1);
102
+ }
103
+ }
104
+
105
+ inp >> dummy; //# separator
106
+
107
+ // direct alignment
108
+ for (j=1; j<=m; j++) {
109
+ inp >> a[j];
110
+ assert(0<=a[j] && a[j]<=n);
111
+ }
112
+
113
+ //check inverse alignemnt
114
+ for (i=1; i<=n; i++)
115
+ assert(0<=b[i] && b[i]<=m);
116
+
117
+ return 1;
118
+
119
+ } else
120
+ return 0;
121
+ }
122
+
123
+
124
//Print the union of the direct (a) and inverse (b) alignments as one line
//of 0-based "source-target" pairs. a[1..m] and b[1..n] are 1-based with 0
//meaning "not aligned". Always returns 1.
int prunionalignment(std::ostream& out,int m,int *a,int n,int* b)
{
  std::ostringstream buf;

  //every direct link
  for (int src=1; src<=m; ++src) {
    if (a[src]==0) continue;
    buf << src-1 << "-" << a[src]-1 << " ";
  }

  //inverse links that the direct alignment did not already produce
  for (int tgt=1; tgt<=n; ++tgt) {
    if (b[tgt]==0) continue;
    if (a[b[tgt]]==tgt) continue;
    buf << b[tgt]-1 << "-" << tgt-1 << " ";
  }

  //turn the trailing blank into the line terminator
  std::string line = buf.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
150
+
151
+
152
//Print the intersection of the direct (a) and inverse (b) alignments: a
//link j-a[j] is kept only when b points back from a[j] to j. Output is one
//line of 0-based "source-target" pairs. Always returns 1.
int printersect(std::ostream& out,int m,int *a,int n,int* b)
{
  std::ostringstream buf;

  for (int src=1; src<=m; ++src) {
    const int tgt = a[src];
    if (tgt==0) continue;
    if (b[tgt]!=src) continue;
    buf << src-1 << "-" << tgt-1 << " ";
  }

  //turn the trailing blank into the line terminator
  std::string line = buf.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
175
+
176
//Print the inverse (target-to-source) alignment b[1..n] as one line of
//0-based "source-target" pairs; entries equal to 0 are skipped. m and a
//are accepted for signature symmetry and are not read. Always returns 1.
int printtgttosrc(std::ostream& out,int m,int *a,int n,int* b)
{
  std::ostringstream buf;

  for (int tgt=1; tgt<=n; ++tgt) {
    if (b[tgt]==0) continue;
    buf << b[tgt]-1 << "-" << tgt-1 << " ";
  }

  //turn the trailing blank into the line terminator
  std::string line = buf.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
199
+
200
//Print the direct (source-to-target) alignment a[1..m] as one line of
//0-based "source-target" pairs; entries equal to 0 are skipped. n and b
//are accepted for signature symmetry and are not read. Always returns 1.
int printsrctotgt(std::ostream& out,int m,int *a,int n,int* b)
{
  std::ostringstream buf;

  for (int src=1; src<=m; ++src) {
    if (a[src]==0) continue;
    buf << src-1 << "-" << a[src]-1 << " ";
  }

  //turn the trailing blank into the line terminator
  std::string line = buf.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
223
+
224
+ //Compute Grow Diagonal Alignment
225
+ //Nice property: you will never introduce more points
226
+ //than the unionalignment alignemt. Hence, you will always be able
227
+ //to represent the grow alignment as the unionalignment of a
228
+ //directed and inverted alignment
229
+
230
+ int printgrow(ostream& out,int m,int *a,int n,int* b, bool diagonal=false,bool isfinal=false,bool bothuncovered=false)
231
+ {
232
+
233
+ ostringstream sout;
234
+
235
+ vector <pair <int,int> > neighbors; //neighbors
236
+
237
+ pair <int,int> entry;
238
+
239
+ neighbors.push_back(make_pair(-1,-0));
240
+ neighbors.push_back(make_pair(0,-1));
241
+ neighbors.push_back(make_pair(1,0));
242
+ neighbors.push_back(make_pair(0,1));
243
+
244
+
245
+ if (diagonal) {
246
+ neighbors.push_back(make_pair(-1,-1));
247
+ neighbors.push_back(make_pair(-1,1));
248
+ neighbors.push_back(make_pair(1,-1));
249
+ neighbors.push_back(make_pair(1,1));
250
+ }
251
+
252
+
253
+ int i,j;
254
+ size_t o;
255
+
256
+
257
+ //covered foreign and english positions
258
+
259
+ memset(fa,0,(m+1)*sizeof(int));
260
+ memset(ea,0,(n+1)*sizeof(int));
261
+
262
+ //matrix to quickly check if one point is in the symmetric
263
+ //alignment (value=2), direct alignment (=1) and inverse alignment
264
+
265
+ for (int i=1; i<=n; i++) memset(A[i],0,(m+1)*sizeof(int));
266
+
267
+ set <pair <int,int> > currentpoints; //symmetric alignment
268
+ set <pair <int,int> > unionalignment; //union alignment
269
+
270
+ pair <int,int> point; //variable to store points
271
+ set<pair <int,int> >::const_iterator k; //iterator over sets
272
+
273
+ //fill in the alignments
274
+ for (j=1; j<=m; j++) {
275
+ if (a[j]) {
276
+ unionalignment.insert(make_pair(a[j],j));
277
+ if (b[a[j]]==j) {
278
+ fa[j]=1;
279
+ ea[a[j]]=1;
280
+ A[a[j]][j]=2;
281
+ currentpoints.insert(make_pair(a[j],j));
282
+ } else
283
+ A[a[j]][j]=-1;
284
+ }
285
+ }
286
+
287
+ for (i=1; i<=n; i++)
288
+ if (b[i] && a[b[i]]!=i) { //not intersection
289
+ unionalignment.insert(make_pair(i,b[i]));
290
+ A[i][b[i]]=1;
291
+ }
292
+
293
+
294
+ int added=1;
295
+
296
+ while (added) {
297
+ added=0;
298
+ ///scan the current alignment
299
+ for (k=currentpoints.begin(); k!=currentpoints.end(); k++) {
300
+ //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
301
+ for (o=0; o<neighbors.size(); o++) {
302
+ //cout << "go over check all neighbors\n";
303
+ point.first=k->first+neighbors[o].first;
304
+ point.second=k->second+neighbors[o].second;
305
+ //cout << point.second-1 << " " << point.first-1 << "\n";
306
+ //check if neighbor is inside 'matrix'
307
+ if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
308
+ //check if neighbor is in the unionalignment alignment
309
+ if (b[point.first]==point.second || a[point.second]==point.first) {
310
+ //cout << "In unionalignment ";cout.flush();
311
+ //check if it connects at least one uncovered word
312
+ if (!(ea[point.first] && fa[point.second])) {
313
+ //insert point in currentpoints!
314
+ currentpoints.insert(point);
315
+ A[point.first][point.second]=2;
316
+ ea[point.first]=1;
317
+ fa[point.second]=1;
318
+ added=1;
319
+ //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
320
+ }
321
+ }
322
+ }
323
+ }
324
+ }
325
+
326
+ if (isfinal) {
327
+ for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
328
+ if (A[k->first][k->second]==1) {
329
+ point.first=k->first;
330
+ point.second=k->second;
331
+ //one of the two words is not covered yet
332
+ //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
333
+ if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
334
+ (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
335
+ //add it!
336
+ currentpoints.insert(point);
337
+ A[point.first][point.second]=2;
338
+ //keep track of new covered positions
339
+ ea[point.first]=1;
340
+ fa[point.second]=1;
341
+
342
+ //added=1;
343
+ //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
344
+ }
345
+ }
346
+
347
+ for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
348
+ if (A[k->first][k->second]==-1) {
349
+ point.first=k->first;
350
+ point.second=k->second;
351
+ //one of the two words is not covered yet
352
+ //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
353
+ if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
354
+ (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
355
+ //add it!
356
+ currentpoints.insert(point);
357
+ A[point.first][point.second]=2;
358
+ //keep track of new covered positions
359
+ ea[point.first]=1;
360
+ fa[point.second]=1;
361
+
362
+ //added=1;
363
+ //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
364
+ }
365
+ }
366
+ }
367
+
368
+
369
+ for (k=currentpoints.begin(); k!=currentpoints.end(); k++)
370
+ sout << k->second-1 << "-" << k->first-1 << " ";
371
+
372
+
373
+ //fix the last " "
374
+ string str = sout.str();
375
+ if (str.length() == 0)
376
+ str = "\n";
377
+ else
378
+ str.replace(str.length()-1,1,"\n");
379
+
380
+ out << str;
381
+ out.flush();
382
+ return 1;
383
+
384
+ return 1;
385
+ }
386
+
387
+ } // namespace
388
+
389
+
390
+ //Main file here
391
+
392
+
393
+ int main(int argc, char** argv)
394
+ {
395
+
396
+ int alignment=0;
397
+ char* input= NULL;
398
+ char* output= NULL;
399
+ int diagonal=false;
400
+ int isfinal=false;
401
+ int bothuncovered=false;
402
+
403
+
404
+ DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum,
405
+ "alignment", CMDENUMTYPE, &alignment, AlignEnum,
406
+ "d", CMDENUMTYPE, &diagonal, BoolEnum,
407
+ "diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
408
+ "f", CMDENUMTYPE, &isfinal, BoolEnum,
409
+ "final", CMDENUMTYPE, &isfinal, BoolEnum,
410
+ "b", CMDENUMTYPE, &bothuncovered, BoolEnum,
411
+ "both", CMDENUMTYPE, &bothuncovered, BoolEnum,
412
+ "i", CMDSTRINGTYPE, &input,
413
+ "o", CMDSTRINGTYPE, &output,
414
+ "v", CMDENUMTYPE, &verbose, BoolEnum,
415
+ "verbose", CMDENUMTYPE, &verbose, BoolEnum,
416
+
417
+ NULL);
418
+
419
+ GetParams(&argc, &argv, NULL);
420
+
421
+ if (alignment==0) {
422
+ cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
423
+ << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
424
+
425
+ exit(1);
426
+ }
427
+
428
+ istream *inp = &std::cin;
429
+ ostream *out = &std::cout;
430
+
431
+ try {
432
+ if (input) {
433
+ fstream *fin = new fstream(input,ios::in);
434
+ if (!fin->is_open()) throw runtime_error("cannot open " + string(input));
435
+ inp = fin;
436
+ }
437
+
438
+ if (output) {
439
+ fstream *fout = new fstream(output,ios::out);
440
+ if (!fout->is_open()) throw runtime_error("cannot open " + string(output));
441
+ out = fout;
442
+ }
443
+
444
+ int a[MAX_M],b[MAX_N],m,n;
445
+ fa=new int[MAX_M+1];
446
+ ea=new int[MAX_N+1];
447
+
448
+
449
+ int sents = 0;
450
+ A=new int *[MAX_N+1];
451
+ for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
452
+
453
+ switch (alignment) {
454
+ case UNION:
455
+ cerr << "symal: computing union alignment\n";
456
+ while(getals(*inp,m,a,n,b)) {
457
+ prunionalignment(*out,m,a,n,b);
458
+ sents++;
459
+ }
460
+ cerr << "Sents: " << sents << endl;
461
+ break;
462
+ case INTERSECT:
463
+ cerr << "symal: computing intersect alignment\n";
464
+ while(getals(*inp,m,a,n,b)) {
465
+ printersect(*out,m,a,n,b);
466
+ sents++;
467
+ }
468
+ cerr << "Sents: " << sents << endl;
469
+ break;
470
+ case GROW:
471
+ cerr << "symal: computing grow alignment: diagonal ("
472
+ << diagonal << ") final ("<< isfinal << ")"
473
+ << "both-uncovered (" << bothuncovered <<")\n";
474
+
475
+ while(getals(*inp,m,a,n,b))
476
+ printgrow(*out,m,a,n,b,diagonal,isfinal,bothuncovered);
477
+
478
+ break;
479
+ case TGTTOSRC:
480
+ cerr << "symal: computing target-to-source alignment\n";
481
+
482
+ while(getals(*inp,m,a,n,b)) {
483
+ printtgttosrc(*out,m,a,n,b);
484
+ sents++;
485
+ }
486
+ cerr << "Sents: " << sents << endl;
487
+ break;
488
+ case SRCTOTGT:
489
+ cerr << "symal: computing source-to-target alignment\n";
490
+
491
+ while(getals(*inp,m,a,n,b)) {
492
+ printsrctotgt(*out,m,a,n,b);
493
+ sents++;
494
+ }
495
+ cerr << "Sents: " << sents << endl;
496
+ break;
497
+ default:
498
+ throw runtime_error("Unknown alignment");
499
+ }
500
+
501
+ delete [] fa;
502
+ delete [] ea;
503
+ for (int i=1; i<=MAX_N; i++) delete [] A[i];
504
+ delete [] A;
505
+
506
+ if (inp != &std::cin) {
507
+ delete inp;
508
+ }
509
+ if (out != &std::cout) {
510
+ delete inp;
511
+ }
512
+ } catch (const std::exception &e) {
513
+ cerr << e.what() << std::endl;
514
+ exit(1);
515
+ }
516
+
517
+ exit(0);
518
+ }
mosesdecoder/symal/symal.vcproj ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="Windows-1252"?>
2
+ <VisualStudioProject
3
+ ProjectType="Visual C++"
4
+ Version="9.00"
5
+ Name="symal"
6
+ ProjectGUID="{6716FB26-8298-47A3-A915-958AF0AC80F8}"
7
+ RootNamespace="symal"
8
+ Keyword="Win32Proj"
9
+ TargetFrameworkVersion="131072"
10
+ >
11
+ <Platforms>
12
+ <Platform
13
+ Name="Win32"
14
+ />
15
+ </Platforms>
16
+ <ToolFiles>
17
+ </ToolFiles>
18
+ <Configurations>
19
+ <Configuration
20
+ Name="Debug|Win32"
21
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
22
+ IntermediateDirectory="$(ConfigurationName)"
23
+ ConfigurationType="1"
24
+ CharacterSet="1"
25
+ >
26
+ <Tool
27
+ Name="VCPreBuildEventTool"
28
+ />
29
+ <Tool
30
+ Name="VCCustomBuildTool"
31
+ />
32
+ <Tool
33
+ Name="VCXMLDataGeneratorTool"
34
+ />
35
+ <Tool
36
+ Name="VCWebServiceProxyGeneratorTool"
37
+ />
38
+ <Tool
39
+ Name="VCMIDLTool"
40
+ />
41
+ <Tool
42
+ Name="VCCLCompilerTool"
43
+ Optimization="0"
44
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;__STDC__"
45
+ MinimalRebuild="true"
46
+ BasicRuntimeChecks="3"
47
+ RuntimeLibrary="3"
48
+ UsePrecompiledHeader="0"
49
+ WarningLevel="3"
50
+ Detect64BitPortabilityProblems="true"
51
+ DebugInformationFormat="4"
52
+ />
53
+ <Tool
54
+ Name="VCManagedResourceCompilerTool"
55
+ />
56
+ <Tool
57
+ Name="VCResourceCompilerTool"
58
+ />
59
+ <Tool
60
+ Name="VCPreLinkEventTool"
61
+ />
62
+ <Tool
63
+ Name="VCLinkerTool"
64
+ LinkIncremental="2"
65
+ GenerateDebugInformation="true"
66
+ SubSystem="1"
67
+ RandomizedBaseAddress="1"
68
+ DataExecutionPrevention="0"
69
+ TargetMachine="1"
70
+ />
71
+ <Tool
72
+ Name="VCALinkTool"
73
+ />
74
+ <Tool
75
+ Name="VCManifestTool"
76
+ />
77
+ <Tool
78
+ Name="VCXDCMakeTool"
79
+ />
80
+ <Tool
81
+ Name="VCBscMakeTool"
82
+ />
83
+ <Tool
84
+ Name="VCFxCopTool"
85
+ />
86
+ <Tool
87
+ Name="VCAppVerifierTool"
88
+ />
89
+ <Tool
90
+ Name="VCPostBuildEventTool"
91
+ />
92
+ </Configuration>
93
+ <Configuration
94
+ Name="Release|Win32"
95
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
96
+ IntermediateDirectory="$(ConfigurationName)"
97
+ ConfigurationType="1"
98
+ CharacterSet="1"
99
+ WholeProgramOptimization="1"
100
+ >
101
+ <Tool
102
+ Name="VCPreBuildEventTool"
103
+ />
104
+ <Tool
105
+ Name="VCCustomBuildTool"
106
+ />
107
+ <Tool
108
+ Name="VCXMLDataGeneratorTool"
109
+ />
110
+ <Tool
111
+ Name="VCWebServiceProxyGeneratorTool"
112
+ />
113
+ <Tool
114
+ Name="VCMIDLTool"
115
+ />
116
+ <Tool
117
+ Name="VCCLCompilerTool"
118
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;__STDC__"
119
+ RuntimeLibrary="2"
120
+ UsePrecompiledHeader="0"
121
+ WarningLevel="3"
122
+ Detect64BitPortabilityProblems="true"
123
+ DebugInformationFormat="3"
124
+ />
125
+ <Tool
126
+ Name="VCManagedResourceCompilerTool"
127
+ />
128
+ <Tool
129
+ Name="VCResourceCompilerTool"
130
+ />
131
+ <Tool
132
+ Name="VCPreLinkEventTool"
133
+ />
134
+ <Tool
135
+ Name="VCLinkerTool"
136
+ LinkIncremental="1"
137
+ GenerateDebugInformation="true"
138
+ SubSystem="1"
139
+ OptimizeReferences="2"
140
+ EnableCOMDATFolding="2"
141
+ RandomizedBaseAddress="1"
142
+ DataExecutionPrevention="0"
143
+ TargetMachine="1"
144
+ />
145
+ <Tool
146
+ Name="VCALinkTool"
147
+ />
148
+ <Tool
149
+ Name="VCManifestTool"
150
+ />
151
+ <Tool
152
+ Name="VCXDCMakeTool"
153
+ />
154
+ <Tool
155
+ Name="VCBscMakeTool"
156
+ />
157
+ <Tool
158
+ Name="VCFxCopTool"
159
+ />
160
+ <Tool
161
+ Name="VCAppVerifierTool"
162
+ />
163
+ <Tool
164
+ Name="VCPostBuildEventTool"
165
+ />
166
+ </Configuration>
167
+ </Configurations>
168
+ <References>
169
+ </References>
170
+ <Files>
171
+ <Filter
172
+ Name="Source Files"
173
+ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
174
+ UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
175
+ >
176
+ <File
177
+ RelativePath=".\cmd.c"
178
+ >
179
+ </File>
180
+ <File
181
+ RelativePath=".\symal.cpp"
182
+ >
183
+ </File>
184
+ </Filter>
185
+ <Filter
186
+ Name="Header Files"
187
+ Filter="h;hpp;hxx;hm;inl;inc;xsd"
188
+ UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
189
+ >
190
+ <File
191
+ RelativePath=".\cmd.h"
192
+ >
193
+ </File>
194
+ </Filter>
195
+ <Filter
196
+ Name="Resource Files"
197
+ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
198
+ UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
199
+ >
200
+ </Filter>
201
+ </Files>
202
+ <Globals>
203
+ </Globals>
204
+ </VisualStudioProject>
mosesdecoder/util/CMakeLists.txt ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required(VERSION 2.8.8)
2
+ #
3
+ # The KenLM cmake files make use of add_library(... OBJECT ...)
4
+ #
5
+ # This syntax allows grouping of source files when compiling
6
+ # (effectively creating "fake" libraries based on source subdirs).
7
+ #
8
+ # This syntax was only added in cmake version 2.8.8
9
+ #
10
+ # see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
11
+
12
+
13
+ # This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
14
+
15
+
16
+ # Explicitly list the source files for this subdirectory
17
+ #
18
+ # If you add any source files to this subdirectory
19
+ # that should be included in the kenlm library,
20
+ # (this excludes any unit test files)
21
+ # you should add them to the following list:
22
+ #
23
+ # Because we do not set PARENT_SCOPE in the following definition,
24
+ # CMake files in the parent directory won't be able to access this variable.
25
+ #
26
+ set(KENLM_UTIL_SOURCE
27
+ bit_packing.cc
28
+ ersatz_progress.cc
29
+ exception.cc
30
+ file.cc
31
+ file_piece.cc
32
+ float_to_string.cc
33
+ integer_to_string.cc
34
+ mmap.cc
35
+ murmur_hash.cc
36
+ parallel_read.cc
37
+ pool.cc
38
+ read_compressed.cc
39
+ scoped.cc
40
+ string_piece.cc
41
+ usage.cc
42
+ )
43
+
44
+ # This directory has children that need to be processed
45
+ add_subdirectory(double-conversion)
46
+ add_subdirectory(stream)
47
+
48
+
49
+ # Group these objects together for later use.
50
+ #
51
+ # Given add_library(foo OBJECT ${my_foo_sources}),
52
+ # refer to these objects as $<TARGET_OBJECTS:foo>
53
+ #
54
+ add_library(kenlm_util OBJECT ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL_STREAM_SOURCE} ${KENLM_UTIL_SOURCE})
55
+
56
+
57
+
58
+ # Only compile and run unit tests if tests should be run
59
+ if(BUILD_TESTING)
60
+
61
+ # Explicitly list the Boost test files to be compiled
62
+ set(KENLM_BOOST_TESTS_LIST
63
+ bit_packing_test
64
+ joint_sort_test
65
+ multi_intersection_test
66
+ probing_hash_table_test
67
+ read_compressed_test
68
+ sorted_uniform_test
69
+ tokenize_piece_test
70
+ )
71
+
72
+ AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
73
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
74
+ LIBRARIES ${Boost_LIBRARIES} pthread)
75
+
76
+ # file_piece_test requires an extra command line parameter
77
+ KenLMAddTest(TEST file_piece_test
78
+ DEPENDS $<TARGET_OBJECTS:kenlm_util>
79
+ LIBRARIES ${Boost_LIBRARIES} pthread
80
+ TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc)
81
+ endif()
mosesdecoder/util/Jamfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ local compressed_flags = <include>.. <define>HAVE_ZLIB ;
2
+ local compressed_deps = /top//z ;
3
+ if [ test_library "bz2" ] && [ test_header "bzlib.h" ] {
4
+ external-lib bz2 ;
5
+ compressed_flags += <define>HAVE_BZLIB ;
6
+ compressed_deps += bz2 ;
7
+ }
8
+ if [ test_library "lzma" ] && [ test_header "lzma.h" ] {
9
+ external-lib lzma ;
10
+ compressed_flags += <define>HAVE_XZLIB ;
11
+ compressed_deps += lzma ;
12
+ }
13
+
14
+ #rt is needed for clock_gettime on linux. But it's already included with threading=multi
15
+ lib rt ;
16
+
17
+ obj read_compressed.o : read_compressed.cc : $(compressed_flags) ;
18
+ alias read_compressed : read_compressed.o $(compressed_deps) ;
19
+ obj read_compressed_test.o : read_compressed_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
20
+ obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
21
+
22
+ fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ;
23
+
24
+ fakelib kenutil : [ glob *.cc : parallel_read.cc read_compressed.cc *_main.cc *_test.cc ] read_compressed parallel_read double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
25
+
26
+ exe cat_compressed : cat_compressed_main.cc kenutil ;
27
+
28
+ #Does not install this
29
+ exe probing_hash_table_benchmark : probing_hash_table_benchmark_main.cc kenutil ;
30
+
31
+ alias programs : cat_compressed ;
32
+
33
+ import testing ;
34
+
35
+ run file_piece_test.o kenutil /top//boost_unit_test_framework : : file_piece.cc ;
36
+ for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
37
+ local name = [ MATCH "(.*)\.cc" : $(t) ] ;
38
+ unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_filesystem /top//boost_system ;
39
+ }
40
+
41
+ build-project stream ;
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab8564bc029fa8f47779ac76015e129d019fb75c9c8bbda96ccb54e419e67a5c
3
+ size 1747304
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.output ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Boost.Test WARNING: token "util/file_piece.cc" does not correspond to the Boost.Test argument
2
+ and should be placed after all Boost.Test arguments and the -- separator.
3
+ For example: file_piece_test --random -- util/file_piece.cc
4
+ Running 6 test cases...
5
+
6
+ *** No errors detected
7
+
8
+ EXIT STATUS: 0
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.run ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Boost.Test WARNING: token "util/file_piece.cc" does not correspond to the Boost.Test argument
2
+ and should be placed after all Boost.Test arguments and the -- separator.
3
+ For example: file_piece_test --random -- util/file_piece.cc
4
+ Running 6 test cases...
5
+
6
+ *** No errors detected
7
+
8
+ EXIT STATUS: 0
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.test ADDED
@@ -0,0 +1 @@
 
 
1
+ passed
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing.o ADDED
Binary file (10.4 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488bd05ca0bdcd9baf07b6c4618502d8b9cdf5b1ef97a55149be622a2e33cc60
3
+ size 1701840
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.o ADDED
Binary file (109 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.passed ADDED
File without changes
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed ADDED
Binary file (839 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed_main.o ADDED
Binary file (6.75 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/ersatz_progress.o ADDED
Binary file (5.73 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/exception.o ADDED
Binary file (21.2 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file.o ADDED
Binary file (144 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece.o ADDED
Binary file (65.4 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece_test.o ADDED
Binary file (210 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/float_to_string.o ADDED
Binary file (4.61 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string.o ADDED
Binary file (12.5 kB). View file
 
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47f73d0d532194f98ca0b5de04960b79173ae4cb6291106f75af83f478b2ee94
3
+ size 1715968
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test.o ADDED
Binary file (178 kB). View file