Upload 235 files
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +18 -0
- mosesdecoder/search/Jamfile +1 -0
- mosesdecoder/search/applied.hh +87 -0
- mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/edge_generator.o +0 -0
- mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/nbest.o +0 -0
- mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/rule.o +0 -0
- mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/vertex.o +0 -0
- mosesdecoder/search/config.hh +38 -0
- mosesdecoder/search/context.hh +49 -0
- mosesdecoder/search/edge.hh +54 -0
- mosesdecoder/search/edge_generator.cc +116 -0
- mosesdecoder/search/edge_generator.hh +56 -0
- mosesdecoder/search/header.hh +72 -0
- mosesdecoder/search/nbest.cc +106 -0
- mosesdecoder/search/nbest.hh +80 -0
- mosesdecoder/search/rule.cc +43 -0
- mosesdecoder/search/rule.hh +25 -0
- mosesdecoder/search/types.hh +31 -0
- mosesdecoder/search/vertex.cc +204 -0
- mosesdecoder/search/vertex.hh +196 -0
- mosesdecoder/search/vertex_generator.hh +75 -0
- mosesdecoder/symal/Jamfile +2 -0
- mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/cmd.o +0 -0
- mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal +0 -0
- mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal.o +0 -0
- mosesdecoder/symal/cmd.c +642 -0
- mosesdecoder/symal/cmd.h +35 -0
- mosesdecoder/symal/symal.cpp +518 -0
- mosesdecoder/symal/symal.vcproj +204 -0
- mosesdecoder/util/CMakeLists.txt +81 -0
- mosesdecoder/util/Jamfile +41 -0
- mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test +3 -0
- mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.output +8 -0
- mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.run +8 -0
- mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.test +1 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test +3 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.passed +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed_main.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/ersatz_progress.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/exception.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece_test.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/float_to_string.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string.o +0 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test +3 -0
- mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test.o +0 -0
.gitattributes
CHANGED
|
@@ -126,3 +126,21 @@ mosesdecoder/phrase-extract/syntax-common/bin/gcc-9/release/link-static/threadin
|
|
| 126 |
mosesdecoder/phrase-extract/syntax-common/bin/gcc-9/release/link-static/threading-multi/tree_test filter=lfs diff=lfs merge=lfs -text
|
| 127 |
mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/CreateProbingPT filter=lfs diff=lfs merge=lfs -text
|
| 128 |
mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/libprobingpt.a filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
mosesdecoder/phrase-extract/syntax-common/bin/gcc-9/release/link-static/threading-multi/tree_test filter=lfs diff=lfs merge=lfs -text
|
| 127 |
mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/CreateProbingPT filter=lfs diff=lfs merge=lfs -text
|
| 128 |
mosesdecoder/probingpt/bin/gcc-9/release/link-static/threading-multi/libprobingpt.a filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/joint_sort_test filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/multi_intersection_test filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/pcqueue_test filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/probing_hash_table_test filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/random_test filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/sized_iterator_test filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/sorted_uniform_test filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/string_stream_test filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/tempfile_test filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/tokenize_piece_test filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/tokenize_test filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/io_test filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/rewindable_stream_test filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/sort_test filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
mosesdecoder/util/stream/bin/gcc-9/release/link-static/threading-multi/stream_test filter=lfs diff=lfs merge=lfs -text
|
mosesdecoder/search/Jamfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
fakelib search : edge_generator.cc nbest.cc rule.cc vertex.cc ../lm//kenlm ../util//kenutil /top//boost_system : : : <include>.. ;
|
mosesdecoder/search/applied.hh
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_APPLIED__
|
| 2 |
+
#define SEARCH_APPLIED__
|
| 3 |
+
|
| 4 |
+
#include "search/edge.hh"
|
| 5 |
+
#include "search/header.hh"
|
| 6 |
+
#include "util/pool.hh"
|
| 7 |
+
|
| 8 |
+
#include <cmath>
|
| 9 |
+
|
| 10 |
+
namespace search {
|
| 11 |
+
|
| 12 |
+
// A full hypothesis: a score, arity of the rule, a pointer to the decoder's rule (Note), and pointers to non-terminals that were substituted.
|
| 13 |
+
template <class Below> class GenericApplied : public Header {
|
| 14 |
+
public:
|
| 15 |
+
GenericApplied() {}
|
| 16 |
+
|
| 17 |
+
GenericApplied(void *location, PartialEdge partial)
|
| 18 |
+
: Header(location) {
|
| 19 |
+
memcpy(Base(), partial.Base(), kHeaderSize);
|
| 20 |
+
Below *child_out = Children();
|
| 21 |
+
const PartialVertex *part = partial.NT();
|
| 22 |
+
const PartialVertex *const part_end_loop = part + partial.GetArity();
|
| 23 |
+
for (; part != part_end_loop; ++part, ++child_out)
|
| 24 |
+
*child_out = Below(part->End());
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
GenericApplied(void *location, Score score, Arity arity, Note note, Moses::Range range) : Header(location, arity) {
|
| 28 |
+
SetScore(score);
|
| 29 |
+
SetNote(note);
|
| 30 |
+
SetRange(range);
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
explicit GenericApplied(History from) : Header(from) {}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
// These are arrays of length GetArity().
|
| 37 |
+
Below *Children() {
|
| 38 |
+
return reinterpret_cast<Below*>(After());
|
| 39 |
+
}
|
| 40 |
+
const Below *Children() const {
|
| 41 |
+
return reinterpret_cast<const Below*>(After());
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
static std::size_t Size(Arity arity) {
|
| 45 |
+
return kHeaderSize + arity * sizeof(const Below);
|
| 46 |
+
}
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
// Applied rule that references itself.
|
| 50 |
+
// GenericApplied whose children are themselves Applied handles.
class Applied : public GenericApplied<Applied> {
  public:
    // Handle that refers to no buffer.
    Applied() {}

    // Build from a partial edge inside the caller-supplied buffer.
    Applied(void *location, PartialEdge partial)
      : GenericApplied<Applied>(location, partial) {}

    // Wrap an existing buffer. Not marked explicit, so a History converts
    // to Applied implicitly.
    Applied(History from)
      : GenericApplied<Applied>(from) {}
};
|
| 59 |
+
|
| 60 |
+
// How to build single-best hypotheses.
|
| 61 |
+
class SingleBest {
|
| 62 |
+
public:
|
| 63 |
+
typedef PartialEdge Combine;
|
| 64 |
+
|
| 65 |
+
void Add(PartialEdge &existing, PartialEdge add) const {
|
| 66 |
+
if (!existing.Valid() || existing.GetScore() < add.GetScore())
|
| 67 |
+
existing = add;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
NBestComplete Complete(PartialEdge partial) {
|
| 71 |
+
if (!partial.Valid())
|
| 72 |
+
return NBestComplete(NULL, lm::ngram::ChartState(), -INFINITY);
|
| 73 |
+
void *place_final = pool_.Allocate(Applied::Size(partial.GetArity()));
|
| 74 |
+
Applied(place_final, partial);
|
| 75 |
+
return NBestComplete(
|
| 76 |
+
place_final,
|
| 77 |
+
partial.CompletedState(),
|
| 78 |
+
partial.GetScore());
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
private:
|
| 82 |
+
util::Pool pool_;
|
| 83 |
+
};
|
| 84 |
+
|
| 85 |
+
} // namespace search
|
| 86 |
+
|
| 87 |
+
#endif // SEARCH_APPLIED__
|
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/edge_generator.o
ADDED
|
Binary file (44.9 kB). View file
|
|
|
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/nbest.o
ADDED
|
Binary file (14.1 kB). View file
|
|
|
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/rule.o
ADDED
|
Binary file (11 kB). View file
|
|
|
mosesdecoder/search/bin/gcc-9/release/link-static/threading-multi/vertex.o
ADDED
|
Binary file (24 kB). View file
|
|
|
mosesdecoder/search/config.hh
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_CONFIG__
|
| 2 |
+
#define SEARCH_CONFIG__
|
| 3 |
+
|
| 4 |
+
#include "search/types.hh"
|
| 5 |
+
|
| 6 |
+
namespace search {
|
| 7 |
+
|
| 8 |
+
// N-best list settings. The single-argument constructor sets both fields to
// the same value.
struct NBestConfig {
  // keep: handed to NBestList's constructor as its keep limit.
  // size: handed to NBestList::Extract as the number of hypotheses requested.
  unsigned int keep, size;

  explicit NBestConfig(unsigned int in_size)
    : keep(in_size), size(in_size) {}
};
|
| 16 |
+
|
| 17 |
+
class Config {
|
| 18 |
+
public:
|
| 19 |
+
Config(Score lm_weight, unsigned int pop_limit, const NBestConfig &nbest) :
|
| 20 |
+
lm_weight_(lm_weight), pop_limit_(pop_limit), nbest_(nbest) {}
|
| 21 |
+
|
| 22 |
+
Score LMWeight() const { return lm_weight_; }
|
| 23 |
+
|
| 24 |
+
unsigned int PopLimit() const { return pop_limit_; }
|
| 25 |
+
|
| 26 |
+
const NBestConfig &GetNBest() const { return nbest_; }
|
| 27 |
+
|
| 28 |
+
private:
|
| 29 |
+
Score lm_weight_;
|
| 30 |
+
|
| 31 |
+
unsigned int pop_limit_;
|
| 32 |
+
|
| 33 |
+
NBestConfig nbest_;
|
| 34 |
+
};
|
| 35 |
+
|
| 36 |
+
} // namespace search
|
| 37 |
+
|
| 38 |
+
#endif // SEARCH_CONFIG__
|
mosesdecoder/search/context.hh
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_CONTEXT__
|
| 2 |
+
#define SEARCH_CONTEXT__
|
| 3 |
+
|
| 4 |
+
#include "search/config.hh"
|
| 5 |
+
#include "search/vertex.hh"
|
| 6 |
+
|
| 7 |
+
#include <boost/pool/object_pool.hpp>
|
| 8 |
+
|
| 9 |
+
namespace search {
|
| 10 |
+
|
| 11 |
+
class ContextBase {
|
| 12 |
+
public:
|
| 13 |
+
explicit ContextBase(const Config &config) : config_(config) {}
|
| 14 |
+
|
| 15 |
+
VertexNode *NewVertexNode() {
|
| 16 |
+
VertexNode *ret = vertex_node_pool_.construct();
|
| 17 |
+
assert(ret);
|
| 18 |
+
return ret;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
void DeleteVertexNode(VertexNode *node) {
|
| 22 |
+
vertex_node_pool_.destroy(node);
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
unsigned int PopLimit() const { return config_.PopLimit(); }
|
| 26 |
+
|
| 27 |
+
Score LMWeight() const { return config_.LMWeight(); }
|
| 28 |
+
|
| 29 |
+
const Config &GetConfig() const { return config_; }
|
| 30 |
+
|
| 31 |
+
private:
|
| 32 |
+
boost::object_pool<VertexNode> vertex_node_pool_;
|
| 33 |
+
|
| 34 |
+
Config config_;
|
| 35 |
+
};
|
| 36 |
+
|
| 37 |
+
template <class Model> class Context : public ContextBase {
|
| 38 |
+
public:
|
| 39 |
+
Context(const Config &config, const Model &model) : ContextBase(config), model_(model) {}
|
| 40 |
+
|
| 41 |
+
const Model &LanguageModel() const { return model_; }
|
| 42 |
+
|
| 43 |
+
private:
|
| 44 |
+
const Model &model_;
|
| 45 |
+
};
|
| 46 |
+
|
| 47 |
+
} // namespace search
|
| 48 |
+
|
| 49 |
+
#endif // SEARCH_CONTEXT__
|
mosesdecoder/search/edge.hh
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_EDGE__
|
| 2 |
+
#define SEARCH_EDGE__
|
| 3 |
+
|
| 4 |
+
#include "lm/state.hh"
|
| 5 |
+
#include "search/header.hh"
|
| 6 |
+
#include "search/types.hh"
|
| 7 |
+
#include "search/vertex.hh"
|
| 8 |
+
#include "util/pool.hh"
|
| 9 |
+
|
| 10 |
+
#include <functional>
|
| 11 |
+
|
| 12 |
+
#include <stdint.h>
|
| 13 |
+
|
| 14 |
+
namespace search {
|
| 15 |
+
|
| 16 |
+
// Copyable, but the copy will be shallow.
|
| 17 |
+
class PartialEdge : public Header {
|
| 18 |
+
public:
|
| 19 |
+
// Allow default construction for STL.
|
| 20 |
+
PartialEdge() {}
|
| 21 |
+
|
| 22 |
+
PartialEdge(util::Pool &pool, Arity arity)
|
| 23 |
+
: Header(pool.Allocate(Size(arity, arity + 1)), arity) {}
|
| 24 |
+
|
| 25 |
+
PartialEdge(util::Pool &pool, Arity arity, Arity chart_states)
|
| 26 |
+
: Header(pool.Allocate(Size(arity, chart_states)), arity) {}
|
| 27 |
+
|
| 28 |
+
// Non-terminals
|
| 29 |
+
const PartialVertex *NT() const {
|
| 30 |
+
return reinterpret_cast<const PartialVertex*>(After());
|
| 31 |
+
}
|
| 32 |
+
PartialVertex *NT() {
|
| 33 |
+
return reinterpret_cast<PartialVertex*>(After());
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
const lm::ngram::ChartState &CompletedState() const {
|
| 37 |
+
return *Between();
|
| 38 |
+
}
|
| 39 |
+
const lm::ngram::ChartState *Between() const {
|
| 40 |
+
return reinterpret_cast<const lm::ngram::ChartState*>(After() + GetArity() * sizeof(PartialVertex));
|
| 41 |
+
}
|
| 42 |
+
lm::ngram::ChartState *Between() {
|
| 43 |
+
return reinterpret_cast<lm::ngram::ChartState*>(After() + GetArity() * sizeof(PartialVertex));
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
private:
|
| 47 |
+
static std::size_t Size(Arity arity, Arity chart_states) {
|
| 48 |
+
return kHeaderSize + arity * sizeof(PartialVertex) + chart_states * sizeof(lm::ngram::ChartState);
|
| 49 |
+
}
|
| 50 |
+
};
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
} // namespace search
|
| 54 |
+
#endif // SEARCH_EDGE__
|
mosesdecoder/search/edge_generator.cc
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "search/edge_generator.hh"
|
| 2 |
+
|
| 3 |
+
#include "lm/left.hh"
|
| 4 |
+
#include "lm/model.hh"
|
| 5 |
+
#include "lm/partial.hh"
|
| 6 |
+
#include "search/context.hh"
|
| 7 |
+
#include "search/vertex.hh"
|
| 8 |
+
|
| 9 |
+
#include <numeric>
|
| 10 |
+
|
| 11 |
+
namespace search {
|
| 12 |
+
|
| 13 |
+
namespace {
|
| 14 |
+
|
| 15 |
+
template <class Model> void FastScore(const Context<Model> &context, Arity victim, Arity before_idx, Arity incomplete, const PartialVertex &previous_vertex, PartialEdge update) {
|
| 16 |
+
lm::ngram::ChartState *between = update.Between();
|
| 17 |
+
lm::ngram::ChartState *before = &between[before_idx], *after = &between[before_idx + 1];
|
| 18 |
+
|
| 19 |
+
float adjustment = 0.0;
|
| 20 |
+
const lm::ngram::ChartState &previous_reveal = previous_vertex.State();
|
| 21 |
+
const PartialVertex &update_nt = update.NT()[victim];
|
| 22 |
+
const lm::ngram::ChartState &update_reveal = update_nt.State();
|
| 23 |
+
if ((update_reveal.left.length > previous_reveal.left.length) || (update_reveal.left.full && !previous_reveal.left.full)) {
|
| 24 |
+
adjustment += lm::ngram::RevealAfter(context.LanguageModel(), before->left, before->right, update_reveal.left, previous_reveal.left.length);
|
| 25 |
+
}
|
| 26 |
+
if ((update_reveal.right.length > previous_reveal.right.length) || (update_nt.RightFull() && !previous_vertex.RightFull())) {
|
| 27 |
+
adjustment += lm::ngram::RevealBefore(context.LanguageModel(), update_reveal.right, previous_reveal.right.length, update_nt.RightFull(), after->left, after->right);
|
| 28 |
+
}
|
| 29 |
+
if (update_nt.Complete()) {
|
| 30 |
+
if (update_reveal.left.full) {
|
| 31 |
+
before->left.full = true;
|
| 32 |
+
} else {
|
| 33 |
+
assert(update_reveal.left.length == update_reveal.right.length);
|
| 34 |
+
adjustment += lm::ngram::Subsume(context.LanguageModel(), before->left, before->right, after->left, after->right, update_reveal.left.length);
|
| 35 |
+
}
|
| 36 |
+
before->right = after->right;
|
| 37 |
+
// Shift the others shifted one down, covering after.
|
| 38 |
+
for (lm::ngram::ChartState *cover = after; cover < between + incomplete; ++cover) {
|
| 39 |
+
*cover = *(cover + 1);
|
| 40 |
+
}
|
| 41 |
+
}
|
| 42 |
+
update.SetScore(update.GetScore() + adjustment * context.LMWeight());
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
} // namespace
|
| 46 |
+
|
| 47 |
+
// Remove the best-scoring partial edge from the queue and advance it one step.
//
// If no non-terminal reports a niceness below 255, the edge is returned as a
// finished hypothesis. Otherwise the non-terminal with the lowest niceness
// (the "victim") is split, the resulting edges are pushed back on the queue,
// and an invalid PartialEdge is returned to signal that no hypothesis was
// produced by this call.
//
// Precondition: the queue is not empty.
template <class Model> PartialEdge EdgeGenerator::Pop(Context<Model> &context) {
  assert(!generate_.empty());
  PartialEdge top = generate_.top();
  generate_.pop();
  PartialVertex *const top_nt = top.NT();
  const Arity arity = top.GetArity();

  // Select the victim. victim_completed records how many complete
  // non-terminals precede it; it is initialized so that it never holds an
  // indeterminate value, even on paths the compiler cannot rule out.
  Arity victim = 0;
  Arity victim_completed = 0;
  Arity completed = 0;
  unsigned char lowest_niceness = 255;
  for (Arity i = 0; i != arity; ++i) {
    if (top_nt[i].Complete()) {
      ++completed;
    } else if (top_nt[i].Niceness() < lowest_niceness) {
      lowest_niceness = top_nt[i].Niceness();
      victim = i;
      victim_completed = completed;
    }
  }
  if (lowest_niceness == 255) {
    // Nothing left to expand: hand the edge to the caller.
    return top;
  }
  const Arity incomplete = arity - completed;

  // Keep the victim's value from before the split; both the alternate's score
  // and FastScore need it.
  PartialVertex old_value(top_nt[victim]);
  PartialVertex alternate_changed;
  // NOTE(review): Split presumably narrows top_nt[victim] in place and stores
  // the remaining alternative in alternate_changed when it returns true -
  // confirm in search/vertex.hh.
  if (top_nt[victim].Split(alternate_changed)) {
    PartialEdge alternate(partial_edge_pool_, arity, incomplete + 1);
    alternate.SetScore(top.GetScore() + alternate_changed.Bound() - old_value.Bound());

    alternate.SetNote(top.GetNote());
    alternate.SetRange(top.GetRange());

    // Same non-terminals as top, except for the victim slot.
    PartialVertex *alternate_nt = alternate.NT();
    for (Arity i = 0; i < victim; ++i) alternate_nt[i] = top_nt[i];
    alternate_nt[victim] = alternate_changed;
    for (Arity i = victim + 1; i < arity; ++i) alternate_nt[i] = top_nt[i];

    // Copy the chart states before FastScore modifies those of top.
    memcpy(alternate.Between(), top.Between(), sizeof(lm::ngram::ChartState) * (incomplete + 1));

    // TODO: dedupe?
    generate_.push(alternate);
  }

#ifndef NDEBUG
  Score before = top.GetScore();
#endif
  // top is now the continuation.
  FastScore(context, victim, victim - victim_completed, incomplete, old_value, top);
  // TODO: dedupe?
  generate_.push(top);
  assert(lowest_niceness != 254 || top.GetScore() == before);

  // Invalid indicates no new hypothesis generated.
  return PartialEdge();
}
|
| 108 |
+
|
| 109 |
+
// Explicit instantiations of EdgeGenerator::Pop, one per language model type;
// the template is defined in this file rather than in the header.
template PartialEdge EdgeGenerator::Pop<lm::ngram::RestProbingModel>(Context<lm::ngram::RestProbingModel> &context);
template PartialEdge EdgeGenerator::Pop<lm::ngram::ProbingModel>(Context<lm::ngram::ProbingModel> &context);
template PartialEdge EdgeGenerator::Pop<lm::ngram::TrieModel>(Context<lm::ngram::TrieModel> &context);
template PartialEdge EdgeGenerator::Pop<lm::ngram::QuantTrieModel>(Context<lm::ngram::QuantTrieModel> &context);
template PartialEdge EdgeGenerator::Pop<lm::ngram::ArrayTrieModel>(Context<lm::ngram::ArrayTrieModel> &context);
template PartialEdge EdgeGenerator::Pop<lm::ngram::QuantArrayTrieModel>(Context<lm::ngram::QuantArrayTrieModel> &context);
|
| 115 |
+
|
| 116 |
+
} // namespace search
|
mosesdecoder/search/edge_generator.hh
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_EDGE_GENERATOR__
|
| 2 |
+
#define SEARCH_EDGE_GENERATOR__
|
| 3 |
+
|
| 4 |
+
#include "search/edge.hh"
|
| 5 |
+
#include "search/types.hh"
|
| 6 |
+
|
| 7 |
+
#include <queue>
|
| 8 |
+
|
| 9 |
+
namespace lm {
|
| 10 |
+
namespace ngram {
|
| 11 |
+
struct ChartState;
|
| 12 |
+
} // namespace ngram
|
| 13 |
+
} // namespace lm
|
| 14 |
+
|
| 15 |
+
namespace search {
|
| 16 |
+
|
| 17 |
+
template <class Model> class Context;
|
| 18 |
+
|
| 19 |
+
class EdgeGenerator {
|
| 20 |
+
public:
|
| 21 |
+
EdgeGenerator() {}
|
| 22 |
+
|
| 23 |
+
PartialEdge AllocateEdge(Arity arity) {
|
| 24 |
+
return PartialEdge(partial_edge_pool_, arity);
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
void AddEdge(PartialEdge edge) {
|
| 28 |
+
generate_.push(edge);
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
bool Empty() const { return generate_.empty(); }
|
| 32 |
+
|
| 33 |
+
// Pop. If there's a complete hypothesis, return it. Otherwise return an invalid PartialEdge.
|
| 34 |
+
template <class Model> PartialEdge Pop(Context<Model> &context);
|
| 35 |
+
|
| 36 |
+
template <class Model, class Output> void Search(Context<Model> &context, Output &output) {
|
| 37 |
+
unsigned to_pop = context.PopLimit();
|
| 38 |
+
while (to_pop > 0 && !generate_.empty()) {
|
| 39 |
+
PartialEdge got(Pop(context));
|
| 40 |
+
if (got.Valid()) {
|
| 41 |
+
output.NewHypothesis(got);
|
| 42 |
+
--to_pop;
|
| 43 |
+
}
|
| 44 |
+
}
|
| 45 |
+
output.FinishedSearch();
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
private:
|
| 49 |
+
util::Pool partial_edge_pool_;
|
| 50 |
+
|
| 51 |
+
typedef std::priority_queue<PartialEdge> Generate;
|
| 52 |
+
Generate generate_;
|
| 53 |
+
};
|
| 54 |
+
|
| 55 |
+
} // namespace search
|
| 56 |
+
#endif // SEARCH_EDGE_GENERATOR__
|
mosesdecoder/search/header.hh
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_HEADER__
|
| 2 |
+
#define SEARCH_HEADER__
|
| 3 |
+
|
| 4 |
+
// Header consisting of Score, Arity, Note and Range
|
| 5 |
+
|
| 6 |
+
#include "search/types.hh"
|
| 7 |
+
#include "moses/Range.h"
|
| 8 |
+
|
| 9 |
+
#include <stdint.h>
|
| 10 |
+
|
| 11 |
+
namespace search {
|
| 12 |
+
|
| 13 |
+
// Copying is shallow.
|
| 14 |
+
class Header {
|
| 15 |
+
public:
|
| 16 |
+
bool Valid() const { return base_; }
|
| 17 |
+
|
| 18 |
+
Score GetScore() const {
|
| 19 |
+
return *reinterpret_cast<const float*>(base_);
|
| 20 |
+
}
|
| 21 |
+
void SetScore(Score to) {
|
| 22 |
+
*reinterpret_cast<float*>(base_) = to;
|
| 23 |
+
}
|
| 24 |
+
bool operator<(const Header &other) const {
|
| 25 |
+
return GetScore() < other.GetScore();
|
| 26 |
+
}
|
| 27 |
+
bool operator>(const Header &other) const {
|
| 28 |
+
return GetScore() > other.GetScore();
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
Arity GetArity() const {
|
| 32 |
+
return *reinterpret_cast<const Arity*>(base_ + sizeof(Score));
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
Note GetNote() const {
|
| 36 |
+
return *reinterpret_cast<const Note*>(base_ + sizeof(Score) + sizeof(Arity));
|
| 37 |
+
}
|
| 38 |
+
void SetNote(Note to) {
|
| 39 |
+
*reinterpret_cast<Note*>(base_ + sizeof(Score) + sizeof(Arity)) = to;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
Moses::Range GetRange() const {
|
| 43 |
+
return *reinterpret_cast<const Moses::Range*>(base_ + sizeof(Score) + sizeof(Arity) + sizeof(Note));
|
| 44 |
+
}
|
| 45 |
+
void SetRange(Moses::Range to) {
|
| 46 |
+
*reinterpret_cast<Moses::Range*>(base_ + sizeof(Score) + sizeof(Arity) + sizeof(Note)) = to;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
uint8_t *Base() { return base_; }
|
| 50 |
+
const uint8_t *Base() const { return base_; }
|
| 51 |
+
|
| 52 |
+
protected:
|
| 53 |
+
Header() : base_(NULL) {}
|
| 54 |
+
|
| 55 |
+
explicit Header(void *base) : base_(static_cast<uint8_t*>(base)) {}
|
| 56 |
+
|
| 57 |
+
Header(void *base, Arity arity) : base_(static_cast<uint8_t*>(base)) {
|
| 58 |
+
*reinterpret_cast<Arity*>(base_ + sizeof(Score)) = arity;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
static const std::size_t kHeaderSize = sizeof(Score) + sizeof(Arity) + sizeof(Note) + sizeof(Moses::Range);
|
| 62 |
+
|
| 63 |
+
uint8_t *After() { return base_ + kHeaderSize; }
|
| 64 |
+
const uint8_t *After() const { return base_ + kHeaderSize; }
|
| 65 |
+
|
| 66 |
+
private:
|
| 67 |
+
uint8_t *base_;
|
| 68 |
+
};
|
| 69 |
+
|
| 70 |
+
} // namespace search
|
| 71 |
+
|
| 72 |
+
#endif // SEARCH_HEADER__
|
mosesdecoder/search/nbest.cc
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "search/nbest.hh"
|
| 2 |
+
|
| 3 |
+
#include "util/pool.hh"
|
| 4 |
+
#include "moses/Util.h"
|
| 5 |
+
|
| 6 |
+
#include <algorithm>
|
| 7 |
+
#include <functional>
|
| 8 |
+
#include <queue>
|
| 9 |
+
#include <cassert>
|
| 10 |
+
#include <cmath>
|
| 11 |
+
|
| 12 |
+
namespace search {
|
| 13 |
+
|
| 14 |
+
// Seed the queue with at most keep of the given partial edges.
// When there are more than keep partials, the vector is reordered in place so
// that its first keep elements are selected under std::greater (highest
// scores); only those are queued. Each queued entry is built in memory taken
// from entry_pool.
// Precondition: partials is not empty.
// NOTE(review): NTH_ELEMENT4 is a macro from moses/Util.h, presumably a
// wrapper around std::nth_element - confirm there.
NBestList::NBestList(std::vector<PartialEdge> &partials, util::Pool &entry_pool, std::size_t keep) {
  assert(!partials.empty());
  std::vector<PartialEdge>::iterator stop = partials.end();
  if (partials.size() > keep) {
    stop = partials.begin() + keep;
    NTH_ELEMENT4(partials.begin(), stop, partials.end(), std::greater<PartialEdge>());
  }
  for (std::vector<PartialEdge>::const_iterator it(partials.begin()); it != stop; ++it) {
    const PartialEdge &edge = *it;
    queue_.push(QueueEntry(entry_pool.Allocate(QueueEntry::Size(edge.GetArity())), edge));
  }
}
|
| 27 |
+
|
| 28 |
+
// Score of the entry currently at the top of the queue.
// Only meaningful before anything has been revealed, which is asserted.
Score NBestList::TopAfterConstructor() const {
  assert(revealed_.empty());
  const QueueEntry &best = queue_.top();
  return best.GetScore();
}
|
| 32 |
+
|
| 33 |
+
// Reveal hypotheses until n are available or the queue is exhausted, then
// return the revealed list. The result may hold fewer than n entries, or more
// if earlier calls already revealed them.
const std::vector<Applied> &NBestList::Extract(util::Pool &pool, std::size_t n) {
  for (;;) {
    if (revealed_.size() >= n || queue_.empty()) break;
    MoveTop(pool);
  }
  return revealed_;
}
|
| 39 |
+
|
| 40 |
+
// Score change incurred by moving from hypothesis index to hypothesis
// index + 1, or -INFINITY when there is no next hypothesis.
// index may be at most revealed_.size(); in that case the hypothesis at index
// is revealed first.
Score NBestList::Visit(util::Pool &pool, std::size_t index) {
  const std::size_t next = index + 1;

  // Both hypotheses are already revealed.
  if (next < revealed_.size()) {
    return revealed_[next].GetScore() - revealed_[index].GetScore();
  }
  // Nothing more can be revealed.
  if (queue_.empty()) {
    return -INFINITY;
  }
  // index is the last revealed one; its successor is the top of the queue.
  if (next == revealed_.size()) {
    return queue_.top().GetScore() - revealed_[index].GetScore();
  }
  assert(index == revealed_.size());

  // Reveal index itself, then compare against whatever is left on top.
  MoveTop(pool);

  if (queue_.empty()) {
    return -INFINITY;
  }
  return queue_.top().GetScore() - revealed_[index].GetScore();
}
|
| 54 |
+
|
| 55 |
+
// Return hypothesis number index, revealing it first when it is exactly the
// next unrevealed one. Asking for anything further ahead trips the assert.
Applied NBestList::Get(util::Pool &pool, std::size_t index) {
  assert(index <= revealed_.size());
  const bool needs_reveal = (index == revealed_.size());
  if (needs_reveal) {
    MoveTop(pool);
  }
  return revealed_[index];
}
|
| 60 |
+
|
| 61 |
+
// Move the best queued entry into revealed_.
//
// First, successor entries are queued: for each child in turn, a copy of the
// entry with that child advanced to its next hypothesis, provided the child
// has one (Visit did not return -INFINITY). Then the entry's child references
// are overwritten in place with the Applied hypotheses they refer to, and the
// entry is appended to revealed_.
// Precondition: the queue is not empty.
void NBestList::MoveTop(util::Pool &pool) {
  assert(!queue_.empty());
  QueueEntry entry(queue_.top());
  queue_.pop();

  const Arity arity = entry.GetArity();
  RevealedRef *const first = entry.Children();
  RevealedRef *const last = first + arity;
  Score basis = entry.GetScore();

  for (RevealedRef *child = first; child != last; ++child) {
    Score change = child->in_->Visit(pool, child->index_);
    if (change != -INFINITY) {
      assert(change < 0.001);
      QueueEntry successor(pool.Allocate(QueueEntry::Size(arity)), basis + change, arity, entry.GetNote(), entry.GetRange());
      RevealedRef *const slots = successor.Children();
      // Children before this one are copied unchanged ...
      std::copy(first, child, slots);
      // ... this one moves on to its next hypothesis ...
      RevealedRef *const bumped = slots + (child - first);
      bumped->in_ = child->in_;
      bumped->index_ = child->index_ + 1;
      // ... and the children after it are copied unchanged.
      std::copy(child + 1, last, bumped + 1);
      queue_.push(successor);
    }
    // Gesmundo, A. and Henderson, J. Faster Cube Pruning, IWSLT 2010.
    if (child->index_) break;
  }

  // Convert QueueEntry to Applied. This leaves some unused memory.
  // Each reference is copied out before its slot is overwritten, because the
  // Applied is written into the same memory.
  void *const raw = entry.Children();
  for (unsigned int i = 0; i < arity; ++i) {
    RevealedRef ref(*(static_cast<const RevealedRef*>(raw) + i));
    *(static_cast<Applied*>(raw) + i) = ref.in_->Get(pool, ref.index_);
  }
  revealed_.push_back(Applied(entry.Base()));
}
|
| 92 |
+
|
| 93 |
+
// Builds an NBestList from the given partial edges (which must be non-empty)
// and returns its handle, completed state and top score.
NBestComplete NBest::Complete(std::vector<PartialEdge> &partials) {
  assert(!partials.empty());
  NBestList *const built = list_pool_.construct(partials, entry_pool_, config_.keep);
  // All partials have the same state, so the front one is used.
  return NBestComplete(built, partials.front().CompletedState(), built->TopAfterConstructor());
}
|
| 101 |
+
|
| 102 |
+
// Treats the opaque history handle as an NBestList and extracts up to
// config_.size entries from it.
const std::vector<Applied> &NBest::Extract(History history) {
  NBestList *const list = static_cast<NBestList*>(history);
  return list->Extract(entry_pool_, config_.size);
}
|
| 105 |
+
|
| 106 |
+
} // namespace search
|
mosesdecoder/search/nbest.hh
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_NBEST__
|
| 2 |
+
#define SEARCH_NBEST__
|
| 3 |
+
|
| 4 |
+
#include "search/applied.hh"
|
| 5 |
+
#include "search/config.hh"
|
| 6 |
+
#include "search/edge.hh"
|
| 7 |
+
|
| 8 |
+
#include <boost/pool/object_pool.hpp>
|
| 9 |
+
|
| 10 |
+
#include <cstddef>
|
| 11 |
+
#include <queue>
|
| 12 |
+
#include <vector>
|
| 13 |
+
#include <cassert>
|
| 14 |
+
|
| 15 |
+
namespace search {
|
| 16 |
+
|
| 17 |
+
class NBestList;
|
| 18 |
+
|
| 19 |
+
// List of derivations for one completed state.  Entries are moved from a
// priority queue (queue_) into revealed_ on demand.
class NBestList {
  private:
    // Refers to entry number index_ of another NBestList.  A History handle
    // is interpreted as an NBestList pointer.
    class RevealedRef {
      public:
        explicit RevealedRef(History history)
          : in_(static_cast<NBestList*>(history)), index_(0) {}

      private:
        friend class NBestList;

        NBestList *in_;
        std::size_t index_;
    };

    typedef GenericApplied<RevealedRef> QueueEntry;

  public:
    NBestList(std::vector<PartialEdge> &existing, util::Pool &entry_pool, std::size_t keep);

    Score TopAfterConstructor() const;

    const std::vector<Applied> &Extract(util::Pool &pool, std::size_t n);

  private:
    // Score change from entry index to entry index + 1; -INFINITY if none.
    Score Visit(util::Pool &pool, std::size_t index);

    // Entry at index, revealing it first if it is the next unrevealed one.
    Applied Get(util::Pool &pool, std::size_t index);

    // Move the best queued entry into revealed_ and queue its successors.
    void MoveTop(util::Pool &pool);

    // Entries already taken off the queue, in the order they were revealed.
    typedef std::vector<Applied> Revealed;
    Revealed revealed_;

    // Candidates that have not been revealed yet.
    typedef std::priority_queue<QueueEntry> Queue;
    Queue queue_;
};
|
| 55 |
+
|
| 56 |
+
// Output policy that keeps n-best lists.  It owns the pools from which the
// NBestList objects and their entries are allocated.
class NBest {
  public:
    // Container in which partial edges sharing a state are collected.
    typedef std::vector<PartialEdge> Combine;

    explicit NBest(const NBestConfig &config) : config_(config) {}

    // Record one more partial edge for later completion.
    void Add(std::vector<PartialEdge> &existing, PartialEdge addition) const {
      existing.push_back(addition);
    }

    // Turn collected partial edges into an NBestList; partials must be non-empty.
    NBestComplete Complete(std::vector<PartialEdge> &partials);

    // root must be a History produced by Complete.
    const std::vector<Applied> &Extract(History root);

  private:
    const NBestConfig config_;

    boost::object_pool<NBestList> list_pool_;

    util::Pool entry_pool_;
};
|
| 77 |
+
|
| 78 |
+
} // namespace search
|
| 79 |
+
|
| 80 |
+
#endif // SEARCH_NBEST__
|
mosesdecoder/search/rule.cc
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "search/rule.hh"
|
| 2 |
+
|
| 3 |
+
#include "lm/model.hh"
|
| 4 |
+
#include "search/context.hh"
|
| 5 |
+
|
| 6 |
+
#include <ostream>
|
| 7 |
+
|
| 8 |
+
#include <cmath>
|
| 9 |
+
|
| 10 |
+
namespace search {
|
| 11 |
+
|
| 12 |
+
template <class Model> ScoreRuleRet ScoreRule(const Model &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing) {
|
| 13 |
+
ScoreRuleRet ret;
|
| 14 |
+
ret.prob = 0.0;
|
| 15 |
+
ret.oov = 0;
|
| 16 |
+
const lm::WordIndex oov = model.GetVocabulary().NotFound(), bos = model.GetVocabulary().BeginSentence();
|
| 17 |
+
lm::ngram::RuleScore<Model> scorer(model, *(writing++));
|
| 18 |
+
std::vector<lm::WordIndex>::const_iterator word = words.begin();
|
| 19 |
+
if (word != words.end() && *word == bos) {
|
| 20 |
+
scorer.BeginSentence();
|
| 21 |
+
++word;
|
| 22 |
+
}
|
| 23 |
+
for (; word != words.end(); ++word) {
|
| 24 |
+
if (*word == kNonTerminal) {
|
| 25 |
+
ret.prob += scorer.Finish();
|
| 26 |
+
scorer.Reset(*(writing++));
|
| 27 |
+
} else {
|
| 28 |
+
if (*word == oov) ++ret.oov;
|
| 29 |
+
scorer.Terminal(*word);
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
ret.prob += scorer.Finish();
|
| 33 |
+
return ret;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
template ScoreRuleRet ScoreRule(const lm::ngram::RestProbingModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
|
| 37 |
+
template ScoreRuleRet ScoreRule(const lm::ngram::ProbingModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
|
| 38 |
+
template ScoreRuleRet ScoreRule(const lm::ngram::TrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
|
| 39 |
+
template ScoreRuleRet ScoreRule(const lm::ngram::QuantTrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
|
| 40 |
+
template ScoreRuleRet ScoreRule(const lm::ngram::ArrayTrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
|
| 41 |
+
template ScoreRuleRet ScoreRule(const lm::ngram::QuantArrayTrieModel &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *writing);
|
| 42 |
+
|
| 43 |
+
} // namespace search
|
mosesdecoder/search/rule.hh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_RULE__
|
| 2 |
+
#define SEARCH_RULE__
|
| 3 |
+
|
| 4 |
+
#include "lm/left.hh"
|
| 5 |
+
#include "lm/word_index.hh"
|
| 6 |
+
#include "search/types.hh"
|
| 7 |
+
|
| 8 |
+
#include <vector>
|
| 9 |
+
|
| 10 |
+
namespace search {
|
| 11 |
+
|
| 12 |
+
const lm::WordIndex kNonTerminal = lm::kMaxWordIndex;
|
| 13 |
+
|
| 14 |
+
// Result of ScoreRule.
struct ScoreRuleRet {
  // Sum of the language model scores of the rule's segments.
  Score prob;
  // Number of terminals equal to the vocabulary's not-found index.
  unsigned int oov;
};
|
| 18 |
+
|
| 19 |
+
// Pass <s> and </s> normally.
// Indicate non-terminals with kNonTerminal.
// state_out points to the first of several ChartState objects; one more is
// written for every kNonTerminal in words.
template <class Model> ScoreRuleRet ScoreRule(const Model &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *state_out);
|
| 22 |
+
|
| 23 |
+
} // namespace search
|
| 24 |
+
|
| 25 |
+
#endif // SEARCH_RULE__
|
mosesdecoder/search/types.hh
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_TYPES__
|
| 2 |
+
#define SEARCH_TYPES__
|
| 3 |
+
|
| 4 |
+
#include <stdint.h>
|
| 5 |
+
|
| 6 |
+
namespace lm { namespace ngram { struct ChartState; } }
|
| 7 |
+
|
| 8 |
+
namespace search {
|
| 9 |
+
|
| 10 |
+
// Score of a hypothesis or edge.
typedef float Score;

// Number of children (non-terminals) of an edge.
typedef uint32_t Arity;

// Opaque value carried along with an edge.
union Note {
  const void *vp;
};

// Opaque handle to whatever the output policy keeps per hypothesis.
typedef void *History;
|
| 19 |
+
|
| 20 |
+
// A completed hypothesis: its history handle, language model state and score.
struct NBestComplete {
  // Stores the address of in_state, so in_state must outlive this object.
  NBestComplete(History in_history, const lm::ngram::ChartState &in_state, Score in_score)
    : history(in_history), state(&in_state), score(in_score) {}

  History history;
  const lm::ngram::ChartState *state;
  Score score;
};
|
| 28 |
+
|
| 29 |
+
} // namespace search
|
| 30 |
+
|
| 31 |
+
#endif // SEARCH_TYPES__
|
mosesdecoder/search/vertex.cc
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "search/vertex.hh"
|
| 2 |
+
|
| 3 |
+
#include "search/context.hh"
|
| 4 |
+
|
| 5 |
+
#include <boost/unordered_map.hpp>
|
| 6 |
+
|
| 7 |
+
#include <algorithm>
|
| 8 |
+
#include <functional>
|
| 9 |
+
#include <cassert>
|
| 10 |
+
|
| 11 |
+
namespace search {
|
| 12 |
+
|
| 13 |
+
namespace {
|
| 14 |
+
|
| 15 |
+
const uint64_t kCompleteAdd = static_cast<uint64_t>(-1);
|
| 16 |
+
|
| 17 |
+
class DivideLeft {
|
| 18 |
+
public:
|
| 19 |
+
explicit DivideLeft(unsigned char index)
|
| 20 |
+
: index_(index) {}
|
| 21 |
+
|
| 22 |
+
uint64_t operator()(const lm::ngram::ChartState &state) const {
|
| 23 |
+
return (index_ < state.left.length) ?
|
| 24 |
+
state.left.pointers[index_] :
|
| 25 |
+
(kCompleteAdd - state.left.full);
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
private:
|
| 29 |
+
unsigned char index_;
|
| 30 |
+
};
|
| 31 |
+
|
| 32 |
+
class DivideRight {
|
| 33 |
+
public:
|
| 34 |
+
explicit DivideRight(unsigned char index)
|
| 35 |
+
: index_(index) {}
|
| 36 |
+
|
| 37 |
+
uint64_t operator()(const lm::ngram::ChartState &state) const {
|
| 38 |
+
return (index_ < state.right.length) ?
|
| 39 |
+
static_cast<uint64_t>(state.right.words[index_]) :
|
| 40 |
+
(kCompleteAdd - state.left.full);
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
private:
|
| 44 |
+
unsigned char index_;
|
| 45 |
+
};
|
| 46 |
+
|
| 47 |
+
template <class Divider> void Split(const Divider ÷r, const std::vector<HypoState> &hypos, std::vector<VertexNode> &extend) {
|
| 48 |
+
// Map from divider to index in extend.
|
| 49 |
+
typedef boost::unordered_map<uint64_t, std::size_t> Lookup;
|
| 50 |
+
Lookup lookup;
|
| 51 |
+
for (std::vector<HypoState>::const_iterator i = hypos.begin(); i != hypos.end(); ++i) {
|
| 52 |
+
uint64_t key = divider(i->state);
|
| 53 |
+
std::pair<Lookup::iterator, bool> res(lookup.insert(std::make_pair(key, extend.size())));
|
| 54 |
+
if (res.second) {
|
| 55 |
+
extend.resize(extend.size() + 1);
|
| 56 |
+
extend.back().AppendHypothesis(*i);
|
| 57 |
+
} else {
|
| 58 |
+
extend[res.first->second].AppendHypothesis(*i);
|
| 59 |
+
}
|
| 60 |
+
}
|
| 61 |
+
//assert((extend.size() != 1) || (hypos.size() == 1));
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
// Value identifying position index of a right state: the word stored there.
lm::WordIndex Identify(const lm::ngram::Right &right, unsigned char index) {
  return right.words[index];
}

// Value identifying position index of a left state: the pointer stored there.
uint64_t Identify(const lm::ngram::Left &left, unsigned char index) {
  return left.pointers[index];
}
|
| 71 |
+
|
| 72 |
+
// Tracks how long a prefix of one side (left or right state) is shared by a
// reference state and every other state passed to Consider.  The first
// `guaranteed` positions are assumed equal and are not compared.
template <class Side> class DetermineSame {
  public:
    DetermineSame(const Side &side, unsigned char guaranteed)
      : side_(side), guaranteed_(guaranteed), shared_(side.length), complete_(true) {}

    // Shrink the shared prefix so that it also matches other.
    void Consider(const Side &other) {
      if (other.length != shared_) {
        complete_ = false;
        if (other.length < shared_) {
          shared_ = other.length;
        }
      }
      // Advance past positions on which both sides agree.
      unsigned char pos = guaranteed_;
      while (pos < shared_ && Identify(side_, pos) == Identify(other, pos)) {
        ++pos;
      }
      if (pos < shared_) {
        // Stopped on a mismatch before reaching the end of the shared prefix.
        shared_ = pos;
        complete_ = false;
      }
    }

    // Length of the prefix shared by everything considered so far.
    unsigned char Shared() const { return shared_; }

    // True while every considered side matched the reference in full.
    bool Complete() const { return complete_; }

  private:
    const Side &side_;
    unsigned char guaranteed_, shared_;
    bool complete_;
};
|
| 101 |
+
|
| 102 |
+
// Custom enum to save memory: valid values of policy_.
// Alternate between left and right; there is still alternation to do.
const unsigned char kPolicyAlternate = 0;
// Branch based on left state only, because right ran out or this is a left tree.
const unsigned char kPolicyOneLeft = 1;
// Branch based on right state only.
const unsigned char kPolicyOneRight = 2;
// Reveal everything in the next branch.  Used to terminate the left/right policies.
// static const unsigned char kPolicyEverything = 3;
|
| 111 |
+
|
| 112 |
+
} // namespace
|
| 113 |
+
|
| 114 |
+
namespace {
|
| 115 |
+
struct GreaterByScore : public std::binary_function<const HypoState &, const HypoState &, bool> {
|
| 116 |
+
bool operator()(const HypoState &first, const HypoState &second) const {
|
| 117 |
+
return first.score > second.score;
|
| 118 |
+
}
|
| 119 |
+
};
|
| 120 |
+
} // namespace
|
| 121 |
+
|
| 122 |
+
void VertexNode::FinishRoot() {
|
| 123 |
+
std::sort(hypos_.begin(), hypos_.end(), GreaterByScore());
|
| 124 |
+
extend_.clear();
|
| 125 |
+
// HACK: extend to one hypo so that root can be blank.
|
| 126 |
+
state_.left.full = false;
|
| 127 |
+
state_.left.length = 0;
|
| 128 |
+
state_.right.length = 0;
|
| 129 |
+
right_full_ = false;
|
| 130 |
+
niceness_ = 0;
|
| 131 |
+
policy_ = kPolicyAlternate;
|
| 132 |
+
if (hypos_.size() == 1) {
|
| 133 |
+
extend_.resize(1);
|
| 134 |
+
extend_.front().AppendHypothesis(hypos_.front());
|
| 135 |
+
extend_.front().FinishedAppending(0, 0);
|
| 136 |
+
}
|
| 137 |
+
if (hypos_.empty()) {
|
| 138 |
+
bound_ = -INFINITY;
|
| 139 |
+
} else {
|
| 140 |
+
bound_ = hypos_.front().score;
|
| 141 |
+
}
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
void VertexNode::FinishedAppending(const unsigned char common_left, const unsigned char common_right) {
|
| 145 |
+
assert(!hypos_.empty());
|
| 146 |
+
assert(extend_.empty());
|
| 147 |
+
bound_ = hypos_.front().score;
|
| 148 |
+
state_ = hypos_.front().state;
|
| 149 |
+
bool all_full = state_.left.full;
|
| 150 |
+
bool all_non_full = !state_.left.full;
|
| 151 |
+
DetermineSame<lm::ngram::Left> left(state_.left, common_left);
|
| 152 |
+
DetermineSame<lm::ngram::Right> right(state_.right, common_right);
|
| 153 |
+
for (std::vector<HypoState>::const_iterator i = hypos_.begin() + 1; i != hypos_.end(); ++i) {
|
| 154 |
+
all_full &= i->state.left.full;
|
| 155 |
+
all_non_full &= !i->state.left.full;
|
| 156 |
+
left.Consider(i->state.left);
|
| 157 |
+
right.Consider(i->state.right);
|
| 158 |
+
}
|
| 159 |
+
state_.left.full = all_full && left.Complete();
|
| 160 |
+
right_full_ = all_full && right.Complete();
|
| 161 |
+
state_.left.length = left.Shared();
|
| 162 |
+
state_.right.length = right.Shared();
|
| 163 |
+
|
| 164 |
+
if (!all_full && !all_non_full) {
|
| 165 |
+
policy_ = kPolicyAlternate;
|
| 166 |
+
} else if (left.Complete()) {
|
| 167 |
+
policy_ = kPolicyOneRight;
|
| 168 |
+
} else if (right.Complete()) {
|
| 169 |
+
policy_ = kPolicyOneLeft;
|
| 170 |
+
} else {
|
| 171 |
+
policy_ = kPolicyAlternate;
|
| 172 |
+
}
|
| 173 |
+
niceness_ = state_.left.length + state_.right.length;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
void VertexNode::BuildExtend() {
|
| 177 |
+
// Already built.
|
| 178 |
+
if (!extend_.empty()) return;
|
| 179 |
+
// Nothing to build since this is a leaf.
|
| 180 |
+
if (hypos_.size() <= 1) return;
|
| 181 |
+
bool left_branch = true;
|
| 182 |
+
switch (policy_) {
|
| 183 |
+
case kPolicyAlternate:
|
| 184 |
+
left_branch = (state_.left.length <= state_.right.length);
|
| 185 |
+
break;
|
| 186 |
+
case kPolicyOneLeft:
|
| 187 |
+
left_branch = true;
|
| 188 |
+
break;
|
| 189 |
+
case kPolicyOneRight:
|
| 190 |
+
left_branch = false;
|
| 191 |
+
break;
|
| 192 |
+
}
|
| 193 |
+
if (left_branch) {
|
| 194 |
+
Split(DivideLeft(state_.left.length), hypos_, extend_);
|
| 195 |
+
} else {
|
| 196 |
+
Split(DivideRight(state_.right.length), hypos_, extend_);
|
| 197 |
+
}
|
| 198 |
+
for (std::vector<VertexNode>::iterator i = extend_.begin(); i != extend_.end(); ++i) {
|
| 199 |
+
// TODO: provide more here for branching?
|
| 200 |
+
i->FinishedAppending(state_.left.length, state_.right.length);
|
| 201 |
+
}
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
} // namespace search
|
mosesdecoder/search/vertex.hh
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_VERTEX__
|
| 2 |
+
#define SEARCH_VERTEX__
|
| 3 |
+
|
| 4 |
+
#include "lm/left.hh"
|
| 5 |
+
#include "search/types.hh"
|
| 6 |
+
|
| 7 |
+
#include <boost/unordered_set.hpp>
|
| 8 |
+
|
| 9 |
+
#include <queue>
|
| 10 |
+
#include <vector>
|
| 11 |
+
#include <cmath>
|
| 12 |
+
#include <stdint.h>
|
| 13 |
+
|
| 14 |
+
namespace search {
|
| 15 |
+
|
| 16 |
+
class ContextBase;
|
| 17 |
+
|
| 18 |
+
// One hypothesis stored in a vertex: its history handle, language model
// state and score.
struct HypoState {
  History history;
  lm::ngram::ChartState state;
  Score score;
};
|
| 23 |
+
|
| 24 |
+
class VertexNode {
|
| 25 |
+
public:
|
| 26 |
+
VertexNode() {}
|
| 27 |
+
|
| 28 |
+
void InitRoot() { hypos_.clear(); }
|
| 29 |
+
|
| 30 |
+
/* The steps of building a VertexNode:
|
| 31 |
+
* 1. Default construct.
|
| 32 |
+
* 2. AppendHypothesis at least once, possibly multiple times.
|
| 33 |
+
* 3. FinishAppending with the number of words on left and right guaranteed
|
| 34 |
+
* to be common.
|
| 35 |
+
* 4. If !Complete(), call BuildExtend to construct the extensions
|
| 36 |
+
*/
|
| 37 |
+
// Must default construct, call AppendHypothesis 1 or more times then do FinishedAppending.
|
| 38 |
+
void AppendHypothesis(const NBestComplete &best) {
|
| 39 |
+
assert(hypos_.empty() || !(hypos_.front().state == *best.state));
|
| 40 |
+
HypoState hypo;
|
| 41 |
+
hypo.history = best.history;
|
| 42 |
+
hypo.state = *best.state;
|
| 43 |
+
hypo.score = best.score;
|
| 44 |
+
hypos_.push_back(hypo);
|
| 45 |
+
}
|
| 46 |
+
void AppendHypothesis(const HypoState &hypo) {
|
| 47 |
+
hypos_.push_back(hypo);
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
// Sort hypotheses for the root.
|
| 51 |
+
void FinishRoot();
|
| 52 |
+
|
| 53 |
+
void FinishedAppending(const unsigned char common_left, const unsigned char common_right);
|
| 54 |
+
|
| 55 |
+
void BuildExtend();
|
| 56 |
+
|
| 57 |
+
// Should only happen to a root node when the entire vertex is empty.
|
| 58 |
+
bool Empty() const {
|
| 59 |
+
return hypos_.empty() && extend_.empty();
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
bool Complete() const {
|
| 63 |
+
// HACK: prevent root from being complete. TODO: allow root to be complete.
|
| 64 |
+
return hypos_.size() == 1 && extend_.empty();
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
const lm::ngram::ChartState &State() const { return state_; }
|
| 68 |
+
bool RightFull() const { return right_full_; }
|
| 69 |
+
|
| 70 |
+
// Priority relative to other non-terminals. 0 is highest.
|
| 71 |
+
unsigned char Niceness() const { return niceness_; }
|
| 72 |
+
|
| 73 |
+
Score Bound() const {
|
| 74 |
+
return bound_;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
// Will be invalid unless this is a leaf.
|
| 78 |
+
const History End() const {
|
| 79 |
+
assert(hypos_.size() == 1);
|
| 80 |
+
return hypos_.front().history;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
VertexNode &operator[](size_t index) {
|
| 84 |
+
assert(!extend_.empty());
|
| 85 |
+
return extend_[index];
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
size_t Size() const {
|
| 89 |
+
return extend_.size();
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
private:
|
| 93 |
+
// Hypotheses to be split.
|
| 94 |
+
std::vector<HypoState> hypos_;
|
| 95 |
+
|
| 96 |
+
std::vector<VertexNode> extend_;
|
| 97 |
+
|
| 98 |
+
lm::ngram::ChartState state_;
|
| 99 |
+
bool right_full_;
|
| 100 |
+
|
| 101 |
+
unsigned char niceness_;
|
| 102 |
+
|
| 103 |
+
unsigned char policy_;
|
| 104 |
+
|
| 105 |
+
Score bound_;
|
| 106 |
+
};
|
| 107 |
+
|
| 108 |
+
class PartialVertex {
|
| 109 |
+
public:
|
| 110 |
+
PartialVertex() {}
|
| 111 |
+
|
| 112 |
+
explicit PartialVertex(VertexNode &back) : back_(&back), index_(0) {}
|
| 113 |
+
|
| 114 |
+
bool Empty() const { return back_->Empty(); }
|
| 115 |
+
|
| 116 |
+
bool Complete() const { return back_->Complete(); }
|
| 117 |
+
|
| 118 |
+
const lm::ngram::ChartState &State() const { return back_->State(); }
|
| 119 |
+
bool RightFull() const { return back_->RightFull(); }
|
| 120 |
+
|
| 121 |
+
Score Bound() const { return index_ ? (*back_)[index_].Bound() : back_->Bound(); }
|
| 122 |
+
|
| 123 |
+
unsigned char Niceness() const { return back_->Niceness(); }
|
| 124 |
+
|
| 125 |
+
// Split into continuation and alternative, rendering this the continuation.
|
| 126 |
+
bool Split(PartialVertex &alternative) {
|
| 127 |
+
assert(!Complete());
|
| 128 |
+
back_->BuildExtend();
|
| 129 |
+
bool ret;
|
| 130 |
+
if (index_ + 1 < back_->Size()) {
|
| 131 |
+
alternative.index_ = index_ + 1;
|
| 132 |
+
alternative.back_ = back_;
|
| 133 |
+
ret = true;
|
| 134 |
+
} else {
|
| 135 |
+
ret = false;
|
| 136 |
+
}
|
| 137 |
+
back_ = &((*back_)[index_]);
|
| 138 |
+
index_ = 0;
|
| 139 |
+
return ret;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
const History End() const {
|
| 143 |
+
return back_->End();
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
private:
|
| 147 |
+
VertexNode *back_;
|
| 148 |
+
unsigned int index_;
|
| 149 |
+
};
|
| 150 |
+
|
| 151 |
+
template <class Output> class VertexGenerator;
|
| 152 |
+
|
| 153 |
+
class Vertex {
|
| 154 |
+
public:
|
| 155 |
+
Vertex() {}
|
| 156 |
+
|
| 157 |
+
//PartialVertex RootFirst() const { return PartialVertex(right_); }
|
| 158 |
+
PartialVertex RootAlternate() { return PartialVertex(root_); }
|
| 159 |
+
//PartialVertex RootLast() const { return PartialVertex(left_); }
|
| 160 |
+
|
| 161 |
+
bool Empty() const {
|
| 162 |
+
return root_.Empty();
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
Score Bound() const {
|
| 166 |
+
return root_.Bound();
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
const History BestChild() {
|
| 170 |
+
// left_ and right_ are not set at the root.
|
| 171 |
+
PartialVertex top(RootAlternate());
|
| 172 |
+
if (top.Empty()) {
|
| 173 |
+
return History();
|
| 174 |
+
} else {
|
| 175 |
+
PartialVertex continuation;
|
| 176 |
+
while (!top.Complete()) {
|
| 177 |
+
top.Split(continuation);
|
| 178 |
+
}
|
| 179 |
+
return top.End();
|
| 180 |
+
}
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
private:
|
| 184 |
+
template <class Output> friend class VertexGenerator;
|
| 185 |
+
template <class Output> friend class RootVertexGenerator;
|
| 186 |
+
VertexNode root_;
|
| 187 |
+
|
| 188 |
+
// These will not be set for the root vertex.
|
| 189 |
+
// Branches only on left state.
|
| 190 |
+
//VertexNode left_;
|
| 191 |
+
// Branches only on right state.
|
| 192 |
+
//VertexNode right_;
|
| 193 |
+
};
|
| 194 |
+
|
| 195 |
+
} // namespace search
|
| 196 |
+
#endif // SEARCH_VERTEX__
|
mosesdecoder/search/vertex_generator.hh
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef SEARCH_VERTEX_GENERATOR__
|
| 2 |
+
#define SEARCH_VERTEX_GENERATOR__
|
| 3 |
+
|
| 4 |
+
#include "search/edge.hh"
|
| 5 |
+
#include "search/types.hh"
|
| 6 |
+
#include "search/vertex.hh"
|
| 7 |
+
|
| 8 |
+
namespace lm {
|
| 9 |
+
namespace ngram {
|
| 10 |
+
struct ChartState;
|
| 11 |
+
} // namespace ngram
|
| 12 |
+
} // namespace lm
|
| 13 |
+
|
| 14 |
+
namespace search {
|
| 15 |
+
|
| 16 |
+
class ContextBase;
|
| 17 |
+
|
| 18 |
+
// Output makes the single-best or n-best list.
|
| 19 |
+
template <class Output> class VertexGenerator {
|
| 20 |
+
public:
|
| 21 |
+
VertexGenerator(ContextBase &context, Vertex &gen, Output &nbest) : context_(context), gen_(gen), nbest_(nbest) {}
|
| 22 |
+
|
| 23 |
+
void NewHypothesis(PartialEdge partial) {
|
| 24 |
+
nbest_.Add(existing_[hash_value(partial.CompletedState())], partial);
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
void FinishedSearch() {
|
| 28 |
+
gen_.root_.InitRoot();
|
| 29 |
+
for (typename Existing::iterator i(existing_.begin()); i != existing_.end(); ++i) {
|
| 30 |
+
gen_.root_.AppendHypothesis(nbest_.Complete(i->second));
|
| 31 |
+
}
|
| 32 |
+
existing_.clear();
|
| 33 |
+
gen_.root_.FinishRoot();
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
Vertex &Generating() { return gen_; }
|
| 37 |
+
|
| 38 |
+
private:
|
| 39 |
+
ContextBase &context_;
|
| 40 |
+
|
| 41 |
+
Vertex &gen_;
|
| 42 |
+
|
| 43 |
+
typedef boost::unordered_map<uint64_t, typename Output::Combine> Existing;
|
| 44 |
+
Existing existing_;
|
| 45 |
+
|
| 46 |
+
Output &nbest_;
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
// Special case for root vertex: everything should come together into the root
// node.  In theory, this should happen naturally due to state collapsing with
// <s> and </s>.  If that's the case, VertexGenerator is fine, though it will
// make one connection.
template <class Output> class RootVertexGenerator {
  public:
    RootVertexGenerator(Vertex &gen, Output &out) : gen_(gen), out_(out) {}

    // Every hypothesis goes into the single combined group.
    void NewHypothesis(PartialEdge partial) {
      out_.Add(combine_, partial);
    }

    // Complete the combined group and make it the only root hypothesis.
    void FinishedSearch() {
      gen_.root_.InitRoot();
      gen_.root_.AppendHypothesis(out_.Complete(combine_));
      gen_.root_.FinishRoot();
    }

  private:
    Vertex &gen_;

    typename Output::Combine combine_;
    Output &out_;
};
|
| 73 |
+
|
| 74 |
+
} // namespace search
|
| 75 |
+
#endif // SEARCH_VERTEX_GENERATOR__
|
mosesdecoder/symal/Jamfile
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
exe symal : symal.cpp cmd.c ;
|
| 2 |
+
|
mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/cmd.o
ADDED
|
Binary file (14.7 kB). View file
|
|
|
mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal
ADDED
|
Binary file (200 kB). View file
|
|
|
mosesdecoder/symal/bin/gcc-9/release/link-static/threading-multi/symal.o
ADDED
|
Binary file (41.4 kB). View file
|
|
|
mosesdecoder/symal/cmd.c
ADDED
|
@@ -0,0 +1,642 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
// $Id$
|
| 3 |
+
|
| 4 |
+
#include <stdarg.h>
|
| 5 |
+
#include <stdio.h>
|
| 6 |
+
#include <stdlib.h>
|
| 7 |
+
#include <ctype.h>
|
| 8 |
+
#include <string.h>
|
| 9 |
+
|
| 10 |
+
#include "cmd.h"
|
| 11 |
+
|
| 12 |
+
#ifdef WIN32
|
| 13 |
+
# define popen _popen
|
| 14 |
+
# define pclose _pclose
|
| 15 |
+
#endif
|
| 16 |
+
|
| 17 |
+
typedef struct {
|
| 18 |
+
enum CommandType Type;
|
| 19 |
+
const char *Name,
|
| 20 |
+
*ArgStr;
|
| 21 |
+
void *Val;
|
| 22 |
+
const void *p;
|
| 23 |
+
} Cmd_T;
|
| 24 |
+
|
| 25 |
+
static const Enum_T BoolEnum[] = {
|
| 26 |
+
{ "FALSE", 0 },
|
| 27 |
+
{ "TRUE", 1 },
|
| 28 |
+
{ 0, 0 }
|
| 29 |
+
};
|
| 30 |
+
|
| 31 |
+
#ifdef NEEDSTRDUP
|
| 32 |
+
char *strdup();
|
| 33 |
+
#endif
|
| 34 |
+
|
| 35 |
+
#define FALSE 0
|
| 36 |
+
#define TRUE 1
|
| 37 |
+
|
| 38 |
+
#define LINSIZ 10240
|
| 39 |
+
#define MAXPARAM 256
|
| 40 |
+
|
| 41 |
+
static Cmd_T cmds[MAXPARAM+1];
|
| 42 |
+
static const char *SepString = " \t\n";
|
| 43 |
+
|
| 44 |
+
/// Return cmd->p, as an int.
|
| 45 |
+
static int get_p_int(const Cmd_T *cmd)
|
| 46 |
+
{
|
| 47 |
+
return *(const int *)cmd->p;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
/// Return cmd->p, as a pointer to a null-terminated array of Enum_T.
|
| 51 |
+
static const Enum_T *get_p_enums(const Cmd_T *cmd)
|
| 52 |
+
{
|
| 53 |
+
return (const Enum_T *)cmd->p;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/// Return cmd->p, as a pointer to a string.
|
| 57 |
+
static const char *get_p_char(const Cmd_T *cmd)
|
| 58 |
+
{
|
| 59 |
+
return (const char *)cmd->p;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
/// Return cmd->p, as a pointer to an array of two ints.
|
| 63 |
+
static const int *get_p_range(const Cmd_T *cmd)
|
| 64 |
+
{
|
| 65 |
+
return (const int *)cmd->p;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
/// Return cmd->Val, as a pointer to int.
|
| 69 |
+
static int *get_val_int_ptr(const Cmd_T *cmd)
|
| 70 |
+
{
|
| 71 |
+
return (int *)cmd->Val;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/// Return the int at which cmd->Val points.
|
| 75 |
+
static int get_val_int(const Cmd_T *cmd)
|
| 76 |
+
{
|
| 77 |
+
return *get_val_int_ptr(cmd);
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
/// Update the int at which cmd->Val points.
|
| 81 |
+
static void update_val_int(const Cmd_T *cmd, int value)
|
| 82 |
+
{
|
| 83 |
+
*get_val_int_ptr(cmd) = value;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
/// Return cmd->Val, as a pointer to double.
|
| 87 |
+
static double *get_val_double_ptr(const Cmd_T *cmd)
|
| 88 |
+
{
|
| 89 |
+
return (double *)cmd->Val;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
/// Return the double at which cmd->Val points.
|
| 93 |
+
static double get_val_double(const Cmd_T *cmd)
|
| 94 |
+
{
|
| 95 |
+
return *get_val_double_ptr(cmd);
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
/// Return cmd->Val as a pointer to a string pointer.
|
| 99 |
+
static const char **get_val_char_ptr(const Cmd_T *cmd)
|
| 100 |
+
{
|
| 101 |
+
return (const char **)cmd->Val;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
/// Return the string pointer at which cmd->Val points.
|
| 105 |
+
static const char *get_val_char(const Cmd_T *cmd)
|
| 106 |
+
{
|
| 107 |
+
return *get_val_char_ptr(cmd);
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
/// Update the string pointer at which cmd->Val points.
|
| 111 |
+
static void update_val_char(const Cmd_T *cmd, const char *s)
|
| 112 |
+
{
|
| 113 |
+
*get_val_char_ptr(cmd) = s;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
int DeclareParams(const char *ParName, ...)
|
| 117 |
+
{
|
| 118 |
+
va_list args;
|
| 119 |
+
static int ParamN = 0;
|
| 120 |
+
|
| 121 |
+
va_start(args, ParName);
|
| 122 |
+
for(; ParName;) {
|
| 123 |
+
int c,
|
| 124 |
+
j = 0;
|
| 125 |
+
if(ParamN==MAXPARAM) {
|
| 126 |
+
fprintf(stderr, "Too many parameters !!\n");
|
| 127 |
+
break;
|
| 128 |
+
}
|
| 129 |
+
for(c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
|
| 130 |
+
;
|
| 131 |
+
if(!c) {
|
| 132 |
+
fprintf(stderr,
|
| 133 |
+
"Warning: parameter \"%s\" declared twice.\n",
|
| 134 |
+
ParName);
|
| 135 |
+
}
|
| 136 |
+
for(c=ParamN; c>j; c--) {
|
| 137 |
+
cmds[c] = cmds[c-1];
|
| 138 |
+
}
|
| 139 |
+
cmds[j].Name = ParName;
|
| 140 |
+
cmds[j].Type = va_arg(args, enum CommandType);
|
| 141 |
+
cmds[j].Val = va_arg(args, void *);
|
| 142 |
+
switch(cmds[j].Type) {
|
| 143 |
+
case CMDENUMTYPE: /* get the pointer to Enum_T struct */
|
| 144 |
+
cmds[j].p = va_arg(args, void *);
|
| 145 |
+
break;
|
| 146 |
+
case CMDSUBRANGETYPE: { /* get the two extremes */
|
| 147 |
+
int *subrange = calloc(2, sizeof(int));
|
| 148 |
+
cmds[j].p = subrange;
|
| 149 |
+
subrange[0] = va_arg(args, int);
|
| 150 |
+
subrange[1] = va_arg(args, int);
|
| 151 |
+
}
|
| 152 |
+
break;
|
| 153 |
+
case CMDGTETYPE: /* get lower or upper bound */
|
| 154 |
+
case CMDLTETYPE: {
|
| 155 |
+
int *value = calloc(1, sizeof(int));
|
| 156 |
+
cmds[j].p = value;
|
| 157 |
+
value[0] = va_arg(args, int);
|
| 158 |
+
}
|
| 159 |
+
break;
|
| 160 |
+
case CMDSTRARRAYTYPE: { /* get the separators string */
|
| 161 |
+
const char *s = va_arg(args, const char *);
|
| 162 |
+
cmds[j].p = (s ? strdup(s) : NULL);
|
| 163 |
+
}
|
| 164 |
+
break;
|
| 165 |
+
case CMDBOOLTYPE:
|
| 166 |
+
cmds[j].Type = CMDENUMTYPE;
|
| 167 |
+
cmds[j].p = BoolEnum;
|
| 168 |
+
break;
|
| 169 |
+
case CMDDOUBLETYPE: /* nothing else is needed */
|
| 170 |
+
case CMDINTTYPE:
|
| 171 |
+
case CMDSTRINGTYPE:
|
| 172 |
+
break;
|
| 173 |
+
default:
|
| 174 |
+
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
| 175 |
+
"DeclareParam()", "Unknown Type",
|
| 176 |
+
cmds[j].Type, "for parameter", cmds[j].Name);
|
| 177 |
+
exit(1);
|
| 178 |
+
}
|
| 179 |
+
ParamN++;
|
| 180 |
+
ParName = va_arg(args, const char *);
|
| 181 |
+
}
|
| 182 |
+
cmds[ParamN].Name = NULL;
|
| 183 |
+
va_end(args);
|
| 184 |
+
return 0;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
/// Read the next meaningful line of a command file into Line.
///
/// Lines whose first character is '#' are skipped, as are lines that are
/// empty once the trailing newline and leading whitespace are removed.
/// Leading whitespace is dropped.  A line whose last character is a
/// backslash is joined with the following line, the backslash being
/// replaced by one space.
///
/// @param fp    stream to read from
/// @param n     size of the Line buffer in bytes
/// @param Line  destination buffer
/// @return Line on success; NULL when fgets() reports end of input or an
///         error, or when n leaves no room for any text.  If joined lines
///         fill the buffer, the text gathered so far is returned.
static char *GetLine(FILE *fp, int n, char *Line)
{
  int	offs=0;

  for(;;) {
    int	j, l;
    if(n-offs<2) {
      /* fgets() could not store any character: stop instead of spinning */
      return offs ? Line : NULL;
    }
    if(!fgets(Line+offs, n-offs, fp)) {
      return NULL;
    }
    if(Line[offs]=='#') continue;
    l = (int)strlen(Line+offs);
    /* strip the newline only when there is one: the final line of a file
     * (or an over-long line) arrives without it */
    if(l>0 && Line[offs+l-1]=='\n') {
      Line[offs + --l] = 0;
    }
    /* skip leading whitespace; l tracks the length of what remains */
    for(j=offs; Line[j] && isspace((unsigned char)Line[j]); j++, l--)
      ;
    if(l<1) continue;
    if(j > offs) {
      memmove(Line+offs, Line+j, (size_t)l+1);
    }
    if(Line[offs+l-1]=='\\') {
      /* continuation: next read appends right after this text */
      offs += l;
      Line[offs-1] = ' ';
    } else {
      break;
    }
  }
  return Line;
}
|
| 218 |
+
|
| 219 |
+
static void EnumError(const Cmd_T *cmd, const char *s)
|
| 220 |
+
{
|
| 221 |
+
const Enum_T *en;
|
| 222 |
+
|
| 223 |
+
fprintf(stderr,
|
| 224 |
+
"Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
|
| 225 |
+
fprintf(stderr, "Valid values are:\n");
|
| 226 |
+
for(en=get_p_enums(cmd); en->Name; en++) {
|
| 227 |
+
if(*en->Name) {
|
| 228 |
+
fprintf(stderr, " %s\n", en->Name);
|
| 229 |
+
}
|
| 230 |
+
}
|
| 231 |
+
fprintf(stderr, "\n");
|
| 232 |
+
exit(1);
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
static void GteError(const Cmd_T *cmd, int n)
|
| 236 |
+
{
|
| 237 |
+
fprintf(stderr,
|
| 238 |
+
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
| 239 |
+
fprintf(stderr, "Valid values must be greater than or equal to %d\n",
|
| 240 |
+
get_p_int(cmd));
|
| 241 |
+
exit(1);
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
static void LteError(const Cmd_T *cmd, int n)
|
| 245 |
+
{
|
| 246 |
+
fprintf(stderr,
|
| 247 |
+
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
| 248 |
+
fprintf(stderr, "Valid values must be less than or equal to %d\n",
|
| 249 |
+
get_p_int(cmd));
|
| 250 |
+
exit(1);
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
static void SubrangeError(const Cmd_T *cmd, int n)
|
| 254 |
+
{
|
| 255 |
+
const int *subrange = get_p_range(cmd);
|
| 256 |
+
fprintf(stderr,
|
| 257 |
+
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
| 258 |
+
fprintf(stderr, "Valid values range from %d to %d\n",
|
| 259 |
+
subrange[0], subrange[1]);
|
| 260 |
+
exit(1);
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
/// Set an enum parameter from its textual name.  The first table entry
/// whose non-empty name equals s supplies the value; if none matches,
/// EnumError() is called.
static void SetEnum(Cmd_T *cmd, const char *s)
{
  const Enum_T *entry = get_p_enums(cmd);

  for(; entry->Name; ++entry) {
    if(entry->Name[0] == '\0' || strcmp(s, entry->Name) != 0) {
      continue;
    }
    update_val_int(cmd, entry->Idx);
    return;
  }
  EnumError(cmd, s);
}
|
| 275 |
+
|
| 276 |
+
/// Set an int parameter that must lie within the two limits stored in
/// cmd->p (both inclusive).  Text that does not start with an integer
/// terminates the program; an out-of-range value goes to SubrangeError().
static void SetSubrange(Cmd_T *cmd, const char *s)
{
  const int *limits = get_p_range(cmd);
  int parsed;

  if(sscanf(s, "%d", &parsed) != 1) {
    fprintf(stderr,
            "Integer value required for parameter \"%s\"\n",
            cmd->Name);
    exit(1);
  }
  if(parsed < limits[0] || limits[1] < parsed) {
    SubrangeError(cmd, parsed);
  }
  update_val_int(cmd, parsed);
}
|
| 292 |
+
|
| 293 |
+
/// Set an int parameter that must be at least the bound stored in cmd->p.
/// Text that does not start with an integer terminates the program; a
/// value below the bound goes to GteError().
static void SetGte(Cmd_T *cmd, const char *s)
{
  int parsed;

  if(sscanf(s, "%d", &parsed) != 1) {
    fprintf(stderr,
            "Integer value required for parameter \"%s\"\n",
            cmd->Name);
    exit(1);
  }
  if(get_p_int(cmd) > parsed) {
    GteError(cmd, parsed);
  }
  update_val_int(cmd, parsed);
}
|
| 308 |
+
|
| 309 |
+
static char **str2array(const char *s, const char *sep)
|
| 310 |
+
{
|
| 311 |
+
const char *p;
|
| 312 |
+
char **a;
|
| 313 |
+
int n = 0,
|
| 314 |
+
l;
|
| 315 |
+
|
| 316 |
+
if(!sep) sep = SepString;
|
| 317 |
+
p = s += strspn(s, sep);
|
| 318 |
+
while(*p) {
|
| 319 |
+
p += strcspn(p, sep);
|
| 320 |
+
p += strspn(p, sep);
|
| 321 |
+
++n;
|
| 322 |
+
}
|
| 323 |
+
a = calloc(n+1, sizeof(char *));
|
| 324 |
+
p = s;
|
| 325 |
+
n = 0;
|
| 326 |
+
while(*p) {
|
| 327 |
+
l = strcspn(p, sep);
|
| 328 |
+
a[n] = malloc(l+1);
|
| 329 |
+
memcpy(a[n], p, l);
|
| 330 |
+
a[n][l] = 0;
|
| 331 |
+
++n;
|
| 332 |
+
p += l;
|
| 333 |
+
p += strspn(p, sep);
|
| 334 |
+
}
|
| 335 |
+
return a;
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
/// Set a string-array parameter: split s with the separators held in
/// cmd->p and store the resulting array through cmd->Val.
static void SetStrArray(Cmd_T *cmd, const char *s)
{
  char ***dest = cmd->Val;

  *dest = str2array(s, get_p_char(cmd));
}
|
| 342 |
+
|
| 343 |
+
/// Set an int parameter that must be at most the bound stored in cmd->p.
/// Text that does not start with an integer terminates the program; a
/// value above the bound goes to LteError().
static void SetLte(Cmd_T *cmd, const char *s)
{
  int parsed;

  if(sscanf(s, "%d", &parsed) != 1) {
    fprintf(stderr,
            "Integer value required for parameter \"%s\"\n",
            cmd->Name);
    exit(1);
  }
  if(get_p_int(cmd) < parsed) {
    LteError(cmd, parsed);
  }
  update_val_int(cmd, parsed);
}
|
| 358 |
+
|
| 359 |
+
/// Parse the text s according to cmd->Type and store the result through
/// cmd->Val, then remember a copy of s in cmd->ArgStr.
///
/// An empty s is only accepted for CMDSTRINGTYPE; for every other type a
/// warning is printed and nothing is changed.  For CMDSTRINGTYPE the texts
/// "<NULL>" and "NULL" store a null pointer, anything else stores a fresh
/// copy of s.  Unparsable numbers and unknown types terminate the program
/// with exit status 1.
static void SetParam(Cmd_T *cmd, const char *s)
{
  if(cmd->Type != CMDSTRINGTYPE && *s == '\0') {
    fprintf(stderr,
            "WARNING: No value specified for parameter \"%s\"\n",
            cmd->Name);
    return;
  }

  switch(cmd->Type) {
  case CMDDOUBLETYPE: {
    double *dest = get_val_double_ptr(cmd);
    if(sscanf(s, "%lf", dest) != 1) {
      fprintf(stderr,
              "Float value required for parameter \"%s\"\n",
              cmd->Name);
      exit(1);
    }
    break;
  }
  case CMDINTTYPE: {
    int *dest = get_val_int_ptr(cmd);
    if(sscanf(s, "%d", dest) != 1) {
      fprintf(stderr,
              "Integer value required for parameter \"%s\"\n",
              cmd->Name);
      exit(1);
    }
    break;
  }
  case CMDSTRINGTYPE: {
    const char *value = 0;
    if(strcmp(s, "<NULL>") != 0 && strcmp(s, "NULL") != 0) {
      value = strdup(s);
    }
    update_val_char(cmd, value);
    break;
  }
  case CMDENUMTYPE:
    SetEnum(cmd, s);
    break;
  case CMDSTRARRAYTYPE:
    SetStrArray(cmd, s);
    break;
  case CMDGTETYPE:
    SetGte(cmd, s);
    break;
  case CMDLTETYPE:
    SetLte(cmd, s);
    break;
  case CMDSUBRANGETYPE:
    SetSubrange(cmd, s);
    break;
  default:
    fprintf(stderr, "%s: %s %d %s \"%s\"\n",
            "SetParam", "Unknown Type", cmd->Type,
            "for parameter", cmd->Name);
    exit(1);
  }

  cmd->ArgStr = strdup(s);
}
|
| 416 |
+
|
| 417 |
+
/// Interpret one "name value" line and set the matching parameter.
///
/// The first word of Line may be written "prog/name".  In that form the
/// line is ignored unless prog equals ProgName, and an unknown name is then
/// an error.  Without the prefix an unknown name is silently ignored.  The
/// remainder of the line after the name, with leading separators skipped,
/// is handed to SetParam().
///
/// @param ProgName  program name that a "prog/" prefix must match
/// @param cmds      parameter table in strcmp() order, ended by a NULL Name
/// @param Line      text to interpret; modified temporarily and restored
/// @return 1 when a program-qualified name is not in the table, else 0
static int Scan(const char *ProgName, Cmd_T *cmds, char *Line)
{
  char *key = Line + strspn(Line, SepString);
  char *slash;
  char saved;
  int keylen = strcspn(key, SepString);
  int qualified = FALSE;
  int idx = 0;
  int order = 1;

  if(keylen == 0) {
    return 0;
  }

  /* optional "prog/" prefix inside the first word */
  slash = strchr(key, '/');
  if(slash && slash-key < keylen) {
    int same;
    *slash = 0;
    same = !strcmp(key, ProgName);
    *slash = '/';
    if(!same) {
      return 0;
    }
    qualified = TRUE;
    key = slash+1;
  }

  keylen = strcspn(key, SepString);
  if(keylen == 0) {
    return 0;
  }

  /* cut the name out in place just long enough to look it up */
  saved = key[keylen];
  key[keylen] = 0;
  while(cmds[idx].Name && (order = strcmp(cmds[idx].Name, key)) < 0) {
    ++idx;
  }
  key[keylen] = saved;

  if(order != 0) {
    return qualified;
  }

  SetParam(cmds+idx, key+keylen+strspn(key+keylen, SepString));
  return 0;
}
|
| 459 |
+
|
| 460 |
+
static void PrintEnum(const Cmd_T *cmd, int ValFlag, FILE *fp)
|
| 461 |
+
{
|
| 462 |
+
const Enum_T *en;
|
| 463 |
+
|
| 464 |
+
fprintf(fp, "%s", cmd->Name);
|
| 465 |
+
if(ValFlag) {
|
| 466 |
+
for(en=get_p_enums(cmd); en->Name; en++) {
|
| 467 |
+
if(*en->Name && en->Idx==get_val_int(cmd)) {
|
| 468 |
+
fprintf(fp, ": %s", en->Name);
|
| 469 |
+
}
|
| 470 |
+
}
|
| 471 |
+
}
|
| 472 |
+
fprintf(fp, "\n");
|
| 473 |
+
}
|
| 474 |
+
|
| 475 |
+
static void PrintStrArray(const Cmd_T *cmd, int ValFlag, FILE *fp)
|
| 476 |
+
{
|
| 477 |
+
char *indent,
|
| 478 |
+
**s = *(char***)cmd->Val;
|
| 479 |
+
int l = 4+strlen(cmd->Name);
|
| 480 |
+
|
| 481 |
+
fprintf(fp, "%s", cmd->Name);
|
| 482 |
+
indent = malloc(l+2);
|
| 483 |
+
memset(indent, ' ', l+1);
|
| 484 |
+
indent[l+1] = 0;
|
| 485 |
+
if(ValFlag) {
|
| 486 |
+
fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
|
| 487 |
+
if(s) while(*s) {
|
| 488 |
+
fprintf(fp, "\n%s %s", indent, *s++);
|
| 489 |
+
}
|
| 490 |
+
}
|
| 491 |
+
free(indent);
|
| 492 |
+
fprintf(fp, "\n");
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
static void PrintParam(const Cmd_T *cmd, int ValFlag, FILE *fp)
|
| 496 |
+
{
|
| 497 |
+
fprintf(fp, "%4s", "");
|
| 498 |
+
switch(cmd->Type) {
|
| 499 |
+
case CMDDOUBLETYPE:
|
| 500 |
+
fprintf(fp, "%s", cmd->Name);
|
| 501 |
+
if(ValFlag) fprintf(fp, ": %22.15e", get_val_double(cmd));
|
| 502 |
+
fprintf(fp, "\n");
|
| 503 |
+
break;
|
| 504 |
+
case CMDENUMTYPE:
|
| 505 |
+
PrintEnum(cmd, ValFlag, fp);
|
| 506 |
+
break;
|
| 507 |
+
case CMDINTTYPE:
|
| 508 |
+
case CMDSUBRANGETYPE:
|
| 509 |
+
case CMDGTETYPE:
|
| 510 |
+
case CMDLTETYPE:
|
| 511 |
+
fprintf(fp, "%s", cmd->Name);
|
| 512 |
+
if(ValFlag) fprintf(fp, ": %d", get_val_int(cmd));
|
| 513 |
+
fprintf(fp, "\n");
|
| 514 |
+
break;
|
| 515 |
+
case CMDSTRINGTYPE:
|
| 516 |
+
fprintf(fp, "%s", cmd->Name);
|
| 517 |
+
if(ValFlag) {
|
| 518 |
+
const char *value = get_val_char(cmd);
|
| 519 |
+
if(value) {
|
| 520 |
+
fprintf(fp, ": \"%s\"", value);
|
| 521 |
+
} else {
|
| 522 |
+
fprintf(fp, ": %s", "NULL");
|
| 523 |
+
}
|
| 524 |
+
}
|
| 525 |
+
fprintf(fp, "\n");
|
| 526 |
+
break;
|
| 527 |
+
case CMDSTRARRAYTYPE:
|
| 528 |
+
PrintStrArray(cmd, ValFlag, fp);
|
| 529 |
+
break;
|
| 530 |
+
default:
|
| 531 |
+
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
| 532 |
+
"PrintParam",
|
| 533 |
+
"Unknown Type",
|
| 534 |
+
cmd->Type,
|
| 535 |
+
"for parameter",
|
| 536 |
+
cmd->Name);
|
| 537 |
+
exit(1);
|
| 538 |
+
}
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
static void PrintParams(int ValFlag, FILE *fp)
|
| 542 |
+
{
|
| 543 |
+
int i;
|
| 544 |
+
|
| 545 |
+
fflush(fp);
|
| 546 |
+
if(ValFlag) {
|
| 547 |
+
fprintf(fp, "Parameters Values:\n");
|
| 548 |
+
} else {
|
| 549 |
+
fprintf(fp, "Parameters:\n");
|
| 550 |
+
}
|
| 551 |
+
for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
|
| 552 |
+
fprintf(fp, "\n");
|
| 553 |
+
fflush(fp);
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
static void CmdError(const char *opt)
|
| 557 |
+
{
|
| 558 |
+
fprintf(stderr, "Invalid option \"%s\"\n", opt);
|
| 559 |
+
fprintf(stderr, "This program expectes the following parameters:\n");
|
| 560 |
+
PrintParams(FALSE, stderr);
|
| 561 |
+
exit(0);
|
| 562 |
+
}
|
| 563 |
+
|
| 564 |
+
int GetParams(int *n, char ***a, const char *CmdFileName)
|
| 565 |
+
{
|
| 566 |
+
char *Line,
|
| 567 |
+
*ProgName;
|
| 568 |
+
int argc = *n;
|
| 569 |
+
char **argv = *a,
|
| 570 |
+
*s;
|
| 571 |
+
FILE *fp;
|
| 572 |
+
int IsPipe;
|
| 573 |
+
|
| 574 |
+
#ifdef MSDOS
|
| 575 |
+
#define PATHSEP '\\'
|
| 576 |
+
char *dot = NULL;
|
| 577 |
+
#else
|
| 578 |
+
#define PATHSEP '/'
|
| 579 |
+
#endif
|
| 580 |
+
|
| 581 |
+
if(!(Line=malloc(LINSIZ))) {
|
| 582 |
+
fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
|
| 583 |
+
LINSIZ);
|
| 584 |
+
exit(1);
|
| 585 |
+
}
|
| 586 |
+
if((ProgName=strrchr(*argv, PATHSEP))) {
|
| 587 |
+
++ProgName;
|
| 588 |
+
} else {
|
| 589 |
+
ProgName = *argv;
|
| 590 |
+
}
|
| 591 |
+
#ifdef MSDOS
|
| 592 |
+
if(dot=strchr(ProgName, '.')) *dot = 0;
|
| 593 |
+
#endif
|
| 594 |
+
--argc;
|
| 595 |
+
++argv;
|
| 596 |
+
for(;;) {
|
| 597 |
+
if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
|
| 598 |
+
CmdFileName = argv[0]+2;
|
| 599 |
+
++argv;
|
| 600 |
+
--argc;
|
| 601 |
+
}
|
| 602 |
+
if(!CmdFileName) {
|
| 603 |
+
break;
|
| 604 |
+
}
|
| 605 |
+
IsPipe = !strncmp(CmdFileName, "@@", 2);
|
| 606 |
+
fp = IsPipe
|
| 607 |
+
? popen(CmdFileName+2, "r")
|
| 608 |
+
: strcmp(CmdFileName, "-")
|
| 609 |
+
? fopen(CmdFileName, "r")
|
| 610 |
+
: stdin;
|
| 611 |
+
if(!fp) {
|
| 612 |
+
fprintf(stderr, "Unable to open command file %s\n",
|
| 613 |
+
CmdFileName);
|
| 614 |
+
exit(1);
|
| 615 |
+
}
|
| 616 |
+
while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
|
| 617 |
+
if(Scan(ProgName, cmds, Line)) {
|
| 618 |
+
CmdError(Line);
|
| 619 |
+
}
|
| 620 |
+
}
|
| 621 |
+
if(fp!=stdin) {
|
| 622 |
+
if(IsPipe) pclose(fp);
|
| 623 |
+
else fclose(fp);
|
| 624 |
+
}
|
| 625 |
+
CmdFileName = NULL;
|
| 626 |
+
}
|
| 627 |
+
while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
|
| 628 |
+
*s = ' ';
|
| 629 |
+
sprintf(Line, "%s/%s", ProgName, *argv+1);
|
| 630 |
+
*s = '=';
|
| 631 |
+
if(Scan(ProgName, cmds, Line)) CmdError(*argv);
|
| 632 |
+
--argc;
|
| 633 |
+
++argv;
|
| 634 |
+
}
|
| 635 |
+
*n = argc;
|
| 636 |
+
*a = argv;
|
| 637 |
+
#ifdef MSDOS
|
| 638 |
+
if(dot) *dot = '.';
|
| 639 |
+
#endif
|
| 640 |
+
free(Line);
|
| 641 |
+
return 0;
|
| 642 |
+
}
|
mosesdecoder/symal/cmd.h
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
// $Id$
|
| 3 |
+
|
| 4 |
+
#ifndef CMD_H
#define CMD_H

/* Kinds of parameter that can be registered with DeclareParams(). */
enum CommandType {
  CMDDOUBLETYPE   = 1,
  CMDENUMTYPE     = 2,
  CMDINTTYPE      = 3,
  CMDSTRINGTYPE   = 4,
  CMDSUBRANGETYPE = 5,
  CMDGTETYPE      = 6,
  CMDLTETYPE      = 7,
  CMDSTRARRAYTYPE = 8,
  CMDBOOLTYPE     = 9
};

/* One entry of a name table: a textual name and the int it stands for. */
typedef struct {
  const char *Name;
  int Idx;
} Enum_T;

#ifdef __cplusplus
extern "C" {
#endif

int DeclareParams(const char *, ...);
int GetParams(int *n, char ***a, const char *CmdFileName);

#ifdef __cplusplus
}
#endif

#endif /* CMD_H */
|
mosesdecoder/symal/symal.cpp
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
#include <cassert>
|
| 4 |
+
#include <iomanip>
|
| 5 |
+
#include <iostream>
|
| 6 |
+
#include <fstream>
|
| 7 |
+
#include <sstream>
|
| 8 |
+
#include <stdexcept>
|
| 9 |
+
#include <string>
|
| 10 |
+
#include <list>
|
| 11 |
+
#include <vector>
|
| 12 |
+
#include <set>
|
| 13 |
+
#include <algorithm>
|
| 14 |
+
#include <cstring>
|
| 15 |
+
#include "cmd.h"
|
| 16 |
+
|
| 17 |
+
using namespace std;
|
| 18 |
+
|
| 19 |
+
const int MAX_WORD = 10000; // maximum lengthsource/target strings
|
| 20 |
+
const int MAX_M = 400; // maximum length of source strings
|
| 21 |
+
const int MAX_N = 400; // maximum length of target strings
|
| 22 |
+
|
| 23 |
+
enum Alignment {
|
| 24 |
+
UNION = 1,
|
| 25 |
+
INTERSECT,
|
| 26 |
+
GROW,
|
| 27 |
+
SRCTOTGT,
|
| 28 |
+
TGTTOSRC,
|
| 29 |
+
};
|
| 30 |
+
|
| 31 |
+
const Enum_T END_ENUM = {0, 0};
|
| 32 |
+
|
| 33 |
+
namespace
|
| 34 |
+
{
|
| 35 |
+
Enum_T AlignEnum [] = {
|
| 36 |
+
{ "union", UNION },
|
| 37 |
+
{ "u", UNION },
|
| 38 |
+
{ "intersect", INTERSECT},
|
| 39 |
+
{ "i", INTERSECT},
|
| 40 |
+
{ "grow", GROW },
|
| 41 |
+
{ "g", GROW },
|
| 42 |
+
{ "srctotgt", SRCTOTGT },
|
| 43 |
+
{ "s2t", SRCTOTGT },
|
| 44 |
+
{ "tgttosrc", TGTTOSRC },
|
| 45 |
+
{ "t2s", TGTTOSRC },
|
| 46 |
+
END_ENUM
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
Enum_T BoolEnum [] = {
|
| 50 |
+
{ "true", true },
|
| 51 |
+
{ "yes", true },
|
| 52 |
+
{ "y", true },
|
| 53 |
+
{ "false", false },
|
| 54 |
+
{ "no", false },
|
| 55 |
+
{ "n", false },
|
| 56 |
+
END_ENUM
|
| 57 |
+
};
|
| 58 |
+
|
| 59 |
+
// global variables and constants
|
| 60 |
+
|
| 61 |
+
int* fa; //counters of covered foreign positions
|
| 62 |
+
int* ea; //counters of covered english positions
|
| 63 |
+
int** A; //alignment matrix with information symmetric/direct/inverse alignments
|
| 64 |
+
|
| 65 |
+
int verbose=0;
|
| 66 |
+
|
| 67 |
+
//read an alignment pair from the input stream.
|
| 68 |
+
|
| 69 |
+
int lc = 0;
|
| 70 |
+
|
| 71 |
+
int getals(istream& inp,int& m, int *a,int& n, int *b)
|
| 72 |
+
{
|
| 73 |
+
char w[MAX_WORD], dummy[10];
|
| 74 |
+
int i,j,freq;
|
| 75 |
+
if (inp >> freq) {
|
| 76 |
+
++lc;
|
| 77 |
+
//target sentence
|
| 78 |
+
inp >> n;
|
| 79 |
+
assert(n<MAX_N);
|
| 80 |
+
for (i=1; i<=n; i++) {
|
| 81 |
+
inp >> setw(MAX_WORD) >> w;
|
| 82 |
+
if (strlen(w)>=MAX_WORD-1) {
|
| 83 |
+
cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="
|
| 84 |
+
<< MAX_WORD-1 << endl;
|
| 85 |
+
assert(strlen(w)<MAX_WORD-1);
|
| 86 |
+
}
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
inp >> dummy; //# separator
|
| 90 |
+
// inverse alignment
|
| 91 |
+
for (i=1; i<=n; i++) inp >> b[i];
|
| 92 |
+
|
| 93 |
+
//source sentence
|
| 94 |
+
inp >> m;
|
| 95 |
+
assert(m<MAX_M);
|
| 96 |
+
for (j=1; j<=m; j++) {
|
| 97 |
+
inp >> setw(MAX_WORD) >> w;
|
| 98 |
+
if (strlen(w)>=MAX_WORD-1) {
|
| 99 |
+
cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="
|
| 100 |
+
<< MAX_WORD-1 << endl;
|
| 101 |
+
assert(strlen(w)<MAX_WORD-1);
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
inp >> dummy; //# separator
|
| 106 |
+
|
| 107 |
+
// direct alignment
|
| 108 |
+
for (j=1; j<=m; j++) {
|
| 109 |
+
inp >> a[j];
|
| 110 |
+
assert(0<=a[j] && a[j]<=n);
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
//check inverse alignemnt
|
| 114 |
+
for (i=1; i<=n; i++)
|
| 115 |
+
assert(0<=b[i] && b[i]<=m);
|
| 116 |
+
|
| 117 |
+
return 1;
|
| 118 |
+
|
| 119 |
+
} else
|
| 120 |
+
return 0;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
//print the union of the direct alignment a[1..m] and the inverse
//alignment b[1..n] as space-separated 0-based "src-tgt" pairs on one line
int prunionalignment(ostream& out,int m,int *a,int n,int* b)
{
  ostringstream buffer;

  //every link of the direct alignment
  for (int src=1; src<=m; ++src) {
    if (a[src]==0) continue;
    buffer << src-1 << "-" << a[src]-1 << " ";
  }

  //inverse links that the direct alignment does not already contain
  for (int tgt=1; tgt<=n; ++tgt) {
    if (b[tgt]==0 || a[b[tgt]]==tgt) continue;
    buffer << b[tgt]-1 << "-" << tgt-1 << " ";
  }

  //end the line, overwriting the trailing " " when there is one
  string line = buffer.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
//print the links present in both the direct alignment a[1..m] and the
//inverse alignment b[1..n] as space-separated 0-based "src-tgt" pairs
int printersect(ostream& out,int m,int *a,int n,int* b)
{
  ostringstream buffer;

  for (int src=1; src<=m; ++src) {
    const int tgt = a[src];
    //keep the link only when the inverse alignment points back at src
    if (tgt==0 || b[tgt]!=src) continue;
    buffer << src-1 << "-" << tgt-1 << " ";
  }

  //end the line, overwriting the trailing " " when there is one
  string line = buffer.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
|
| 175 |
+
|
| 176 |
+
//print the inverse alignment b[1..n] as space-separated 0-based
//"src-tgt" pairs on one line; m and a are not used
int printtgttosrc(ostream& out,int m,int *a,int n,int* b)
{
  ostringstream buffer;

  for (int tgt=1; tgt<=n; ++tgt) {
    if (b[tgt]==0) continue;
    buffer << b[tgt]-1 << "-" << tgt-1 << " ";
  }

  //end the line, overwriting the trailing " " when there is one
  string line = buffer.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
|
| 199 |
+
|
| 200 |
+
//print the direct alignment a[1..m] as space-separated 0-based
//"src-tgt" pairs on one line; n and b are not used
int printsrctotgt(ostream& out,int m,int *a,int n,int* b)
{
  ostringstream buffer;

  for (int src=1; src<=m; ++src) {
    if (a[src]==0) continue;
    buffer << src-1 << "-" << a[src]-1 << " ";
  }

  //end the line, overwriting the trailing " " when there is one
  string line = buffer.str();
  if (line.empty())
    line = "\n";
  else
    line[line.length()-1] = '\n';

  out << line;
  out.flush();

  return 1;
}
|
| 223 |
+
|
| 224 |
+
//Compute Grow Diagonal Alignment
|
| 225 |
+
//Nice property: you will never introduce more points
|
| 226 |
+
//than the union alignment. Hence, you will always be able
|
| 227 |
+
//to represent the grow alignment as the unionalignment of a
|
| 228 |
+
//directed and inverted alignment
|
| 229 |
+
|
| 230 |
+
int printgrow(ostream& out,int m,int *a,int n,int* b, bool diagonal=false,bool isfinal=false,bool bothuncovered=false)
|
| 231 |
+
{
|
| 232 |
+
|
| 233 |
+
ostringstream sout;
|
| 234 |
+
|
| 235 |
+
vector <pair <int,int> > neighbors; //neighbors
|
| 236 |
+
|
| 237 |
+
pair <int,int> entry;
|
| 238 |
+
|
| 239 |
+
neighbors.push_back(make_pair(-1,-0));
|
| 240 |
+
neighbors.push_back(make_pair(0,-1));
|
| 241 |
+
neighbors.push_back(make_pair(1,0));
|
| 242 |
+
neighbors.push_back(make_pair(0,1));
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
if (diagonal) {
|
| 246 |
+
neighbors.push_back(make_pair(-1,-1));
|
| 247 |
+
neighbors.push_back(make_pair(-1,1));
|
| 248 |
+
neighbors.push_back(make_pair(1,-1));
|
| 249 |
+
neighbors.push_back(make_pair(1,1));
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
int i,j;
|
| 254 |
+
size_t o;
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
//covered foreign and english positions
|
| 258 |
+
|
| 259 |
+
memset(fa,0,(m+1)*sizeof(int));
|
| 260 |
+
memset(ea,0,(n+1)*sizeof(int));
|
| 261 |
+
|
| 262 |
+
//matrix to quickly check if one point is in the symmetric
|
| 263 |
+
//alignment (value=2), direct alignment (=1) and inverse alignment
|
| 264 |
+
|
| 265 |
+
for (int i=1; i<=n; i++) memset(A[i],0,(m+1)*sizeof(int));
|
| 266 |
+
|
| 267 |
+
set <pair <int,int> > currentpoints; //symmetric alignment
|
| 268 |
+
set <pair <int,int> > unionalignment; //union alignment
|
| 269 |
+
|
| 270 |
+
pair <int,int> point; //variable to store points
|
| 271 |
+
set<pair <int,int> >::const_iterator k; //iterator over sets
|
| 272 |
+
|
| 273 |
+
//fill in the alignments
|
| 274 |
+
for (j=1; j<=m; j++) {
|
| 275 |
+
if (a[j]) {
|
| 276 |
+
unionalignment.insert(make_pair(a[j],j));
|
| 277 |
+
if (b[a[j]]==j) {
|
| 278 |
+
fa[j]=1;
|
| 279 |
+
ea[a[j]]=1;
|
| 280 |
+
A[a[j]][j]=2;
|
| 281 |
+
currentpoints.insert(make_pair(a[j],j));
|
| 282 |
+
} else
|
| 283 |
+
A[a[j]][j]=-1;
|
| 284 |
+
}
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
for (i=1; i<=n; i++)
|
| 288 |
+
if (b[i] && a[b[i]]!=i) { //not intersection
|
| 289 |
+
unionalignment.insert(make_pair(i,b[i]));
|
| 290 |
+
A[i][b[i]]=1;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
int added=1;
|
| 295 |
+
|
| 296 |
+
while (added) {
|
| 297 |
+
added=0;
|
| 298 |
+
///scan the current alignment
|
| 299 |
+
for (k=currentpoints.begin(); k!=currentpoints.end(); k++) {
|
| 300 |
+
//cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
|
| 301 |
+
for (o=0; o<neighbors.size(); o++) {
|
| 302 |
+
//cout << "go over check all neighbors\n";
|
| 303 |
+
point.first=k->first+neighbors[o].first;
|
| 304 |
+
point.second=k->second+neighbors[o].second;
|
| 305 |
+
//cout << point.second-1 << " " << point.first-1 << "\n";
|
| 306 |
+
//check if neighbor is inside 'matrix'
|
| 307 |
+
if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
|
| 308 |
+
//check if neighbor is in the unionalignment alignment
|
| 309 |
+
if (b[point.first]==point.second || a[point.second]==point.first) {
|
| 310 |
+
//cout << "In unionalignment ";cout.flush();
|
| 311 |
+
//check if it connects at least one uncovered word
|
| 312 |
+
if (!(ea[point.first] && fa[point.second])) {
|
| 313 |
+
//insert point in currentpoints!
|
| 314 |
+
currentpoints.insert(point);
|
| 315 |
+
A[point.first][point.second]=2;
|
| 316 |
+
ea[point.first]=1;
|
| 317 |
+
fa[point.second]=1;
|
| 318 |
+
added=1;
|
| 319 |
+
//cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
|
| 320 |
+
}
|
| 321 |
+
}
|
| 322 |
+
}
|
| 323 |
+
}
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
if (isfinal) {
|
| 327 |
+
for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
|
| 328 |
+
if (A[k->first][k->second]==1) {
|
| 329 |
+
point.first=k->first;
|
| 330 |
+
point.second=k->second;
|
| 331 |
+
//one of the two words is not covered yet
|
| 332 |
+
//cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
|
| 333 |
+
if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
|
| 334 |
+
(!bothuncovered && !(ea[point.first] && fa[point.second]))) {
|
| 335 |
+
//add it!
|
| 336 |
+
currentpoints.insert(point);
|
| 337 |
+
A[point.first][point.second]=2;
|
| 338 |
+
//keep track of new covered positions
|
| 339 |
+
ea[point.first]=1;
|
| 340 |
+
fa[point.second]=1;
|
| 341 |
+
|
| 342 |
+
//added=1;
|
| 343 |
+
//cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
|
| 344 |
+
}
|
| 345 |
+
}
|
| 346 |
+
|
| 347 |
+
for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
|
| 348 |
+
if (A[k->first][k->second]==-1) {
|
| 349 |
+
point.first=k->first;
|
| 350 |
+
point.second=k->second;
|
| 351 |
+
//one of the two words is not covered yet
|
| 352 |
+
//cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
|
| 353 |
+
if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
|
| 354 |
+
(!bothuncovered && !(ea[point.first] && fa[point.second]))) {
|
| 355 |
+
//add it!
|
| 356 |
+
currentpoints.insert(point);
|
| 357 |
+
A[point.first][point.second]=2;
|
| 358 |
+
//keep track of new covered positions
|
| 359 |
+
ea[point.first]=1;
|
| 360 |
+
fa[point.second]=1;
|
| 361 |
+
|
| 362 |
+
//added=1;
|
| 363 |
+
//cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
|
| 364 |
+
}
|
| 365 |
+
}
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
for (k=currentpoints.begin(); k!=currentpoints.end(); k++)
|
| 370 |
+
sout << k->second-1 << "-" << k->first-1 << " ";
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
//fix the last " "
|
| 374 |
+
string str = sout.str();
|
| 375 |
+
if (str.length() == 0)
|
| 376 |
+
str = "\n";
|
| 377 |
+
else
|
| 378 |
+
str.replace(str.length()-1,1,"\n");
|
| 379 |
+
|
| 380 |
+
out << str;
|
| 381 |
+
out.flush();
|
| 382 |
+
return 1;
|
| 383 |
+
|
| 384 |
+
return 1;
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
} // namespace
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
//Main file here
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
int main(int argc, char** argv)
|
| 394 |
+
{
|
| 395 |
+
|
| 396 |
+
int alignment=0;
|
| 397 |
+
char* input= NULL;
|
| 398 |
+
char* output= NULL;
|
| 399 |
+
int diagonal=false;
|
| 400 |
+
int isfinal=false;
|
| 401 |
+
int bothuncovered=false;
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum,
|
| 405 |
+
"alignment", CMDENUMTYPE, &alignment, AlignEnum,
|
| 406 |
+
"d", CMDENUMTYPE, &diagonal, BoolEnum,
|
| 407 |
+
"diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
|
| 408 |
+
"f", CMDENUMTYPE, &isfinal, BoolEnum,
|
| 409 |
+
"final", CMDENUMTYPE, &isfinal, BoolEnum,
|
| 410 |
+
"b", CMDENUMTYPE, &bothuncovered, BoolEnum,
|
| 411 |
+
"both", CMDENUMTYPE, &bothuncovered, BoolEnum,
|
| 412 |
+
"i", CMDSTRINGTYPE, &input,
|
| 413 |
+
"o", CMDSTRINGTYPE, &output,
|
| 414 |
+
"v", CMDENUMTYPE, &verbose, BoolEnum,
|
| 415 |
+
"verbose", CMDENUMTYPE, &verbose, BoolEnum,
|
| 416 |
+
|
| 417 |
+
NULL);
|
| 418 |
+
|
| 419 |
+
GetParams(&argc, &argv, NULL);
|
| 420 |
+
|
| 421 |
+
if (alignment==0) {
|
| 422 |
+
cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
|
| 423 |
+
<< "Input file or std must be in .bal format (see script giza2bal.pl).\n";
|
| 424 |
+
|
| 425 |
+
exit(1);
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
istream *inp = &std::cin;
|
| 429 |
+
ostream *out = &std::cout;
|
| 430 |
+
|
| 431 |
+
try {
|
| 432 |
+
if (input) {
|
| 433 |
+
fstream *fin = new fstream(input,ios::in);
|
| 434 |
+
if (!fin->is_open()) throw runtime_error("cannot open " + string(input));
|
| 435 |
+
inp = fin;
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
if (output) {
|
| 439 |
+
fstream *fout = new fstream(output,ios::out);
|
| 440 |
+
if (!fout->is_open()) throw runtime_error("cannot open " + string(output));
|
| 441 |
+
out = fout;
|
| 442 |
+
}
|
| 443 |
+
|
| 444 |
+
int a[MAX_M],b[MAX_N],m,n;
|
| 445 |
+
fa=new int[MAX_M+1];
|
| 446 |
+
ea=new int[MAX_N+1];
|
| 447 |
+
|
| 448 |
+
|
| 449 |
+
int sents = 0;
|
| 450 |
+
A=new int *[MAX_N+1];
|
| 451 |
+
for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
|
| 452 |
+
|
| 453 |
+
switch (alignment) {
|
| 454 |
+
case UNION:
|
| 455 |
+
cerr << "symal: computing union alignment\n";
|
| 456 |
+
while(getals(*inp,m,a,n,b)) {
|
| 457 |
+
prunionalignment(*out,m,a,n,b);
|
| 458 |
+
sents++;
|
| 459 |
+
}
|
| 460 |
+
cerr << "Sents: " << sents << endl;
|
| 461 |
+
break;
|
| 462 |
+
case INTERSECT:
|
| 463 |
+
cerr << "symal: computing intersect alignment\n";
|
| 464 |
+
while(getals(*inp,m,a,n,b)) {
|
| 465 |
+
printersect(*out,m,a,n,b);
|
| 466 |
+
sents++;
|
| 467 |
+
}
|
| 468 |
+
cerr << "Sents: " << sents << endl;
|
| 469 |
+
break;
|
| 470 |
+
case GROW:
|
| 471 |
+
cerr << "symal: computing grow alignment: diagonal ("
|
| 472 |
+
<< diagonal << ") final ("<< isfinal << ")"
|
| 473 |
+
<< "both-uncovered (" << bothuncovered <<")\n";
|
| 474 |
+
|
| 475 |
+
while(getals(*inp,m,a,n,b))
|
| 476 |
+
printgrow(*out,m,a,n,b,diagonal,isfinal,bothuncovered);
|
| 477 |
+
|
| 478 |
+
break;
|
| 479 |
+
case TGTTOSRC:
|
| 480 |
+
cerr << "symal: computing target-to-source alignment\n";
|
| 481 |
+
|
| 482 |
+
while(getals(*inp,m,a,n,b)) {
|
| 483 |
+
printtgttosrc(*out,m,a,n,b);
|
| 484 |
+
sents++;
|
| 485 |
+
}
|
| 486 |
+
cerr << "Sents: " << sents << endl;
|
| 487 |
+
break;
|
| 488 |
+
case SRCTOTGT:
|
| 489 |
+
cerr << "symal: computing source-to-target alignment\n";
|
| 490 |
+
|
| 491 |
+
while(getals(*inp,m,a,n,b)) {
|
| 492 |
+
printsrctotgt(*out,m,a,n,b);
|
| 493 |
+
sents++;
|
| 494 |
+
}
|
| 495 |
+
cerr << "Sents: " << sents << endl;
|
| 496 |
+
break;
|
| 497 |
+
default:
|
| 498 |
+
throw runtime_error("Unknown alignment");
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
delete [] fa;
|
| 502 |
+
delete [] ea;
|
| 503 |
+
for (int i=1; i<=MAX_N; i++) delete [] A[i];
|
| 504 |
+
delete [] A;
|
| 505 |
+
|
| 506 |
+
if (inp != &std::cin) {
|
| 507 |
+
delete inp;
|
| 508 |
+
}
|
| 509 |
+
if (out != &std::cout) {
|
| 510 |
+
delete inp;
|
| 511 |
+
}
|
| 512 |
+
} catch (const std::exception &e) {
|
| 513 |
+
cerr << e.what() << std::endl;
|
| 514 |
+
exit(1);
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
exit(0);
|
| 518 |
+
}
|
mosesdecoder/symal/symal.vcproj
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="Windows-1252"?>
|
| 2 |
+
<VisualStudioProject
|
| 3 |
+
ProjectType="Visual C++"
|
| 4 |
+
Version="9.00"
|
| 5 |
+
Name="symal"
|
| 6 |
+
ProjectGUID="{6716FB26-8298-47A3-A915-958AF0AC80F8}"
|
| 7 |
+
RootNamespace="symal"
|
| 8 |
+
Keyword="Win32Proj"
|
| 9 |
+
TargetFrameworkVersion="131072"
|
| 10 |
+
>
|
| 11 |
+
<Platforms>
|
| 12 |
+
<Platform
|
| 13 |
+
Name="Win32"
|
| 14 |
+
/>
|
| 15 |
+
</Platforms>
|
| 16 |
+
<ToolFiles>
|
| 17 |
+
</ToolFiles>
|
| 18 |
+
<Configurations>
|
| 19 |
+
<Configuration
|
| 20 |
+
Name="Debug|Win32"
|
| 21 |
+
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
| 22 |
+
IntermediateDirectory="$(ConfigurationName)"
|
| 23 |
+
ConfigurationType="1"
|
| 24 |
+
CharacterSet="1"
|
| 25 |
+
>
|
| 26 |
+
<Tool
|
| 27 |
+
Name="VCPreBuildEventTool"
|
| 28 |
+
/>
|
| 29 |
+
<Tool
|
| 30 |
+
Name="VCCustomBuildTool"
|
| 31 |
+
/>
|
| 32 |
+
<Tool
|
| 33 |
+
Name="VCXMLDataGeneratorTool"
|
| 34 |
+
/>
|
| 35 |
+
<Tool
|
| 36 |
+
Name="VCWebServiceProxyGeneratorTool"
|
| 37 |
+
/>
|
| 38 |
+
<Tool
|
| 39 |
+
Name="VCMIDLTool"
|
| 40 |
+
/>
|
| 41 |
+
<Tool
|
| 42 |
+
Name="VCCLCompilerTool"
|
| 43 |
+
Optimization="0"
|
| 44 |
+
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;__STDC__"
|
| 45 |
+
MinimalRebuild="true"
|
| 46 |
+
BasicRuntimeChecks="3"
|
| 47 |
+
RuntimeLibrary="3"
|
| 48 |
+
UsePrecompiledHeader="0"
|
| 49 |
+
WarningLevel="3"
|
| 50 |
+
Detect64BitPortabilityProblems="true"
|
| 51 |
+
DebugInformationFormat="4"
|
| 52 |
+
/>
|
| 53 |
+
<Tool
|
| 54 |
+
Name="VCManagedResourceCompilerTool"
|
| 55 |
+
/>
|
| 56 |
+
<Tool
|
| 57 |
+
Name="VCResourceCompilerTool"
|
| 58 |
+
/>
|
| 59 |
+
<Tool
|
| 60 |
+
Name="VCPreLinkEventTool"
|
| 61 |
+
/>
|
| 62 |
+
<Tool
|
| 63 |
+
Name="VCLinkerTool"
|
| 64 |
+
LinkIncremental="2"
|
| 65 |
+
GenerateDebugInformation="true"
|
| 66 |
+
SubSystem="1"
|
| 67 |
+
RandomizedBaseAddress="1"
|
| 68 |
+
DataExecutionPrevention="0"
|
| 69 |
+
TargetMachine="1"
|
| 70 |
+
/>
|
| 71 |
+
<Tool
|
| 72 |
+
Name="VCALinkTool"
|
| 73 |
+
/>
|
| 74 |
+
<Tool
|
| 75 |
+
Name="VCManifestTool"
|
| 76 |
+
/>
|
| 77 |
+
<Tool
|
| 78 |
+
Name="VCXDCMakeTool"
|
| 79 |
+
/>
|
| 80 |
+
<Tool
|
| 81 |
+
Name="VCBscMakeTool"
|
| 82 |
+
/>
|
| 83 |
+
<Tool
|
| 84 |
+
Name="VCFxCopTool"
|
| 85 |
+
/>
|
| 86 |
+
<Tool
|
| 87 |
+
Name="VCAppVerifierTool"
|
| 88 |
+
/>
|
| 89 |
+
<Tool
|
| 90 |
+
Name="VCPostBuildEventTool"
|
| 91 |
+
/>
|
| 92 |
+
</Configuration>
|
| 93 |
+
<Configuration
|
| 94 |
+
Name="Release|Win32"
|
| 95 |
+
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
| 96 |
+
IntermediateDirectory="$(ConfigurationName)"
|
| 97 |
+
ConfigurationType="1"
|
| 98 |
+
CharacterSet="1"
|
| 99 |
+
WholeProgramOptimization="1"
|
| 100 |
+
>
|
| 101 |
+
<Tool
|
| 102 |
+
Name="VCPreBuildEventTool"
|
| 103 |
+
/>
|
| 104 |
+
<Tool
|
| 105 |
+
Name="VCCustomBuildTool"
|
| 106 |
+
/>
|
| 107 |
+
<Tool
|
| 108 |
+
Name="VCXMLDataGeneratorTool"
|
| 109 |
+
/>
|
| 110 |
+
<Tool
|
| 111 |
+
Name="VCWebServiceProxyGeneratorTool"
|
| 112 |
+
/>
|
| 113 |
+
<Tool
|
| 114 |
+
Name="VCMIDLTool"
|
| 115 |
+
/>
|
| 116 |
+
<Tool
|
| 117 |
+
Name="VCCLCompilerTool"
|
| 118 |
+
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;__STDC__"
|
| 119 |
+
RuntimeLibrary="2"
|
| 120 |
+
UsePrecompiledHeader="0"
|
| 121 |
+
WarningLevel="3"
|
| 122 |
+
Detect64BitPortabilityProblems="true"
|
| 123 |
+
DebugInformationFormat="3"
|
| 124 |
+
/>
|
| 125 |
+
<Tool
|
| 126 |
+
Name="VCManagedResourceCompilerTool"
|
| 127 |
+
/>
|
| 128 |
+
<Tool
|
| 129 |
+
Name="VCResourceCompilerTool"
|
| 130 |
+
/>
|
| 131 |
+
<Tool
|
| 132 |
+
Name="VCPreLinkEventTool"
|
| 133 |
+
/>
|
| 134 |
+
<Tool
|
| 135 |
+
Name="VCLinkerTool"
|
| 136 |
+
LinkIncremental="1"
|
| 137 |
+
GenerateDebugInformation="true"
|
| 138 |
+
SubSystem="1"
|
| 139 |
+
OptimizeReferences="2"
|
| 140 |
+
EnableCOMDATFolding="2"
|
| 141 |
+
RandomizedBaseAddress="1"
|
| 142 |
+
DataExecutionPrevention="0"
|
| 143 |
+
TargetMachine="1"
|
| 144 |
+
/>
|
| 145 |
+
<Tool
|
| 146 |
+
Name="VCALinkTool"
|
| 147 |
+
/>
|
| 148 |
+
<Tool
|
| 149 |
+
Name="VCManifestTool"
|
| 150 |
+
/>
|
| 151 |
+
<Tool
|
| 152 |
+
Name="VCXDCMakeTool"
|
| 153 |
+
/>
|
| 154 |
+
<Tool
|
| 155 |
+
Name="VCBscMakeTool"
|
| 156 |
+
/>
|
| 157 |
+
<Tool
|
| 158 |
+
Name="VCFxCopTool"
|
| 159 |
+
/>
|
| 160 |
+
<Tool
|
| 161 |
+
Name="VCAppVerifierTool"
|
| 162 |
+
/>
|
| 163 |
+
<Tool
|
| 164 |
+
Name="VCPostBuildEventTool"
|
| 165 |
+
/>
|
| 166 |
+
</Configuration>
|
| 167 |
+
</Configurations>
|
| 168 |
+
<References>
|
| 169 |
+
</References>
|
| 170 |
+
<Files>
|
| 171 |
+
<Filter
|
| 172 |
+
Name="Source Files"
|
| 173 |
+
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
| 174 |
+
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
| 175 |
+
>
|
| 176 |
+
<File
|
| 177 |
+
RelativePath=".\cmd.c"
|
| 178 |
+
>
|
| 179 |
+
</File>
|
| 180 |
+
<File
|
| 181 |
+
RelativePath=".\symal.cpp"
|
| 182 |
+
>
|
| 183 |
+
</File>
|
| 184 |
+
</Filter>
|
| 185 |
+
<Filter
|
| 186 |
+
Name="Header Files"
|
| 187 |
+
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
| 188 |
+
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
| 189 |
+
>
|
| 190 |
+
<File
|
| 191 |
+
RelativePath=".\cmd.h"
|
| 192 |
+
>
|
| 193 |
+
</File>
|
| 194 |
+
</Filter>
|
| 195 |
+
<Filter
|
| 196 |
+
Name="Resource Files"
|
| 197 |
+
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
|
| 198 |
+
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
|
| 199 |
+
>
|
| 200 |
+
</Filter>
|
| 201 |
+
</Files>
|
| 202 |
+
<Globals>
|
| 203 |
+
</Globals>
|
| 204 |
+
</VisualStudioProject>
|
mosesdecoder/util/CMakeLists.txt
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cmake_minimum_required(VERSION 2.8.8)
|
| 2 |
+
#
|
| 3 |
+
# The KenLM cmake files make use of add_library(... OBJECT ...)
|
| 4 |
+
#
|
| 5 |
+
# This syntax allows grouping of source files when compiling
|
| 6 |
+
# (effectively creating "fake" libraries based on source subdirs).
|
| 7 |
+
#
|
| 8 |
+
# This syntax was only added in cmake version 2.8.8
|
| 9 |
+
#
|
| 10 |
+
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Explicitly list the source files for this subdirectory
|
| 17 |
+
#
|
| 18 |
+
# If you add any source files to this subdirectory
|
| 19 |
+
# that should be included in the kenlm library,
|
| 20 |
+
# (this excludes any unit test files)
|
| 21 |
+
# you should add them to the following list:
|
| 22 |
+
#
|
| 23 |
+
# Because we do not set PARENT_SCOPE in the following definition,
|
| 24 |
+
# CMake files in the parent directory won't be able to access this variable.
|
| 25 |
+
#
|
| 26 |
+
set(KENLM_UTIL_SOURCE
|
| 27 |
+
bit_packing.cc
|
| 28 |
+
ersatz_progress.cc
|
| 29 |
+
exception.cc
|
| 30 |
+
file.cc
|
| 31 |
+
file_piece.cc
|
| 32 |
+
float_to_string.cc
|
| 33 |
+
integer_to_string.cc
|
| 34 |
+
mmap.cc
|
| 35 |
+
murmur_hash.cc
|
| 36 |
+
parallel_read.cc
|
| 37 |
+
pool.cc
|
| 38 |
+
read_compressed.cc
|
| 39 |
+
scoped.cc
|
| 40 |
+
string_piece.cc
|
| 41 |
+
usage.cc
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# This directory has children that need to be processed
|
| 45 |
+
add_subdirectory(double-conversion)
|
| 46 |
+
add_subdirectory(stream)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Group these objects together for later use.
|
| 50 |
+
#
|
| 51 |
+
# Given add_library(foo OBJECT ${my_foo_sources}),
|
| 52 |
+
# refer to these objects as $<TARGET_OBJECTS:foo>
|
| 53 |
+
#
|
| 54 |
+
add_library(kenlm_util OBJECT ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL_STREAM_SOURCE} ${KENLM_UTIL_SOURCE})
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Only compile and run unit tests if tests should be run
|
| 59 |
+
if(BUILD_TESTING)
|
| 60 |
+
|
| 61 |
+
# Explicitly list the Boost test files to be compiled
|
| 62 |
+
set(KENLM_BOOST_TESTS_LIST
|
| 63 |
+
bit_packing_test
|
| 64 |
+
joint_sort_test
|
| 65 |
+
multi_intersection_test
|
| 66 |
+
probing_hash_table_test
|
| 67 |
+
read_compressed_test
|
| 68 |
+
sorted_uniform_test
|
| 69 |
+
tokenize_piece_test
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
|
| 73 |
+
DEPENDS $<TARGET_OBJECTS:kenlm_util>
|
| 74 |
+
LIBRARIES ${Boost_LIBRARIES} pthread)
|
| 75 |
+
|
| 76 |
+
# file_piece_test requires an extra command line parameter
|
| 77 |
+
KenLMAddTest(TEST file_piece_test
|
| 78 |
+
DEPENDS $<TARGET_OBJECTS:kenlm_util>
|
| 79 |
+
LIBRARIES ${Boost_LIBRARIES} pthread
|
| 80 |
+
TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc)
|
| 81 |
+
endif()
|
mosesdecoder/util/Jamfile
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
local compressed_flags = <include>.. <define>HAVE_ZLIB ;
|
| 2 |
+
local compressed_deps = /top//z ;
|
| 3 |
+
if [ test_library "bz2" ] && [ test_header "bzlib.h" ] {
|
| 4 |
+
external-lib bz2 ;
|
| 5 |
+
compressed_flags += <define>HAVE_BZLIB ;
|
| 6 |
+
compressed_deps += bz2 ;
|
| 7 |
+
}
|
| 8 |
+
if [ test_library "lzma" ] && [ test_header "lzma.h" ] {
|
| 9 |
+
external-lib lzma ;
|
| 10 |
+
compressed_flags += <define>HAVE_XZLIB ;
|
| 11 |
+
compressed_deps += lzma ;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
#rt is needed for clock_gettime on linux. But it's already included with threading=multi
|
| 15 |
+
lib rt ;
|
| 16 |
+
|
| 17 |
+
obj read_compressed.o : read_compressed.cc : $(compressed_flags) ;
|
| 18 |
+
alias read_compressed : read_compressed.o $(compressed_deps) ;
|
| 19 |
+
obj read_compressed_test.o : read_compressed_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
|
| 20 |
+
obj file_piece_test.o : file_piece_test.cc /top//boost_unit_test_framework : $(compressed_flags) ;
|
| 21 |
+
|
| 22 |
+
fakelib parallel_read : parallel_read.cc : <threading>multi:<source>/top//boost_thread <threading>multi:<define>WITH_THREADS : : <include>.. ;
|
| 23 |
+
|
| 24 |
+
fakelib kenutil : [ glob *.cc : parallel_read.cc read_compressed.cc *_main.cc *_test.cc ] read_compressed parallel_read double-conversion//double-conversion : <include>.. <os>LINUX,<threading>single:<source>rt : : <include>.. ;
|
| 25 |
+
|
| 26 |
+
exe cat_compressed : cat_compressed_main.cc kenutil ;
|
| 27 |
+
|
| 28 |
+
#Does not install this
|
| 29 |
+
exe probing_hash_table_benchmark : probing_hash_table_benchmark_main.cc kenutil ;
|
| 30 |
+
|
| 31 |
+
alias programs : cat_compressed ;
|
| 32 |
+
|
| 33 |
+
import testing ;
|
| 34 |
+
|
| 35 |
+
run file_piece_test.o kenutil /top//boost_unit_test_framework : : file_piece.cc ;
|
| 36 |
+
for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
|
| 37 |
+
local name = [ MATCH "(.*)\.cc" : $(t) ] ;
|
| 38 |
+
unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_filesystem /top//boost_system ;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
build-project stream ;
|
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab8564bc029fa8f47779ac76015e129d019fb75c9c8bbda96ccb54e419e67a5c
|
| 3 |
+
size 1747304
|
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.output
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Boost.Test WARNING: token "util/file_piece.cc" does not correspond to the Boost.Test argument
|
| 2 |
+
and should be placed after all Boost.Test arguments and the -- separator.
|
| 3 |
+
For example: file_piece_test --random -- util/file_piece.cc
|
| 4 |
+
Running 6 test cases...
|
| 5 |
+
|
| 6 |
+
*** No errors detected
|
| 7 |
+
|
| 8 |
+
EXIT STATUS: 0
|
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.run
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Boost.Test WARNING: token "util/file_piece.cc" does not correspond to the Boost.Test argument
|
| 2 |
+
and should be placed after all Boost.Test arguments and the -- separator.
|
| 3 |
+
For example: file_piece_test --random -- util/file_piece.cc
|
| 4 |
+
Running 6 test cases...
|
| 5 |
+
|
| 6 |
+
*** No errors detected
|
| 7 |
+
|
| 8 |
+
EXIT STATUS: 0
|
mosesdecoder/util/bin/file_piece_test.test/gcc-9/release/link-static/threading-multi/file_piece_test.test
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
passed
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing.o
ADDED
|
Binary file (10.4 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:488bd05ca0bdcd9baf07b6c4618502d8b9cdf5b1ef97a55149be622a2e33cc60
|
| 3 |
+
size 1701840
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.o
ADDED
|
Binary file (109 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/bit_packing_test.passed
ADDED
|
File without changes
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed
ADDED
|
Binary file (839 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/cat_compressed_main.o
ADDED
|
Binary file (6.75 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/ersatz_progress.o
ADDED
|
Binary file (5.73 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/exception.o
ADDED
|
Binary file (21.2 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file.o
ADDED
|
Binary file (144 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece.o
ADDED
|
Binary file (65.4 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/file_piece_test.o
ADDED
|
Binary file (210 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/float_to_string.o
ADDED
|
Binary file (4.61 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string.o
ADDED
|
Binary file (12.5 kB). View file
|
|
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47f73d0d532194f98ca0b5de04960b79173ae4cb6291106f75af83f478b2ee94
|
| 3 |
+
size 1715968
|
mosesdecoder/util/bin/gcc-9/release/link-static/threading-multi/integer_to_string_test.o
ADDED
|
Binary file (178 kB). View file
|
|
|