| | #ifndef LM_MODEL_H |
| | #define LM_MODEL_H |
| |
|
| | #include "lm/bhiksha.hh" |
| | #include "lm/binary_format.hh" |
| | #include "lm/config.hh" |
| | #include "lm/facade.hh" |
| | #include "lm/quantize.hh" |
| | #include "lm/search_hashed.hh" |
| | #include "lm/search_trie.hh" |
| | #include "lm/state.hh" |
| | #include "lm/value.hh" |
| | #include "lm/vocab.hh" |
| | #include "lm/weights.hh" |
| |
|
| | #include "util/murmur_hash.hh" |
| |
|
| | #include <algorithm> |
| | #include <vector> |
| | #include <cstring> |
| |
|
| | namespace util { class FilePiece; } |
| |
|
| | namespace lm { |
| | namespace ngram { |
| | namespace detail { |
| |
|
| | |
| | |
| | template <class Search, class VocabularyT> class GenericModel : public base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> { |
| | private: |
| | typedef base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> P; |
| | public: |
| | |
| | static const ModelType kModelType; |
| |
|
| | static const unsigned int kVersion = Search::kVersion; |
| |
|
| | |
| | |
| | |
| | |
| | static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config = Config()); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | explicit GenericModel(const char *file, const Config &config = Config()); |
| |
|
| | |
| | |
| | |
| | |
| | FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void GetState(const WordIndex *context_rbegin, const WordIndex *context_rend, State &out_state) const; |
| |
|
| | |
| | |
| | |
| | |
| | FullScoreReturn ExtendLeft( |
| | |
| | const WordIndex *add_rbegin, const WordIndex *add_rend, |
| | |
| | const float *backoff_in, |
| | |
| | uint64_t extend_pointer, |
| | |
| | unsigned char extend_length, |
| | |
| | float *backoff_out, |
| | |
| | unsigned char &next_use) const; |
| |
|
| | |
| | |
| | |
| | |
| | float UnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const { |
| | |
| | return Search::kDifferentRest ? InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0; |
| | } |
| |
|
| | private: |
| | FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const; |
| |
|
| | |
| | void ResumeScore(const WordIndex *context_rbegin, const WordIndex *const context_rend, unsigned char starting_order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const; |
| |
|
| | |
| | void SetupMemory(void *start, const std::vector<uint64_t> &counts, const Config &config); |
| |
|
| | void InitializeFromARPA(int fd, const char *file, const Config &config); |
| |
|
| | float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const; |
| |
|
| | BinaryFormat backing_; |
| |
|
| | VocabularyT vocab_; |
| |
|
| | Search search_; |
| | }; |
| |
|
| | } |
| |
|
| | |
| | |
// LM_COMMA() expands to a single comma.  Macro arguments are split on commas
// and angle brackets do not protect them, so a template-id with several
// arguments cannot be written directly as one argument.  Writing LM_COMMA()
// in place of each comma keeps it one argument; the comma only appears once
// the argument has been expanded.
#define LM_COMMA() ,

// LM_NAME_MODEL(name, from) defines class `name`, publicly derived from
// `from`, with one constructor that forwards (file, config) to `from`.
// `config` defaults to Config().  The result is a real class, not a typedef,
// so it can be forward declared as `class name;`.
//
// Each continuation backslash must be the very last character on its line.
#define LM_NAME_MODEL(name, from) \
  class name : public from { \
   public: \
    name(const char *file, const Config &config = Config()) \
        : from(file, config) {} \
  };
| |
|
| | LM_NAME_MODEL(ProbingModel, detail::GenericModel<detail::HashedSearch<BackoffValue> LM_COMMA() ProbingVocabulary>); |
| | LM_NAME_MODEL(RestProbingModel, detail::GenericModel<detail::HashedSearch<RestValue> LM_COMMA() ProbingVocabulary>); |
| | LM_NAME_MODEL(TrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>); |
| | LM_NAME_MODEL(ArrayTrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>); |
| | LM_NAME_MODEL(QuantTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>); |
| | LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>); |
| |
|
| | |
| | typedef ::lm::ngram::ProbingVocabulary Vocabulary; |
| | typedef ProbingModel Model; |
| |
|
| | |
| | |
| | |
| | base::Model *LoadVirtual(const char *file_name, const Config &config = Config(), ModelType if_arpa = PROBING); |
| |
|
| | } |
| | } |
| |
|
| | #endif |
| |
|