| namespace lm { | |
| namespace base { | |
| template <class T, class U, class V> class ModelFacade; | |
| /* Vocabulary interface. Call Index(string) and get a word index for use in | |
| * calling Model. It provides faster convenience functions for <s>, </s>, and | |
| * <unk> although you can also find these using Index. | |
| * | |
| * Some models do not load the mapping from index to string. If you need this, | |
| * check if the model Vocabulary class implements such a function and access it | |
| * directly. | |
| * | |
| * The Vocabulary object is always owned by the Model and can be retrieved from | |
| * the Model using BaseVocabulary() for this abstract interface or | |
| * GetVocabulary() for the actual implementation (in which case you'll need the | |
| * actual implementation of the Model too). | |
| */ | |
| class Vocabulary { | |
| public: | |
| virtual ~Vocabulary(); | |
| WordIndex BeginSentence() const { return begin_sentence_; } | |
| WordIndex EndSentence() const { return end_sentence_; } | |
| WordIndex NotFound() const { return not_found_; } | |
| /* Most implementations allow StringPiece lookups and need only override | |
| * Index(StringPiece). SRI requires null termination and overrides all | |
| * three methods. | |
| */ | |
| virtual WordIndex Index(const StringPiece &str) const = 0; | |
| virtual WordIndex Index(const std::string &str) const { | |
| return Index(StringPiece(str)); | |
| } | |
| virtual WordIndex Index(const char *str) const { | |
| return Index(StringPiece(str)); | |
| } | |
| protected: | |
| // Call SetSpecial afterward. | |
| Vocabulary() {} | |
| Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) { | |
| SetSpecial(begin_sentence, end_sentence, not_found); | |
| } | |
| void SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found); | |
| WordIndex begin_sentence_, end_sentence_, not_found_; | |
| private: | |
| // Disable copy constructors. They're private and undefined. | |
| // Ersatz boost::noncopyable. | |
| Vocabulary(const Vocabulary &); | |
| Vocabulary &operator=(const Vocabulary &); | |
| }; | |
| /* There are two ways to access a Model. | |
| * | |
| * | |
| * OPTION 1: Access the Model directly (e.g. lm::ngram::Model in model.hh). | |
| * | |
| * Every Model implements the scoring function: | |
| * float Score( | |
| * const Model::State &in_state, | |
| * const WordIndex new_word, | |
| * Model::State &out_state) const; | |
| * | |
| * It can also return the length of n-gram matched by the model: | |
| * FullScoreReturn FullScore( | |
| * const Model::State &in_state, | |
| * const WordIndex new_word, | |
| * Model::State &out_state) const; | |
| * | |
| * | |
| * There are also accessor functions: | |
| * const State &BeginSentenceState() const; | |
| * const State &NullContextState() const; | |
| * const Vocabulary &GetVocabulary() const; | |
| * unsigned int Order() const; | |
| * | |
| * NB: In case you're wondering why the model implementation looks like it's | |
| * missing these methods, see facade.hh. | |
| * | |
| * This is the fastest way to use a model and presents a normal State class to | |
| * be included in a hypothesis state structure. | |
| * | |
| * | |
| * OPTION 2: Use the virtual interface below. | |
| * | |
| * The virtual interface allow you to decide which Model to use at runtime | |
| * without templatizing everything on the Model type. However, each Model has | |
| * its own State class, so a single State cannot be efficiently provided (it | |
| * would require using the maximum memory of any Model's State or memory | |
| * allocation with each lookup). This means you become responsible for | |
| * allocating memory with size StateSize() and passing it to the Score or | |
| * FullScore functions provided here. | |
| * | |
| * For example, cdec has a std::string containing the entire state of a | |
| * hypothesis. It can reserve StateSize bytes in this string for the model | |
| * state. | |
| * | |
| * All the State objects are POD, so it's ok to use raw memory for storing | |
| * State. | |
| * in_state and out_state must not have the same address. | |
| */ | |
| class Model { | |
| public: | |
| virtual ~Model(); | |
| size_t StateSize() const { return state_size_; } | |
| const void *BeginSentenceMemory() const { return begin_sentence_memory_; } | |
| void BeginSentenceWrite(void *to) const { memcpy(to, begin_sentence_memory_, StateSize()); } | |
| const void *NullContextMemory() const { return null_context_memory_; } | |
| void NullContextWrite(void *to) const { memcpy(to, null_context_memory_, StateSize()); } | |
| // Requires in_state != out_state | |
| virtual float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0; | |
| // Requires in_state != out_state | |
| virtual FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0; | |
| // Prefer to use FullScore. The context words should be provided in reverse order. | |
| virtual FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const = 0; | |
| unsigned char Order() const { return order_; } | |
| const Vocabulary &BaseVocabulary() const { return *base_vocab_; } | |
| private: | |
| template <class T, class U, class V> friend class ModelFacade; | |
| explicit Model(size_t state_size) : state_size_(state_size) {} | |
| const size_t state_size_; | |
| const void *begin_sentence_memory_, *null_context_memory_; | |
| const Vocabulary *base_vocab_; | |
| unsigned char order_; | |
| // Disable copy constructors. They're private and undefined. | |
| // Ersatz boost::noncopyable. | |
| Model(const Model &); | |
| Model &operator=(const Model &); | |
| }; | |
| } // mamespace base | |
| } // namespace lm | |