| namespace util { namespace stream { class ChainPositions; }} | |
| // Warning: PrintARPA routines read all unigrams before all bigrams before all | |
| // trigrams etc. So if other parts of the chain move jointly, you'll have to | |
| // buffer. | |
| namespace lm { | |
| class VocabReconstitute { | |
| public: | |
| // fd must be alive for life of this object; does not take ownership. | |
| explicit VocabReconstitute(int fd); | |
| const char *Lookup(WordIndex index) const { | |
| assert(index < map_.size() - 1); | |
| return map_[index]; | |
| } | |
| StringPiece LookupPiece(WordIndex index) const { | |
| return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]); | |
| } | |
| std::size_t Size() const { | |
| // There's an extra entry to support StringPiece lengths. | |
| return map_.size() - 1; | |
| } | |
| private: | |
| util::scoped_memory memory_; | |
| std::vector<const char*> map_; | |
| }; | |
| class PrintARPA { | |
| public: | |
| // Does not take ownership of vocab_fd or out_fd. | |
| explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts) | |
| : vocab_fd_(vocab_fd), out_fd_(out_fd), counts_(counts) {} | |
| void Run(const util::stream::ChainPositions &positions); | |
| private: | |
| int vocab_fd_; | |
| int out_fd_; | |
| std::vector<uint64_t> counts_; | |
| }; | |
| } // namespace lm | |