| #ifndef LM_BUILDER_PIPELINE_H |
| #define LM_BUILDER_PIPELINE_H |
|
|
| #include "lm/builder/adjust_counts.hh" |
| #include "lm/builder/initial_probabilities.hh" |
| #include "lm/builder/header_info.hh" |
| #include "lm/lm_exception.hh" |
| #include "lm/word_index.hh" |
| #include "util/stream/config.hh" |
| #include "util/file_piece.hh" |
|
|
| #include <string> |
| #include <cstddef> |
|
|
| namespace lm { namespace builder { |
|
|
| class Output; |
|
|
| struct PipelineConfig { |
| std::size_t order; |
| util::stream::SortConfig sort; |
| InitialProbabilitiesConfig initial_probs; |
| util::stream::ChainConfig read_backoffs; |
|
|
| |
| |
| lm::WordIndex vocab_estimate; |
|
|
| |
| std::size_t minimum_block; |
|
|
| |
| std::size_t block_count; |
|
|
| |
| |
| std::vector<uint64_t> prune_thresholds; |
| bool prune_vocab; |
| std::string prune_vocab_file; |
|
|
| |
| bool renumber_vocabulary; |
|
|
| |
| DiscountConfig discount; |
|
|
| |
| bool output_q; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| uint64_t vocab_size_for_unk; |
|
|
| |
| |
| |
| |
| WarningAction disallowed_symbol_action; |
|
|
| const std::string &TempPrefix() const { return sort.temp_prefix; } |
| std::size_t TotalMemory() const { return sort.total_memory; } |
| }; |
|
|
| |
| void Pipeline(PipelineConfig &config, int text_file, Output &output); |
|
|
| }} |
| #endif |
|
|