| |
| |
| |
| |
| @@ -21,7 +21,7 @@ |
| |
| #include "bpe_model.h" |
| #include "freelist.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| +#include "absl/container/flat_hash_map.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -18,7 +18,8 @@ |
| #include <vector> |
| |
| #include "bpe_model_trainer.h" |
| -#include "third_party/absl/container/flat_hash_set.h" |
| +#include "absl/container/flat_hash_set.h" |
| +#include "absl/status/status.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -171,7 +172,7 @@ void Trainer::UpdateActiveSymbols() { |
| active_symbols_.insert(symbols.begin(), symbols.begin() + size); |
| } |
| |
| -util::Status Trainer::Train() { |
| +absl::Status Trainer::Train() { |
| RETURN_IF_ERROR(status()); |
| |
| CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); |
| |
| |
| |
| |
| @@ -20,7 +20,8 @@ |
| #include <vector> |
| |
| #include "sentencepiece_model.pb.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/status/status.h" |
| #include "trainer_interface.h" |
| |
| namespace sentencepiece { |
| @@ -35,7 +36,7 @@ class Trainer : public TrainerInterface { |
| : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, |
| denormalizer_spec) {} |
| |
| - util::Status Train() override; |
| + absl::Status Train() override; |
| |
| private: |
| // Symbol represents a character or symbol bigram. |
| |
| |
| |
| |
| @@ -20,8 +20,8 @@ |
| #include "sentencepiece_processor.h" |
| #include "sentencepiece_trainer.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_join.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_join.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -18,10 +18,11 @@ |
| |
| #include "builder.h" |
| #include "filesystem.h" |
| -#include "third_party/absl/strings/str_join.h" |
| -#include "third_party/absl/strings/str_replace.h" |
| -#include "third_party/absl/strings/str_split.h" |
| -#include "third_party/absl/strings/strip.h" |
| +#include "absl/strings/str_join.h" |
| +#include "absl/strings/str_replace.h" |
| +#include "absl/strings/str_split.h" |
| +#include "absl/strings/strip.h" |
| +#include "absl/status/status.h" |
| |
| #ifdef ENABLE_NFKC_COMPILE |
| #include <unicode/errorcode.h> |
| @@ -36,7 +37,7 @@ |
| |
| #include "normalization_rule.h" |
| #include "normalizer.h" |
| -#include "third_party/darts_clone/darts.h" |
| +#include "include/darts.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -145,7 +146,7 @@ Builder::Chars Normalize(const Builder::CharsMap &chars_map, |
| } // namespace |
| |
| // static |
| -util::Status Builder::CompileCharsMap(const CharsMap &chars_map, |
| +absl::Status Builder::CompileCharsMap(const CharsMap &chars_map, |
| std::string *output) { |
| CHECK_OR_RETURN(output); |
| CHECK_OR_RETURN(!chars_map.empty()); |
| @@ -212,7 +213,7 @@ util::Status Builder::CompileCharsMap(const CharsMap &chars_map, |
| } |
| |
| // static |
| -util::Status Builder::DecompileCharsMap(absl::string_view blob, |
| +absl::Status Builder::DecompileCharsMap(absl::string_view blob, |
| Builder::CharsMap *chars_map) { |
| CHECK_OR_RETURN(chars_map); |
| chars_map->clear(); |
| @@ -265,7 +266,7 @@ util::Status Builder::DecompileCharsMap(absl::string_view blob, |
| } |
| |
| // static |
| -util::Status Builder::GetPrecompiledCharsMap(const std::string &name, |
| +absl::Status Builder::GetPrecompiledCharsMap(const std::string &name, |
| std::string *output) { |
| CHECK_OR_RETURN(output); |
| |
| @@ -282,12 +283,12 @@ util::Status Builder::GetPrecompiledCharsMap(const std::string &name, |
| return util::OkStatus(); |
| } |
| } |
| - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) |
| + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) |
| << "No precompiled charsmap is found: " << name; |
| } |
| |
| // static |
| -util::Status Builder::BuildNFKCMap(CharsMap *chars_map) { |
| +absl::Status Builder::BuildNFKCMap(CharsMap *chars_map) { |
| #ifdef ENABLE_NFKC_COMPILE |
| LOG(INFO) << "Running BuildNFKCMap"; |
| |
| @@ -345,7 +346,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) { |
| return util::OkStatus(); |
| } |
| |
| -util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { |
| +absl::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { |
| #ifdef ENABLE_NFKC_COMPILE |
| LOG(INFO) << "Running BuildNmtNFKCMap"; |
| |
| @@ -420,7 +421,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { |
| } |
| |
| // static |
| -util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { |
| +absl::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { |
| #ifdef ENABLE_NFKC_COMPILE |
| for (auto &c : *chars_map) { |
| std::vector<char32> trg; |
| @@ -445,7 +446,7 @@ util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { |
| } |
| |
| // static |
| -util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { |
| +absl::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { |
| #ifdef ENABLE_NFKC_COMPILE |
| CharsMap nfkc_map; |
| RETURN_IF_ERROR(Builder::BuildNFKCMap(&nfkc_map)); |
| @@ -460,7 +461,7 @@ util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { |
| } |
| |
| // static |
| -util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { |
| +absl::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { |
| #ifdef ENABLE_NFKC_COMPILE |
| CharsMap nfkc_map; |
| RETURN_IF_ERROR(Builder::BuildNmtNFKCMap(&nfkc_map)); |
| @@ -475,7 +476,7 @@ util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { |
| } |
| |
| // static |
| -util::Status Builder::LoadCharsMap(absl::string_view filename, |
| +absl::Status Builder::LoadCharsMap(absl::string_view filename, |
| CharsMap *chars_map) { |
| LOG(INFO) << "Loading mapping file: " << filename.data(); |
| CHECK_OR_RETURN(chars_map); |
| @@ -510,7 +511,7 @@ util::Status Builder::LoadCharsMap(absl::string_view filename, |
| } |
| |
| // static |
| -util::Status Builder::SaveCharsMap(absl::string_view filename, |
| +absl::Status Builder::SaveCharsMap(absl::string_view filename, |
| const Builder::CharsMap &chars_map) { |
| auto output = filesystem::NewWritableFile(filename); |
| RETURN_IF_ERROR(output->status()); |
| @@ -540,7 +541,7 @@ util::Status Builder::SaveCharsMap(absl::string_view filename, |
| } |
| |
| // static |
| -util::Status Builder::RemoveRedundantMap(CharsMap *chars_map) { |
| +absl::Status Builder::RemoveRedundantMap(CharsMap *chars_map) { |
| CHECK_OR_RETURN(chars_map); |
| |
| CharsMap new_chars_map; |
| |
| |
| |
| |
| @@ -22,7 +22,8 @@ |
| #include "common.h" |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| |
| namespace sentencepiece { |
| namespace normalizer { |
| @@ -43,15 +44,15 @@ class Builder { |
| // String-to-string mapping. |
| using CharsMap = std::map<Chars, Chars>; |
| |
| - static util::Status CompileCharsMap(const CharsMap &chars_map, |
| + static absl::Status CompileCharsMap(const CharsMap &chars_map, |
| std::string *output); |
| |
| // Decompiles `blob` into `chars_map`. |
| - static util::Status DecompileCharsMap(absl::string_view blob, |
| + static absl::Status DecompileCharsMap(absl::string_view blob, |
| CharsMap *chars_map); |
| |
| // Returns a pre-compiled binary index with `name`. |
| - static util::Status GetPrecompiledCharsMap(const std::string &name, |
| + static absl::Status GetPrecompiledCharsMap(const std::string &name, |
| std::string *output); |
| |
| // Makes a normalization mapping based on NFKC. |
| @@ -89,30 +90,30 @@ class Builder { |
| // normalizer is the goal of SentencePiece. |
| // |
| // TODO(taku): Make NFC, NFD, and NFKD mapping if necessary. |
| - static util::Status BuildNFKCMap(CharsMap *chars_map); |
| + static absl::Status BuildNFKCMap(CharsMap *chars_map); |
| |
| // Makes an NFKC-based mapping with NMT specific modifications around |
| // whitespaces. |
| - static util::Status BuildNmtNFKCMap(CharsMap *chars_map); |
| + static absl::Status BuildNmtNFKCMap(CharsMap *chars_map); |
| |
| // Merge Unicode case folding mapping into `chars_map`. |
| - static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map); |
| + static absl::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map); |
| |
| // Makes NFKC with Unicode case folding. |
| - static util::Status BuildNFKC_CFMap(CharsMap *chars_map); |
| + static absl::Status BuildNFKC_CFMap(CharsMap *chars_map); |
| |
| // Makes NMT NFKC with Unicode case folding. |
| - static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map); |
| + static absl::Status BuildNmtNFKC_CFMap(CharsMap *chars_map); |
| |
| // Builds Chars map save in `filename`. |
| // Format: |
| // src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2... |
| // (src|trg)_ucharX must be a hex of Unicode code point. |
| - static util::Status LoadCharsMap(absl::string_view filename, |
| + static absl::Status LoadCharsMap(absl::string_view filename, |
| CharsMap *chars_map); |
| |
| // Saves Chars map to `filename` as TSV. |
| - static util::Status SaveCharsMap(absl::string_view filename, |
| + static absl::Status SaveCharsMap(absl::string_view filename, |
| const CharsMap &chars_map); |
| |
| private: |
| @@ -121,7 +122,7 @@ class Builder { |
| // Removes redundant rules from `chars_map`. |
| // When char_maps have "aa" => "bb" and "a" => "b", the first |
| // rule is not necessary since the second rule can cover the first rule. |
| - static util::Status RemoveRedundantMap(CharsMap *chars_map); |
| + static absl::Status RemoveRedundantMap(CharsMap *chars_map); |
| }; |
| } // namespace normalizer |
| } // namespace sentencepiece |
| |
| |
| |
| |
| @@ -18,7 +18,7 @@ |
| #include "normalizer.h" |
| #include "sentencepiece_trainer.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| +#include "absl/strings/str_cat.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -16,12 +16,13 @@ |
| |
| #include "char_model.h" |
| #include "char_model_trainer.h" |
| +#include "absl/status/status.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| namespace character { |
| |
| -util::Status Trainer::Train() { |
| +absl::Status Trainer::Train() { |
| RETURN_IF_ERROR(status()); |
| |
| CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); |
| |
| |
| |
| |
| @@ -17,6 +17,7 @@ |
| |
| #include "sentencepiece_model.pb.h" |
| #include "trainer_interface.h" |
| +#include "absl/status/status.h" |
| |
| namespace sentencepiece { |
| namespace character { |
| @@ -30,7 +31,7 @@ class Trainer : public TrainerInterface { |
| : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, |
| denormalizer_spec) {} |
| |
| - util::Status Train() override; |
| + absl::Status Train() override; |
| }; |
| } // namespace character |
| } // namespace sentencepiece |
| |
| |
| |
| |
| @@ -19,8 +19,8 @@ |
| #include "filesystem.h" |
| #include "sentencepiece_processor.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_join.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_join.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -146,6 +146,7 @@ inline const char *BaseName(const char *path) { |
| } // namespace logging |
| } // namespace sentencepiece |
| |
| +#ifndef LOG |
| #define LOG(severity) \ |
| (::sentencepiece::logging::GetMinLogLevel() > \ |
| ::sentencepiece::logging::LOG_##severity) \ |
| @@ -156,6 +157,7 @@ inline const char *BaseName(const char *path) { |
| std::cerr << ::sentencepiece::logging::BaseName(__FILE__) << "(" \ |
| << __LINE__ << ") " \ |
| << "LOG(" << #severity << ") " |
| +#endif // LOG |
| |
| #define CHECK(condition) \ |
| (condition) ? 0 \ |
| |
| |
| |
| |
| @@ -22,8 +22,9 @@ |
| #include "filesystem.h" |
| #include "init.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| |
| using sentencepiece::normalizer::Builder; |
| |
| @@ -160,7 +161,7 @@ int main(int argc, char **argv) { |
| |
| const std::vector<std::pair< |
| std::string, |
| - std::function<sentencepiece::util::Status(Builder::CharsMap *)>>> |
| + std::function<absl::Status(Builder::CharsMap *)>>> |
| kRuleList = {{"nfkc", Builder::BuildNFKCMap}, |
| {"nmt_nfkc", Builder::BuildNmtNFKCMap}, |
| {"nfkc_cf", Builder::BuildNFKC_CFMap}, |
| |
| |
| |
| |
| @@ -20,8 +20,8 @@ |
| #ifdef _USE_EXTERNAL_ABSL |
| // Naive workaround to define minloglevel on external absl package. |
| // We want to define them in other cc file. |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/flags/parse.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/flags/parse.h" |
| ABSL_FLAG(int32, minloglevel, 0, |
| "Messages logged at a lower level than this don't actually."); |
| #endif |
| |
| |
| |
| |
| @@ -15,7 +15,8 @@ |
| #include <iostream> |
| |
| #include "filesystem.h" |
| -#include "third_party/absl/memory/memory.h" |
| +#include "absl/status/status.h" |
| +#include "absl/memory/memory.h" |
| #include "util.h" |
| |
| #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE) |
| @@ -36,7 +37,7 @@ class PosixReadableFile : public ReadableFile { |
| is_binary ? std::ios::binary | std::ios::in |
| : std::ios::in)) { |
| if (!*is_) |
| - status_ = util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) |
| + status_ = util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) |
| << "\"" << filename.data() << "\": " << util::StrError(errno); |
| } |
| |
| @@ -44,7 +45,7 @@ class PosixReadableFile : public ReadableFile { |
| if (is_ != &std::cin) delete is_; |
| } |
| |
| - util::Status status() const { return status_; } |
| + absl::Status status() const { return status_; } |
| |
| bool ReadLine(std::string *line) { |
| return static_cast<bool>(std::getline(*is_, *line)); |
| @@ -61,7 +62,7 @@ class PosixReadableFile : public ReadableFile { |
| } |
| |
| private: |
| - util::Status status_; |
| + absl::Status status_; |
| std::istream *is_; |
| }; |
| |
| @@ -75,7 +76,7 @@ class PosixWritableFile : public WritableFile { |
| : std::ios::out)) { |
| if (!*os_) |
| status_ = |
| - util::StatusBuilder(util::StatusCode::kPermissionDenied, GTL_LOC) |
| + util::StatusBuilder(absl::StatusCode::kPermissionDenied, GTL_LOC) |
| << "\"" << filename.data() << "\": " << util::StrError(errno); |
| } |
| |
| @@ -83,7 +84,7 @@ class PosixWritableFile : public WritableFile { |
| if (os_ != &std::cout) delete os_; |
| } |
| |
| - util::Status status() const { return status_; } |
| + absl::Status status() const { return status_; } |
| |
| bool Write(absl::string_view text) { |
| os_->write(text.data(), text.size()); |
| @@ -93,7 +94,7 @@ class PosixWritableFile : public WritableFile { |
| bool WriteLine(absl::string_view text) { return Write(text) && Write("\n"); } |
| |
| private: |
| - util::Status status_; |
| + absl::Status status_; |
| std::ostream *os_; |
| }; |
| |
| |
| |
| |
| |
| @@ -23,7 +23,8 @@ |
| |
| #include "common.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| |
| namespace sentencepiece { |
| namespace filesystem { |
| @@ -33,7 +34,7 @@ class ReadableFile { |
| explicit ReadableFile(absl::string_view filename, bool is_binary = false) {} |
| virtual ~ReadableFile() {} |
| |
| - virtual util::Status status() const = 0; |
| + virtual absl::Status status() const = 0; |
| virtual bool ReadLine(std::string *line) = 0; |
| virtual bool ReadAll(std::string *line) = 0; |
| }; |
| @@ -44,7 +45,7 @@ class WritableFile { |
| explicit WritableFile(absl::string_view filename, bool is_binary = false) {} |
| virtual ~WritableFile() {} |
| |
| - virtual util::Status status() const = 0; |
| + virtual absl::Status status() const = 0; |
| virtual bool Write(absl::string_view text) = 0; |
| virtual bool WriteLine(absl::string_view text) = 0; |
| }; |
| |
| |
| |
| |
| @@ -14,7 +14,7 @@ |
| |
| #include "filesystem.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| +#include "absl/strings/str_cat.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -16,8 +16,8 @@ |
| #define INIT_H_ |
| |
| #include "common.h" |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/flags/parse.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/flags/parse.h" |
| |
| ABSL_DECLARE_FLAG(int32, minloglevel); |
| |
| |
| |
| |
| |
| @@ -15,7 +15,7 @@ |
| #include "bpe_model.h" |
| #include "char_model.h" |
| #include "model_factory.h" |
| -#include "third_party/absl/memory/memory.h" |
| +#include "absl/memory/memory.h" |
| #include "unigram_model.h" |
| #include "word_model.h" |
| |
| |
| |
| |
| |
| @@ -16,8 +16,8 @@ |
| |
| #include "model_interface.h" |
| #include "sentencepiece_model.pb.h" |
| -#include "third_party/absl/memory/memory.h" |
| -#include "third_party/absl/strings/str_format.h" |
| +#include "absl/memory/memory.h" |
| +#include "absl/strings/str_format.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -25,9 +25,10 @@ |
| #include "normalizer.h" |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| -#include "third_party/absl/strings/string_view.h" |
| -#include "third_party/darts_clone/darts.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| +#include "include/darts.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -69,7 +70,7 @@ class ModelInterface { |
| |
| // Returns Status. |
| // Encode/Decode functions are valid only when status is OK. |
| - virtual util::Status status() const { return status_; } |
| + virtual absl::Status status() const { return status_; } |
| |
| virtual const ModelProto &model_proto() const { return *model_proto_; } |
| |
| @@ -82,7 +83,7 @@ class ModelInterface { |
| // normally users do not need to call this function. This function is provided |
| // just in case that a user want to manually choose which encoder version to |
| // use. |
| - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version) { |
| + virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version) { |
| encoder_version_ = encoder_version; |
| return util::OkStatus(); |
| } |
| @@ -261,7 +262,7 @@ class ModelInterface { |
| EncoderVersion encoder_version_ = EncoderVersion::kOptimized; |
| |
| // status. |
| - util::Status status_; |
| + absl::Status status_; |
| }; |
| } // namespace sentencepiece |
| #endif // MODEL_INTERFACE_H_ |
| |
| |
| |
| |
| @@ -15,7 +15,7 @@ |
| #include "model_factory.h" |
| #include "model_interface.h" |
| #include "testharness.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| +#include "absl/container/flat_hash_map.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -18,11 +18,12 @@ |
| #include <vector> |
| |
| #include "common.h" |
| -#include "third_party/absl/memory/memory.h" |
| -#include "third_party/absl/strings/match.h" |
| -#include "third_party/absl/strings/string_view.h" |
| -#include "third_party/absl/strings/strip.h" |
| -#include "third_party/darts_clone/darts.h" |
| +#include "absl/memory/memory.h" |
| +#include "absl/strings/match.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/strings/strip.h" |
| +#include "absl/status/status.h" |
| +#include "include/darts.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -71,7 +72,7 @@ void Normalizer::Init() { |
| } |
| } |
| |
| -util::Status Normalizer::Normalize(absl::string_view input, |
| +absl::Status Normalizer::Normalize(absl::string_view input, |
| std::string *normalized, |
| std::vector<size_t> *norm_to_orig) const { |
| norm_to_orig->clear(); |
| @@ -274,7 +275,7 @@ std::string Normalizer::EncodePrecompiledCharsMap( |
| } |
| |
| // static |
| -util::Status Normalizer::DecodePrecompiledCharsMap( |
| +absl::Status Normalizer::DecodePrecompiledCharsMap( |
| absl::string_view blob, absl::string_view *trie_blob, |
| absl::string_view *normalized, std::string *buffer) { |
| uint32 trie_blob_size = 0; |
| |
| |
| |
| |
| @@ -24,8 +24,9 @@ |
| #include "common.h" |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/strings/string_view.h" |
| -#include "third_party/darts_clone/darts.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| +#include "include/darts.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -75,7 +76,7 @@ class Normalizer { |
| |
| // Returns Status. |
| // Normalizes function is valid only when status is OK. |
| - virtual util::Status status() const { return status_; } |
| + virtual absl::Status status() const { return status_; } |
| |
| // Normalizes a plain utf8 string into an internal representation for |
| // Sentencepiece model. |norm_to_orig| stores the byte-alignment from |
| @@ -86,7 +87,7 @@ class Normalizer { |
| // - Adds a prefix space. |
| // - Replaces a space with a meta symbol. |
| // - Removing heading, tailing and other redundant spaces. |
| - virtual util::Status Normalize(absl::string_view input, |
| + virtual absl::Status Normalize(absl::string_view input, |
| std::string *normalized, |
| std::vector<size_t> *norm_to_orig) const; |
| |
| @@ -121,7 +122,7 @@ class Normalizer { |
| absl::string_view normalized); |
| |
| // Decodes blob into trie_blob and normalized string. |
| - static util::Status DecodePrecompiledCharsMap(absl::string_view blob, |
| + static absl::Status DecodePrecompiledCharsMap(absl::string_view blob, |
| absl::string_view *trie_blob, |
| absl::string_view *normalized, |
| std::string *buffer = nullptr); |
| @@ -153,7 +154,7 @@ class Normalizer { |
| #endif |
| |
| // Normalizer's status. |
| - util::Status status_; |
| + absl::Status status_; |
| }; |
| } // namespace normalizer |
| } // namespace sentencepiece |
| |
| |
| |
| |
| @@ -14,7 +14,7 @@ |
| #include <string> |
| |
| #include "pretokenizer_for_training.h" |
| -#include "third_party/absl/strings/str_replace.h" |
| +#include "absl/strings/str_replace.h" |
| |
| namespace sentencepiece { |
| namespace pretokenizer { |
| |
| |
| |
| |
| @@ -21,7 +21,8 @@ |
| #include "common.h" |
| #include "sentencepiece.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| |
| namespace sentencepiece { |
| namespace pretokenizer { |
| @@ -30,7 +31,7 @@ class PretokenizerForTrainingInterface { |
| public: |
| PretokenizerForTrainingInterface() {} |
| virtual ~PretokenizerForTrainingInterface() {} |
| - virtual util::Status status() const = 0; |
| + virtual absl::Status status() const = 0; |
| |
| // Puts kUPPBoundaryStr before and after the pre-tokenizer's segmentation |
| // when there are no spaces between these tokens. |
| |
| |
| |
| |
| @@ -13,8 +13,9 @@ |
| // limitations under the License.! |
| #include "pretokenizer_for_training.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| +#include "absl/strings/str_cat.h" |
| #include "trainer_interface.h" |
| +#include "absl/status/status.h" |
| |
| namespace sentencepiece { |
| namespace pretokenizer { |
| @@ -28,7 +29,7 @@ class MockPretokenizer : public PretokenizerForTrainingInterface { |
| return spt_; |
| } |
| |
| - util::Status status() const override { return util::OkStatus(); } |
| + absl::Status status() const override { return util::OkStatus(); } |
| |
| void SetOutput(const SentencePieceText &spt) { spt_ = spt; } |
| |
| |
| |
| |
| |
| @@ -23,14 +23,15 @@ |
| #include "normalizer.h" |
| #include "sentencepiece.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/memory/memory.h" |
| -#include "third_party/absl/strings/numbers.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_join.h" |
| -#include "third_party/absl/strings/str_replace.h" |
| -#include "third_party/absl/strings/str_split.h" |
| -#include "third_party/absl/strings/string_view.h" |
| -#include "third_party/absl/strings/strip.h" |
| +#include "absl/memory/memory.h" |
| +#include "absl/strings/numbers.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_join.h" |
| +#include "absl/strings/str_replace.h" |
| +#include "absl/strings/str_split.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/strings/strip.h" |
| +#include "absl/status/status.h" |
| #include "unigram_model.h" |
| #include "util.h" |
| |
| @@ -52,7 +53,7 @@ const char kReplacementCharacter[] = "\xef\xbf\xbd"; |
| SentencePieceProcessor::SentencePieceProcessor() {} |
| SentencePieceProcessor::~SentencePieceProcessor() {} |
| |
| -util::Status SentencePieceProcessor::Load(absl::string_view filename) { |
| +absl::Status SentencePieceProcessor::Load(absl::string_view filename) { |
| auto model_proto = absl::make_unique<ModelProto>(); |
| RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get())); |
| return Load(std::move(model_proto)); |
| @@ -62,13 +63,13 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) { |
| CHECK_OK(Load(filename)); |
| } |
| |
| -util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) { |
| +absl::Status SentencePieceProcessor::Load(const ModelProto &model_proto) { |
| auto model_proto_copy = absl::make_unique<ModelProto>(); |
| *model_proto_copy = model_proto; |
| return Load(std::move(model_proto_copy)); |
| } |
| |
| -util::Status SentencePieceProcessor::LoadFromSerializedProto( |
| +absl::Status SentencePieceProcessor::LoadFromSerializedProto( |
| absl::string_view serialized) { |
| auto model_proto = absl::make_unique<ModelProto>(); |
| CHECK_OR_RETURN( |
| @@ -76,7 +77,7 @@ util::Status SentencePieceProcessor::LoadFromSerializedProto( |
| return Load(std::move(model_proto)); |
| } |
| |
| -util::Status SentencePieceProcessor::Load( |
| +absl::Status SentencePieceProcessor::Load( |
| std::unique_ptr<ModelProto> model_proto) { |
| model_proto_ = std::move(model_proto); |
| model_ = ModelFactory::Create(*model_proto_); |
| @@ -117,7 +118,7 @@ util::Status SentencePieceProcessor::Load( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::SetEncoderVersion( |
| +absl::Status SentencePieceProcessor::SetEncoderVersion( |
| EncoderVersion encoder_version) { |
| return model_->SetEncoderVersion(encoder_version); |
| } |
| @@ -126,17 +127,17 @@ EncoderVersion SentencePieceProcessor::GetEncoderVersion() const { |
| return model_->GetEncoderVersion(); |
| } |
| |
| -util::Status SentencePieceProcessor::SetEncodeExtraOptions( |
| +absl::Status SentencePieceProcessor::SetEncodeExtraOptions( |
| absl::string_view extra_options) { |
| return ParseExtraOptions(extra_options, &encode_extra_options_); |
| } |
| |
| -util::Status SentencePieceProcessor::SetDecodeExtraOptions( |
| +absl::Status SentencePieceProcessor::SetDecodeExtraOptions( |
| absl::string_view extra_options) { |
| return ParseExtraOptions(extra_options, &decode_extra_options_); |
| } |
| |
| -util::Status SentencePieceProcessor::status() const { |
| +absl::Status SentencePieceProcessor::status() const { |
| CHECK_OR_RETURN(model_) << "Model is not initialized."; |
| CHECK_OR_RETURN(normalizer_) << "Normalizer is not initialized."; |
| RETURN_IF_ERROR(model_->status()); |
| @@ -144,7 +145,7 @@ util::Status SentencePieceProcessor::status() const { |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::SetVocabulary( |
| +absl::Status SentencePieceProcessor::SetVocabulary( |
| const std::vector<std::string> &valid_vocab) { |
| RETURN_IF_ERROR(status()); |
| |
| @@ -174,7 +175,7 @@ util::Status SentencePieceProcessor::SetVocabulary( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::ResetVocabulary() { |
| +absl::Status SentencePieceProcessor::ResetVocabulary() { |
| RETURN_IF_ERROR(status()); |
| for (auto &piece : *(model_proto_->mutable_pieces())) { |
| if (piece.type() == ModelProto::SentencePiece::UNUSED) |
| @@ -184,7 +185,7 @@ util::Status SentencePieceProcessor::ResetVocabulary() { |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, |
| +absl::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, |
| int threshold) { |
| auto input = filesystem::NewReadableFile(filename); |
| RETURN_IF_ERROR(input->status()); |
| @@ -221,7 +222,7 @@ util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, |
| |
| ////////////////////////////////////////////////////////////// |
| // Simple API. |
| -util::Status SentencePieceProcessor::Encode( |
| +absl::Status SentencePieceProcessor::Encode( |
| absl::string_view input, std::vector<std::string> *pieces) const { |
| CHECK_OR_RETURN_STATUS_STL(pieces); |
| |
| @@ -234,7 +235,7 @@ util::Status SentencePieceProcessor::Encode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::Encode(absl::string_view input, |
| +absl::Status SentencePieceProcessor::Encode(absl::string_view input, |
| std::vector<int> *ids) const { |
| CHECK_OR_RETURN_STATUS_STL(ids); |
| |
| @@ -247,7 +248,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input, |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::Decode( |
| +absl::Status SentencePieceProcessor::Decode( |
| const std::vector<std::string> &pieces, std::string *detokenized) const { |
| CHECK_OR_RETURN_STATUS_STL(detokenized); |
| |
| @@ -258,7 +259,7 @@ util::Status SentencePieceProcessor::Decode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids, |
| +absl::Status SentencePieceProcessor::Decode(const std::vector<int> &ids, |
| std::string *detokenized) const { |
| CHECK_OR_RETURN_STATUS_STL(detokenized); |
| |
| @@ -269,7 +270,7 @@ util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids, |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::NBestEncode( |
| +absl::Status SentencePieceProcessor::NBestEncode( |
| absl::string_view input, int nbest_size, |
| std::vector<std::vector<std::string>> *pieces) const { |
| CHECK_OR_RETURN_STATUS_STL(pieces); |
| @@ -287,7 +288,7 @@ util::Status SentencePieceProcessor::NBestEncode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::NBestEncode( |
| +absl::Status SentencePieceProcessor::NBestEncode( |
| absl::string_view input, int nbest_size, |
| std::vector<std::vector<int>> *ids) const { |
| CHECK_OR_RETURN_STATUS_STL(ids); |
| @@ -305,7 +306,7 @@ util::Status SentencePieceProcessor::NBestEncode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::SampleEncode( |
| +absl::Status SentencePieceProcessor::SampleEncode( |
| absl::string_view input, int nbest_size, float alpha, |
| std::vector<std::string> *pieces) const { |
| CHECK_OR_RETURN_STATUS_STL(pieces); |
| @@ -319,7 +320,7 @@ util::Status SentencePieceProcessor::SampleEncode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::SampleEncode(absl::string_view input, |
| +absl::Status SentencePieceProcessor::SampleEncode(absl::string_view input, |
| int nbest_size, float alpha, |
| std::vector<int> *ids) const { |
| CHECK_OR_RETURN_STATUS_STL(ids); |
| @@ -333,7 +334,7 @@ util::Status SentencePieceProcessor::SampleEncode(absl::string_view input, |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::PopulateSentencePieceText( |
| +absl::Status SentencePieceProcessor::PopulateSentencePieceText( |
| absl::string_view input, absl::string_view normalized, |
| const std::vector<size_t> &norm_to_orig, const EncodeResult &result, |
| SentencePieceText *spt) const { |
| @@ -424,7 +425,7 @@ util::Status SentencePieceProcessor::PopulateSentencePieceText( |
| return util::OkStatus(); |
| } // namespace sentencepiece |
| |
| -util::Status SentencePieceProcessor::Encode(absl::string_view input, |
| +absl::Status SentencePieceProcessor::Encode(absl::string_view input, |
| SentencePieceText *spt) const { |
| CHECK_OR_RETURN_STATUS_PROTO(spt); |
| |
| @@ -439,7 +440,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input, |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::NBestEncode( |
| +absl::Status SentencePieceProcessor::NBestEncode( |
| absl::string_view input, int nbest_size, |
| NBestSentencePieceText *nbest_spt) const { |
| CHECK_OR_RETURN_STATUS_PROTO(nbest_spt); |
| @@ -464,7 +465,7 @@ util::Status SentencePieceProcessor::NBestEncode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::SampleEncode( |
| +absl::Status SentencePieceProcessor::SampleEncode( |
| absl::string_view input, int nbest_size, float alpha, |
| SentencePieceText *spt) const { |
| CHECK_OR_RETURN_STATUS_PROTO(spt); |
| @@ -503,7 +504,7 @@ util::Status SentencePieceProcessor::SampleEncode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::SampleEncodeAndScore( |
| +absl::Status SentencePieceProcessor::SampleEncodeAndScore( |
| absl::string_view input, int samples, float theta, bool wor, |
| bool include_best, NBestSentencePieceText *samples_spt) const { |
| CHECK_OR_RETURN(model_->IsSampleEncodeAndScoreAvailable()) |
| @@ -527,7 +528,7 @@ util::Status SentencePieceProcessor::SampleEncodeAndScore( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, |
| +absl::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, |
| float theta, |
| float *entropy) const { |
| CHECK_OR_RETURN(model_->IsCalculateEntropyAvailable()) |
| @@ -540,7 +541,7 @@ util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::Decode( |
| +absl::Status SentencePieceProcessor::Decode( |
| const std::vector<std::string> &pieces, SentencePieceText *spt) const { |
| CHECK_OR_RETURN_STATUS_PROTO(spt); |
| |
| @@ -591,7 +592,7 @@ util::Status SentencePieceProcessor::Decode( |
| }; |
| |
| auto ProcessBytePieces = [&](int token_index_begin, |
| - int token_index_end) -> util::Status { |
| + int token_index_end) -> absl::Status { |
| if (token_index_begin >= token_index_end) { |
| return util::OkStatus(); |
| } |
| @@ -661,14 +662,14 @@ util::Status SentencePieceProcessor::Decode( |
| return util::OkStatus(); |
| } |
| |
| -util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids, |
| +absl::Status SentencePieceProcessor::Decode(const std::vector<int> &ids, |
| SentencePieceText *spt) const { |
| std::vector<std::string> pieces; |
| const int num_pieces = GetPieceSize(); |
| pieces.reserve(ids.size()); |
| for (const int id : ids) { |
| if (id < 0 || id >= num_pieces) { |
| - return util::Status(util::StatusCode::kOutOfRange, |
| + return absl::Status(absl::StatusCode::kOutOfRange, |
| absl::StrCat("Invalid id: ", id)); |
| } |
| pieces.emplace_back(IdToPiece(id)); |
| @@ -783,7 +784,7 @@ int SentencePieceProcessor::pad_id() const { |
| } |
| |
| // static |
| -util::Status SentencePieceProcessor::ApplyExtraOptions( |
| +absl::Status SentencePieceProcessor::ApplyExtraOptions( |
| const std::vector<ExtraOption> &extra_options, |
| SentencePieceText *spt) const { |
| for (const auto &extra_option : extra_options) { |
| @@ -818,7 +819,7 @@ util::Status SentencePieceProcessor::ApplyExtraOptions( |
| } |
| |
| // static |
| -util::Status SentencePieceProcessor::ParseExtraOptions( |
| +absl::Status SentencePieceProcessor::ParseExtraOptions( |
| absl::string_view _extra_option, |
| std::vector<SentencePieceProcessor::ExtraOption> *extra_options) const { |
| absl::string_view extra_option(_extra_option.data(), _extra_option.size()); |
| @@ -877,7 +878,7 @@ void SetRandomGeneratorSeed(unsigned int seed); |
| |
| namespace io { |
| |
| -util::Status LoadModelProto(absl::string_view filename, |
| +absl::Status LoadModelProto(absl::string_view filename, |
| ModelProto *model_proto) { |
| if (filename.empty()) { |
| return util::NotFoundError("model file path should not be empty."); |
| @@ -893,7 +894,7 @@ util::Status LoadModelProto(absl::string_view filename, |
| return util::OkStatus(); |
| } |
| |
| -util::Status SaveModelProto(absl::string_view filename, |
| +absl::Status SaveModelProto(absl::string_view filename, |
| const ModelProto &model_proto) { |
| if (filename.empty()) { |
| return util::NotFoundError("model file path should not be empty."); |
| |
| |
| |
| |
| @@ -20,9 +20,10 @@ |
| #include <string> |
| #include <utility> |
| #include <vector> |
| +#include "absl/status/status.h" |
| |
| #if defined(_USE_INTERNAL_STRING_VIEW) |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/strings/string_view.h" |
| #elif defined(_USE_TF_STRING_VIEW) |
| #include "absl/strings/string_view.h" |
| #else |
| @@ -185,7 +186,7 @@ class SentencePieceProcessor { |
| |
| // Loads model from `filename`. |
| // Returns false if `filename` cannot be loaded. |
| - virtual util::Status Load(absl::string_view filename); |
| + virtual absl::Status Load(absl::string_view filename); |
| |
| // Loads model from `filename`. |
| // Crash if `filename` cannot be loaded. |
| @@ -193,24 +194,24 @@ class SentencePieceProcessor { |
| |
| // Loads model from `model_proto`. |
| // `model_proto` is copied. |
| - virtual util::Status Load(const ModelProto &model_proto); |
| + virtual absl::Status Load(const ModelProto &model_proto); |
| |
| // Loads model from `model_proto`. |
| // `model_proto` is moved. |
| - virtual util::Status Load(std::unique_ptr<ModelProto> model_proto); |
| + virtual absl::Status Load(std::unique_ptr<ModelProto> model_proto); |
| |
| // Loads model from `serialized`, which is a string-serialized model proto. |
| // Useful to load the model from a platform independent blob object. |
| - virtual util::Status LoadFromSerializedProto(absl::string_view serialized); |
| + virtual absl::Status LoadFromSerializedProto(absl::string_view serialized); |
| |
| // Returns the status. Encode/Decode methods are valid when status is OK. |
| - virtual util::Status status() const; |
| + virtual absl::Status status() const; |
| |
| // Sets encode extra_option sequence. |
| - virtual util::Status SetEncodeExtraOptions(absl::string_view extra_option); |
| + virtual absl::Status SetEncodeExtraOptions(absl::string_view extra_option); |
| |
| // Sets decode extra_option sequence. |
| - virtual util::Status SetDecodeExtraOptions(absl::string_view extra_option); |
| + virtual absl::Status SetDecodeExtraOptions(absl::string_view extra_option); |
| |
| ////////////////////////////////////////////////////////////// |
| // Vocabulary restriction. |
| @@ -219,41 +220,41 @@ class SentencePieceProcessor { |
| |
| // Restricts the vocabulary set. |
| // The input sentences are encoded into the tokens in `valid_vocab`. |
| - virtual util::Status SetVocabulary( |
| + virtual absl::Status SetVocabulary( |
| const std::vector<std::string> &valid_vocab); |
| |
| // Reverts the vocabulary restriction. |
| - virtual util::Status ResetVocabulary(); |
| + virtual absl::Status ResetVocabulary(); |
| |
| // Loads the valid vocabulary set from `filename` in TSV format. |
| // Format: <token> <tab> <freq>. |
| // Any token with frequency < threshold will be treated as OOV. |
| - virtual util::Status LoadVocabulary(absl::string_view filename, |
| + virtual absl::Status LoadVocabulary(absl::string_view filename, |
| int threshold); |
| |
| ////////////////////////////////////////////////////////////// |
| // Simple API. |
| // |
| // Given a UTF8 input, encodes it into a sequence of sentence pieces. |
| - virtual util::Status Encode(absl::string_view input, |
| + virtual absl::Status Encode(absl::string_view input, |
| std::vector<std::string> *pieces) const; |
| |
| // Given a UTF8 input, encodes it into a sequence of ids. |
| - virtual util::Status Encode(absl::string_view input, |
| + virtual absl::Status Encode(absl::string_view input, |
| std::vector<int> *ids) const; |
| |
| // Given a sequence of pieces, decodes it into a detokenized output. |
| - virtual util::Status Decode(const std::vector<std::string> &pieces, |
| + virtual absl::Status Decode(const std::vector<std::string> &pieces, |
| std::string *detokenized) const; |
| |
| // Given a sequence of ids, decodes it into a detokenized output. |
| - virtual util::Status Decode(const std::vector<int> &ids, |
| + virtual absl::Status Decode(const std::vector<int> &ids, |
| std::string *detokenized) const; |
| |
| // Sets the encoder version. Normally users do not need to call this function. |
| // But they can call this fucntion just in case if they want to fall back to |
| // the original encoder. |
| - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version); |
| + virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version); |
| |
| // Returns the current encoder version in use. |
| virtual EncoderVersion GetEncoderVersion() const; |
| @@ -261,12 +262,12 @@ class SentencePieceProcessor { |
| ////////////////////////////////////////////////////////////// |
| // NBest API. |
| // Same as Encode, but returns nbest results. |
| - virtual util::Status NBestEncode( |
| + virtual absl::Status NBestEncode( |
| absl::string_view input, int nbest_size, |
| std::vector<std::vector<std::string>> *pieces) const; |
| |
| // Same as Encode, but returns nbest results. |
| - virtual util::Status NBestEncode(absl::string_view input, int nbest_size, |
| + virtual absl::Status NBestEncode(absl::string_view input, int nbest_size, |
| std::vector<std::vector<int>> *ids) const; |
| |
| ////////////////////////////////////////////////////////////// |
| @@ -289,12 +290,12 @@ class SentencePieceProcessor { |
| // in https://arxiv.org/abs/1910.13267 |
| // Nbest-based sampling is not supported so nbest_size parameter is ignored in |
| // BPE. |
| - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, |
| + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, |
| float alpha, |
| std::vector<std::string> *pieces) const; |
| |
| // Same as above, but returns a sequence of ids. |
| - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, |
| + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, |
| float alpha, std::vector<int> *ids) const; |
| |
| ////////////////////////////////////////////////////////////// |
| @@ -303,16 +304,16 @@ class SentencePieceProcessor { |
| // and internal sentencepiece sequence. |
| // |
| // Given a UTF8 input, encodes it into SentencePieceText. |
| - virtual util::Status Encode(absl::string_view input, |
| + virtual absl::Status Encode(absl::string_view input, |
| SentencePieceText *spt) const; |
| |
| // Same as above, but returns NBestSentencePieceText. |
| - virtual util::Status NBestEncode(absl::string_view input, int nbest_size, |
| + virtual absl::Status NBestEncode(absl::string_view input, int nbest_size, |
| NBestSentencePieceText *nbest_spt) const; |
| |
| // Same as above, but samples one segmentation from the hypotheses |
| // (Lattice). |
| - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, |
| + virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, |
| float alpha, SentencePieceText *spt) const; |
| |
| // Sample `samples` segmentations from the segmentation lattice. |
| @@ -323,21 +324,21 @@ class SentencePieceProcessor { |
| // If `include_best` is true, the best tokenization is always included in the |
| // sample, and the remaining elements are sampled excluding the best. |
| // This method is only available in Unigram mode. |
| - virtual util::Status SampleEncodeAndScore( |
| + virtual absl::Status SampleEncodeAndScore( |
| absl::string_view input, int samples, float theta, bool wor, |
| bool include_best, NBestSentencePieceText *samples_spt) const; |
| |
| // Calculate entropy of possible tokenization. |
| // Only available in unigram mode. |
| - virtual util::Status CalculateEntropy(absl::string_view input, float theta, |
| + virtual absl::Status CalculateEntropy(absl::string_view input, float theta, |
| float *entropy) const; |
| |
| // Given a sequence of pieces, decodes it into SentencePieceText. |
| - virtual util::Status Decode(const std::vector<std::string> &pieces, |
| + virtual absl::Status Decode(const std::vector<std::string> &pieces, |
| SentencePieceText *spt) const; |
| |
| // Given a sequence of ids, decodes it into SentencePieceText. |
| - virtual util::Status Decode(const std::vector<int> &ids, |
| + virtual absl::Status Decode(const std::vector<int> &ids, |
| SentencePieceText *spt) const; |
| |
| ////////////////////////////////////////////////////////////// |
| @@ -487,13 +488,13 @@ class SentencePieceProcessor { |
| private: |
| enum ExtraOption { REVERSE, BOS, EOS }; |
| |
| - util::Status ParseExtraOptions(absl::string_view extra_option, |
| + absl::Status ParseExtraOptions(absl::string_view extra_option, |
| std::vector<ExtraOption> *extra_options) const; |
| |
| - util::Status ApplyExtraOptions(const std::vector<ExtraOption> &extra_options, |
| + absl::Status ApplyExtraOptions(const std::vector<ExtraOption> &extra_options, |
| SentencePieceText *spt) const; |
| |
| - util::Status PopulateSentencePieceText( |
| + absl::Status PopulateSentencePieceText( |
| absl::string_view input, absl::string_view normalized, |
| const std::vector<size_t> &norm_to_orig, |
| const std::vector<std::pair<absl::string_view, int>> &result, |
| @@ -526,10 +527,10 @@ namespace io { |
| // io::LoadModelProto("//path/spm.model", model_proto.get()); |
| // SentencePieceProcessor sp; |
| // CHECK_OK(sp.Load(std::move(model_proto))); |
| -util::Status LoadModelProto(absl::string_view, ModelProto *model_proto); |
| +absl::Status LoadModelProto(absl::string_view, ModelProto *model_proto); |
| |
| // Saves `model_proto` as `filename`. |
| -util::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); |
| +absl::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); |
| } // namespace io |
| #endif // SWIG |
| } // namespace sentencepiece |
| |
| |
| |
| |
| @@ -23,10 +23,10 @@ |
| #include "sentencepiece_processor.h" |
| #include "sentencepiece_trainer.h" |
| #include "testharness.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| -#include "third_party/absl/memory/memory.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/memory/memory.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/string_view.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -22,12 +22,13 @@ |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_trainer.h" |
| #include "spec_parser.h" |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/strings/numbers.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_split.h" |
| -#include "third_party/absl/strings/string_view.h" |
| -#include "third_party/absl/strings/strip.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/strings/numbers.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_split.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/strings/strip.h" |
| +#include "absl/status/status.h" |
| #include "trainer_factory.h" |
| #include "util.h" |
| |
| @@ -37,7 +38,7 @@ static constexpr char kDefaultNormalizerName[] = "nmt_nfkc"; |
| } // namespace |
| |
| // static |
| -util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, |
| +absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, |
| SentenceIterator *sentence_iterator, |
| std::string *serialized_model_proto) { |
| NormalizerSpec normalizer_spec; |
| @@ -45,7 +46,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, |
| serialized_model_proto); |
| } |
| |
| -util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, |
| +absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, |
| const NormalizerSpec &normalizer_spec, |
| SentenceIterator *sentence_iterator, |
| std::string *serialized_model_proto) { |
| @@ -55,7 +56,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, |
| } |
| |
| // static |
| -util::Status SentencePieceTrainer::Train( |
| +absl::Status SentencePieceTrainer::Train( |
| const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec, |
| const NormalizerSpec &denormalizer_spec, |
| SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { |
| @@ -97,7 +98,7 @@ NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(absl::string_view name) { |
| } |
| |
| // static |
| -util::Status SentencePieceTrainer::MergeSpecsFromArgs( |
| +absl::Status SentencePieceTrainer::MergeSpecsFromArgs( |
| absl::string_view args, TrainerSpec *trainer_spec, |
| NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec) { |
| CHECK_OR_RETURN(trainer_spec) << "`trainer_spec` must not be null."; |
| @@ -125,7 +126,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( |
| } |
| |
| // static |
| -util::Status SentencePieceTrainer::MergeSpecsFromArgs( |
| +absl::Status SentencePieceTrainer::MergeSpecsFromArgs( |
| const std::unordered_map<std::string, std::string> &kwargs, |
| TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, |
| NormalizerSpec *denormalizer_spec) { |
| @@ -171,7 +172,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( |
| } |
| |
| // static |
| -util::Status SentencePieceTrainer::Train(absl::string_view args, |
| +absl::Status SentencePieceTrainer::Train(absl::string_view args, |
| SentenceIterator *sentence_iterator, |
| std::string *serialized_model_proto) { |
| LOG(INFO) << "Running command: " << args.data(); |
| @@ -185,7 +186,7 @@ util::Status SentencePieceTrainer::Train(absl::string_view args, |
| } |
| |
| // static |
| -util::Status SentencePieceTrainer::Train( |
| +absl::Status SentencePieceTrainer::Train( |
| const std::unordered_map<std::string, std::string> &kwargs, |
| SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { |
| TrainerSpec trainer_spec; |
| @@ -198,7 +199,7 @@ util::Status SentencePieceTrainer::Train( |
| } |
| |
| // static |
| -util::Status SentencePieceTrainer::PopulateNormalizerSpec( |
| +absl::Status SentencePieceTrainer::PopulateNormalizerSpec( |
| NormalizerSpec *normalizer_spec, bool is_denormalizer) { |
| CHECK_OR_RETURN(normalizer_spec); |
| |
| @@ -226,7 +227,7 @@ util::Status SentencePieceTrainer::PopulateNormalizerSpec( |
| } |
| |
| // static |
| -util::Status SentencePieceTrainer::PopulateModelTypeFromString( |
| +absl::Status SentencePieceTrainer::PopulateModelTypeFromString( |
| absl::string_view type, TrainerSpec *spec) { |
| static const std::unordered_map<std::string, TrainerSpec::ModelType> |
| kModelTypeMap = {{"unigram", TrainerSpec::UNIGRAM}, |
| @@ -239,7 +240,7 @@ util::Status SentencePieceTrainer::PopulateModelTypeFromString( |
| return util::OkStatus(); |
| } |
| |
| - return util::StatusBuilder(util::StatusCode::kInternal, GTL_LOC) |
| + return util::StatusBuilder(absl::StatusCode::kInternal, GTL_LOC) |
| << "\"" << type << "\" is not found in TrainerSpec"; |
| } |
| |
| @@ -248,7 +249,7 @@ const pretokenizer::PretokenizerForTrainingInterface *g_pretokenizer = nullptr; |
| } // namespace |
| |
| // static |
| -util::Status SentencePieceTrainer::SetPretokenizerForTraining( |
| +absl::Status SentencePieceTrainer::SetPretokenizerForTraining( |
| const pretokenizer::PretokenizerForTrainingInterface *pretokenizer) { |
| g_pretokenizer = pretokenizer; |
| return util::OkStatus(); |
| |
| |
| |
| |
| @@ -19,6 +19,7 @@ |
| #include <unordered_map> |
| |
| #include "sentencepiece_processor.h" |
| +#include "absl/status/status.h" |
| |
| namespace sentencepiece { |
| |
| @@ -46,7 +47,7 @@ class SentenceIterator { |
| virtual bool done() const = 0; |
| virtual void Next() = 0; |
| virtual const std::string &value() const = 0; |
| - virtual util::Status status() const = 0; |
| + virtual absl::Status status() const = 0; |
| }; |
| |
| class SentencePieceTrainer { |
| @@ -54,14 +55,14 @@ class SentencePieceTrainer { |
| // Trains SentencePiece model with `trainer_spec`. |
| // Default `normalizer_spec` is used. |
| // When `sentence_iterator` is passed, load sentences from the iterator. |
| - static util::Status Train(const TrainerSpec &trainer_spec, |
| + static absl::Status Train(const TrainerSpec &trainer_spec, |
| SentenceIterator *sentence_iterator = nullptr, |
| std::string *serialized_model_proto = nullptr); |
| |
| // Trains SentencePiece model with `trainer_spec` and |
| // `normalizer_spec`. |
| // When `sentence_iterator` is passed, load sentences from the iterator. |
| - static util::Status Train(const TrainerSpec &trainer_spec, |
| + static absl::Status Train(const TrainerSpec &trainer_spec, |
| const NormalizerSpec &normalizer_spec, |
| SentenceIterator *sentence_iterator = nullptr, |
| std::string *serialized_model_proto = nullptr); |
| @@ -69,7 +70,7 @@ class SentencePieceTrainer { |
| // Trains SentencePiece model with `trainer_spec`, `normalizer_spec` |
| // and `denormalizer_spec`. |
| // When `sentence_iterator` is passed, load sentences from the iterator. |
| - static util::Status Train(const TrainerSpec &trainer_spec, |
| + static absl::Status Train(const TrainerSpec &trainer_spec, |
| const NormalizerSpec &normalizer_spec, |
| const NormalizerSpec &denormalizer_spec, |
| SentenceIterator *sentence_iterator = nullptr, |
| @@ -78,13 +79,13 @@ class SentencePieceTrainer { |
| // e.g., |
| // '--input=data --model_prefix=m --vocab_size=8192 model_type=unigram' |
| // When `sentence_iterator` is passed, load sentences from the iterator. |
| - static util::Status Train(absl::string_view args, |
| + static absl::Status Train(absl::string_view args, |
| SentenceIterator *sentence_iterator = nullptr, |
| std::string *serialized_model_proto = nullptr); |
| |
| // Trains SentencePiece model with mapin `kwargs`. |
| // e.g., {{"input", "data"}, {"model_prefix, "m"}, {"vocab_size", "8192"}...} |
| - static util::Status Train( |
| + static absl::Status Train( |
| const std::unordered_map<std::string, std::string> &kwargs, |
| SentenceIterator *sentence_iterator = nullptr, |
| std::string *serialized_model_proto = nullptr); |
| @@ -96,19 +97,19 @@ class SentencePieceTrainer { |
| |
| // Populates necessary fields (precompiled_charmap) from |
| // `NormalizerSpec::name` or `NormalizerSpec::normalization_rule_tsv`. |
| - static util::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec, |
| + static absl::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec, |
| bool is_denormalizer = false); |
| |
| // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the |
| // std::unordered_map in `kargs`. |
| - static util::Status MergeSpecsFromArgs( |
| + static absl::Status MergeSpecsFromArgs( |
| const std::unordered_map<std::string, std::string> &kwargs, |
| TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, |
| NormalizerSpec *denormalizer_spec); |
| |
| // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the |
| // command line flags in `args`. |
| - static util::Status MergeSpecsFromArgs(absl::string_view args, |
| + static absl::Status MergeSpecsFromArgs(absl::string_view args, |
| TrainerSpec *trainer_spec, |
| NormalizerSpec *normalizer_spec, |
| NormalizerSpec *denormalizer_spec); |
| @@ -116,7 +117,7 @@ class SentencePieceTrainer { |
| // Injects global pre-tokenizer that are applied in training time. |
| // Pretokenizer is only used for extracting pieces. |
| // TODO(taku): It would be better to inject per `trainer_spec`. |
| - static util::Status SetPretokenizerForTraining( |
| + static absl::Status SetPretokenizerForTraining( |
| const pretokenizer::PretokenizerForTrainingInterface *pretokenizer); |
| |
| // Returns the current pretokenizer. if no pretokenizer is defined, returns |
| @@ -129,17 +130,17 @@ class SentencePieceTrainer { |
| // with comma-separated values. `field_name` must not be a nested message. |
| // The body of these functions are automatically generated with |
| // data/gen_spec_parser.pl |
| - static util::Status SetProtoField(const std::string &name, |
| + static absl::Status SetProtoField(const std::string &name, |
| const std::string &value, |
| TrainerSpec *message); |
| |
| - static util::Status SetProtoField(const std::string &name, |
| + static absl::Status SetProtoField(const std::string &name, |
| const std::string &value, |
| NormalizerSpec *message); |
| |
| // Populates model type from string representation, e.g., "bpe". |
| // Supported model: "unigram", "bpe", "word", "char". |
| - static util::Status PopulateModelTypeFromString(absl::string_view type, |
| + static absl::Status PopulateModelTypeFromString(absl::string_view type, |
| TrainerSpec *trainer_spec); |
| |
| private: |
| |
| |
| |
| |
| @@ -16,7 +16,8 @@ |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_trainer.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/status/status.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -109,7 +110,7 @@ TEST(SentencePieceTrainerTest, TrainFromIterator) { |
| bool done() const override { return idx_ == vec_.size(); } |
| void Next() override { ++idx_; } |
| const std::string &value() const override { return vec_[idx_]; } |
| - util::Status status() const override { return util::OkStatus(); } |
| + absl::Status status() const override { return util::OkStatus(); } |
| |
| private: |
| std::vector<std::string> vec_; |
| |
| |
| |
| |
| @@ -19,8 +19,9 @@ |
| #include <vector> |
| |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/strings/ascii.h" |
| -#include "third_party/absl/strings/str_split.h" |
| +#include "absl/strings/ascii.h" |
| +#include "absl/strings/str_split.h" |
| +#include "absl/status/status.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -49,7 +50,7 @@ namespace sentencepiece { |
| if (name == #param_name) { \ |
| int32 v; \ |
| if (!string_util::lexical_cast(value, &v)) \ |
| - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ |
| + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ |
| << "cannot parse \"" << value << "\" as int."; \ |
| message->set_##param_name(v); \ |
| return util::OkStatus(); \ |
| @@ -59,7 +60,7 @@ namespace sentencepiece { |
| if (name == #param_name) { \ |
| uint64 v; \ |
| if (!string_util::lexical_cast(value, &v)) \ |
| - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ |
| + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ |
| << "cannot parse \"" << value << "\" as int."; \ |
| message->set_##param_name(v); \ |
| return util::OkStatus(); \ |
| @@ -69,7 +70,7 @@ namespace sentencepiece { |
| if (name == #param_name) { \ |
| double v; \ |
| if (!string_util::lexical_cast(value, &v)) \ |
| - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ |
| + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ |
| << "cannot parse \"" << value << "\" as int."; \ |
| message->set_##param_name(v); \ |
| return util::OkStatus(); \ |
| @@ -79,7 +80,7 @@ namespace sentencepiece { |
| if (name == #param_name) { \ |
| bool v; \ |
| if (!string_util::lexical_cast(value.empty() ? "true" : value, &v)) \ |
| - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ |
| + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ |
| << "cannot parse \"" << value << "\" as bool."; \ |
| message->set_##param_name(v); \ |
| return util::OkStatus(); \ |
| @@ -89,7 +90,7 @@ namespace sentencepiece { |
| if (name == #param_name) { \ |
| const auto it = map_name.find(absl::AsciiStrToUpper(value)); \ |
| if (it == map_name.end()) \ |
| - return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ |
| + return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ |
| << "unknown enumeration value of \"" << value << "\" as " \ |
| << #map_name; \ |
| message->set_##param_name(it->second); \ |
| @@ -186,7 +187,7 @@ inline std::string PrintProto(const NormalizerSpec &message, |
| return os.str(); |
| } |
| |
| -util::Status SentencePieceTrainer::SetProtoField(const std::string &name, |
| +absl::Status SentencePieceTrainer::SetProtoField(const std::string &name, |
| const std::string &value, |
| TrainerSpec *message) { |
| CHECK_OR_RETURN(message); |
| @@ -239,11 +240,11 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, |
| PARSE_STRING(pad_piece); |
| PARSE_STRING(unk_surface); |
| |
| - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) |
| + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) |
| << "unknown field name \"" << name << "\" in TrainerSpec."; |
| } |
| |
| -util::Status SentencePieceTrainer::SetProtoField(const std::string &name, |
| +absl::Status SentencePieceTrainer::SetProtoField(const std::string &name, |
| const std::string &value, |
| NormalizerSpec *message) { |
| CHECK_OR_RETURN(message); |
| @@ -255,7 +256,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, |
| PARSE_BOOL(escape_whitespaces); |
| PARSE_STRING(normalization_rule_tsv); |
| |
| - return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) |
| + return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) |
| << "unknown field name \"" << name << "\" in NormalizerSpec."; |
| } |
| |
| |
| |
| |
| |
| @@ -21,8 +21,8 @@ |
| #include "init.h" |
| #include "sentencepiece.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/strings/str_split.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/strings/str_split.h" |
| #include "util.h" |
| |
| ABSL_FLAG(std::string, model, "", "model file name"); |
| |
| |
| |
| |
| @@ -21,10 +21,10 @@ |
| #include "init.h" |
| #include "sentencepiece.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_join.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_join.h" |
| #include "trainer_interface.h" |
| |
| ABSL_FLAG(std::string, model, "", "model file name"); |
| |
| |
| |
| |
| @@ -20,7 +20,7 @@ |
| #include "init.h" |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/flags/flag.h" |
| +#include "absl/flags/flag.h" |
| |
| ABSL_FLAG(std::string, output, "", "Output filename"); |
| ABSL_FLAG(std::string, model, "", "input model file name"); |
| |
| |
| |
| |
| @@ -21,7 +21,7 @@ |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_processor.h" |
| #include "sentencepiece_trainer.h" |
| -#include "third_party/absl/flags/flag.h" |
| +#include "absl/flags/flag.h" |
| |
| ABSL_FLAG(std::string, model, "", "Model file name"); |
| ABSL_FLAG(bool, use_internal_normalization, false, |
| |
| |
| |
| |
| @@ -18,10 +18,10 @@ |
| #include "init.h" |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_trainer.h" |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/strings/ascii.h" |
| -#include "third_party/absl/strings/str_join.h" |
| -#include "third_party/absl/strings/str_split.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/strings/ascii.h" |
| +#include "absl/strings/str_join.h" |
| +#include "absl/strings/str_split.h" |
| #include "util.h" |
| |
| using sentencepiece::NormalizerSpec; |
| |
| |
| |
| |
| @@ -26,7 +26,7 @@ |
| #include <vector> |
| |
| #include "common.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| +#include "absl/strings/str_cat.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -21,9 +21,9 @@ |
| #include <string> |
| |
| #include "common.h" |
| -#include "third_party/absl/flags/flag.h" |
| -#include "third_party/absl/flags/parse.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/flags/flag.h" |
| +#include "absl/flags/parse.h" |
| +#include "absl/strings/string_view.h" |
| |
| ABSL_DECLARE_FLAG(std::string, test_tmpdir); |
| ABSL_DECLARE_FLAG(std::string, test_srcdir); |
| |
| |
| |
| |
| @@ -14,7 +14,7 @@ |
| |
| #include "bpe_model_trainer.h" |
| #include "char_model_trainer.h" |
| -#include "third_party/absl/memory/memory.h" |
| +#include "absl/memory/memory.h" |
| #include "trainer_factory.h" |
| #include "unigram_model_trainer.h" |
| #include "word_model_trainer.h" |
| |
| |
| |
| |
| @@ -26,13 +26,14 @@ |
| #include "normalizer.h" |
| #include "sentencepiece_processor.h" |
| #include "sentencepiece_trainer.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| -#include "third_party/absl/memory/memory.h" |
| -#include "third_party/absl/strings/numbers.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_format.h" |
| -#include "third_party/absl/strings/str_join.h" |
| -#include "third_party/absl/strings/str_split.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/memory/memory.h" |
| +#include "absl/strings/numbers.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_format.h" |
| +#include "absl/strings/str_join.h" |
| +#include "absl/strings/str_split.h" |
| +#include "absl/status/status.h" |
| #include "trainer_interface.h" |
| #include "unicode_script.h" |
| #include "util.h" |
| @@ -49,7 +50,7 @@ const char32 TrainerInterface::kUPPBoundaryChar = L'\u0009'; |
| const char TrainerInterface::kUPPBoundaryStr[] = "\t"; |
| |
| namespace { |
| -util::Status VerifySpec(const TrainerSpec &trainer_spec) { |
| +absl::Status VerifySpec(const TrainerSpec &trainer_spec) { |
| CHECK_GT_OR_RETURN(trainer_spec.vocab_size(), 0); |
| |
| if (trainer_spec.model_type() == TrainerSpec::UNIGRAM || |
| @@ -164,7 +165,7 @@ bool MultiFileSentenceIterator::done() const { |
| return (!read_done_ && file_index_ == files_.size()); |
| } |
| |
| -util::Status MultiFileSentenceIterator::status() const { |
| +absl::Status MultiFileSentenceIterator::status() const { |
| CHECK_OR_RETURN(fp_); |
| return fp_->status(); |
| } |
| @@ -296,7 +297,7 @@ bool TrainerInterface::IsValidSentencePiece( |
| return true; |
| } |
| |
| -util::Status TrainerInterface::LoadSentences() { |
| +absl::Status TrainerInterface::LoadSentences() { |
| RETURN_IF_ERROR(status()); |
| CHECK_OR_RETURN(sentences_.empty()); |
| CHECK_OR_RETURN(required_chars_.empty()); |
| @@ -537,7 +538,7 @@ void TrainerInterface::SplitSentencesByWhitespace() { |
| LOG(INFO) << "Done! " << sentences_.size(); |
| } |
| |
| -util::Status TrainerInterface::Serialize(ModelProto *model_proto) const { |
| +absl::Status TrainerInterface::Serialize(ModelProto *model_proto) const { |
| RETURN_IF_ERROR(status()); |
| |
| // Duplicated sentencepiece is not allowed. |
| @@ -611,7 +612,7 @@ util::Status TrainerInterface::Serialize(ModelProto *model_proto) const { |
| return util::OkStatus(); |
| } |
| |
| -util::Status TrainerInterface::SaveModel(absl::string_view filename) const { |
| +absl::Status TrainerInterface::SaveModel(absl::string_view filename) const { |
| LOG(INFO) << "Saving model: " << filename; |
| ModelProto model_proto; |
| RETURN_IF_ERROR(Serialize(&model_proto)); |
| @@ -622,7 +623,7 @@ util::Status TrainerInterface::SaveModel(absl::string_view filename) const { |
| return util::OkStatus(); |
| } |
| |
| -util::Status TrainerInterface::SaveVocab(absl::string_view filename) const { |
| +absl::Status TrainerInterface::SaveVocab(absl::string_view filename) const { |
| LOG(INFO) << "Saving vocabs: " << filename; |
| ModelProto model_proto; |
| RETURN_IF_ERROR(Serialize(&model_proto)); |
| @@ -644,7 +645,7 @@ util::Status TrainerInterface::SaveVocab(absl::string_view filename) const { |
| return util::OkStatus(); |
| } |
| |
| -util::Status TrainerInterface::Save() const { |
| +absl::Status TrainerInterface::Save() const { |
| if (output_model_proto_) { |
| RETURN_IF_ERROR(Serialize(output_model_proto_)); |
| } else { |
| @@ -654,7 +655,7 @@ util::Status TrainerInterface::Save() const { |
| return util::OkStatus(); |
| } |
| |
| -util::Status TrainerInterface::InitMetaPieces() { |
| +absl::Status TrainerInterface::InitMetaPieces() { |
| CHECK_OR_RETURN(meta_pieces_.empty()); |
| bool has_unk = false; |
| |
| |
| |
| |
| |
| @@ -27,7 +27,8 @@ |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_processor.h" |
| #include "sentencepiece_trainer.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/status/status.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -57,7 +58,7 @@ class MultiFileSentenceIterator : public SentenceIterator { |
| bool done() const override; |
| void Next() override; |
| const std::string &value() const override { return value_; } |
| - util::Status status() const override; |
| + absl::Status status() const override; |
| |
| private: |
| void TryRead(); |
| @@ -90,16 +91,16 @@ class TrainerInterface { |
| |
| // Loads sentence from `sentence_iterator` and stores the model |
| // to `output_model_proto`. |
| - virtual util::Status Train(SentenceIterator *sentence_iterator, |
| + virtual absl::Status Train(SentenceIterator *sentence_iterator, |
| ModelProto *output_model_proto) { |
| sentence_iterator_ = sentence_iterator; |
| output_model_proto_ = output_model_proto; |
| return Train(); |
| } |
| |
| - virtual util::Status Train() { return status(); } |
| + virtual absl::Status Train() { return status(); } |
| |
| - virtual util::Status status() const { return status_; } |
| + virtual absl::Status status() const { return status_; } |
| |
| FRIEND_TEST(TrainerInterfaceTest, IsValidSentencePieceTest); |
| FRIEND_TEST(TrainerInterfaceTest, OverrideSpecialPiecesTest); |
| @@ -115,7 +116,7 @@ class TrainerInterface { |
| |
| // Loads all sentences from spec.input() or SentenceIterator. |
| // It loads at most input_sentence_size sentences. |
| - util::Status LoadSentences(); |
| + absl::Status LoadSentences(); |
| |
| // Splits all sentencecs by whitespaces and |
| // replace the |sentences_| with tokenized string. |
| @@ -125,7 +126,7 @@ class TrainerInterface { |
| void SplitSentencesByWhitespace(); |
| |
| // Save model files into spec.model_prefix(). |
| - util::Status Save() const; |
| + absl::Status Save() const; |
| |
| // Set of characters which must be included in the final vocab. |
| // The value of this map stores the frequency. |
| @@ -152,7 +153,7 @@ class TrainerInterface { |
| meta_pieces_; |
| |
| // Detect errors on initialization. |
| - util::Status status_; |
| + absl::Status status_; |
| |
| // Loads sentences from SentenceIterator if not null. |
| SentenceIterator *sentence_iterator_ = nullptr; |
| @@ -162,19 +163,19 @@ class TrainerInterface { |
| |
| private: |
| // Serialize final_pieces_ to |model_proto|. |
| - util::Status Serialize(ModelProto *model_proto) const; |
| + absl::Status Serialize(ModelProto *model_proto) const; |
| |
| // Saves the best sentence split with the current model for debugging. |
| - util::Status SaveSplits(absl::string_view filename) const; |
| + absl::Status SaveSplits(absl::string_view filename) const; |
| |
| // Saves model file. |
| - util::Status SaveModel(absl::string_view filename) const; |
| + absl::Status SaveModel(absl::string_view filename) const; |
| |
| // Saves vocabulary file for NMT. |
| - util::Status SaveVocab(absl::string_view filename) const; |
| + absl::Status SaveVocab(absl::string_view filename) const; |
| |
| // Initializes `meta_pieces_` from TrainerSpec. |
| - util::Status InitMetaPieces(); |
| + absl::Status InitMetaPieces(); |
| |
| // Randomly sampled raw sentences for self-testing. |
| std::vector<std::string> self_test_samples_; |
| |
| |
| |
| |
| @@ -16,8 +16,8 @@ |
| |
| #include "filesystem.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_format.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_format.h" |
| #include "trainer_interface.h" |
| #include "util.h" |
| |
| |
| |
| |
| |
| @@ -14,7 +14,7 @@ |
| |
| #include <unordered_map> |
| |
| -#include "third_party/absl/container/flat_hash_map.h" |
| +#include "absl/container/flat_hash_map.h" |
| #include "unicode_script.h" |
| #include "unicode_script_map.h" |
| #include "util.h" |
| |
| |
| |
| |
| @@ -14,7 +14,7 @@ |
| |
| #ifndef UNICODE_SCRIPT_DATA_H_ |
| #define UNICODE_SCRIPT_DATA_H_ |
| -#include "third_party/absl/container/flat_hash_map.h" |
| +#include "absl/container/flat_hash_map.h" |
| namespace sentencepiece { |
| namespace unicode_script { |
| namespace { |
| |
| |
| |
| |
| @@ -14,7 +14,7 @@ |
| |
| #include "common.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/strings/string_view.h" |
| #include "unicode_script.h" |
| #include "util.h" |
| |
| |
| |
| |
| |
| @@ -22,9 +22,9 @@ |
| #include <utility> |
| #include <vector> |
| |
| -#include "third_party/absl/memory/memory.h" |
| -#include "third_party/absl/strings/str_split.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/memory/memory.h" |
| +#include "absl/strings/str_split.h" |
| +#include "absl/strings/string_view.h" |
| #include "unigram_model.h" |
| #include "util.h" |
| |
| |
| |
| |
| |
| @@ -24,7 +24,7 @@ |
| #include "freelist.h" |
| #include "model_interface.h" |
| #include "sentencepiece_model.pb.h" |
| -#include "third_party/darts_clone/darts.h" |
| +#include "include/darts.h" |
| |
| namespace sentencepiece { |
| namespace unigram { |
| |
| |
| |
| |
| @@ -22,8 +22,8 @@ |
| #include "sentencepiece_model.pb.h" |
| #include "sentencepiece_processor.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_join.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_join.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| |
| |
| |
| |
| @@ -25,8 +25,9 @@ |
| #include "normalizer.h" |
| #include "pretokenizer_for_training.h" |
| #include "sentencepiece_trainer.h" |
| -#include "third_party/absl/container/flat_hash_map.h" |
| -#include "third_party/absl/memory/memory.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/memory/memory.h" |
| +#include "absl/status/status.h" |
| #include "third_party/esaxx/esa.hxx" // Suffix array library. |
| #include "unicode_script.h" |
| #include "unigram_model_trainer.h" |
| @@ -463,7 +464,7 @@ TrainerModel::SentencePieces Trainer::FinalizeSentencePieces( |
| return Sorted(final_sentencepieces); |
| } |
| |
| -util::Status Trainer::Train() { |
| +absl::Status Trainer::Train() { |
| RETURN_IF_ERROR(status()); |
| |
| CHECK_EQ_OR_RETURN(TrainerSpec::UNIGRAM, trainer_spec_.model_type()); |
| |
| |
| |
| |
| @@ -21,7 +21,8 @@ |
| #include <vector> |
| |
| #include "sentencepiece_model.pb.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| #include "trainer_interface.h" |
| #include "unigram_model.h" |
| #include "util.h" |
| @@ -68,7 +69,7 @@ class Trainer : public TrainerInterface { |
| : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, |
| denormalizer_spec) {} |
| |
| - util::Status Train() override; |
| + absl::Status Train() override; |
| |
| private: |
| FRIEND_TEST(TrainerTest, IsValidSentencePieceTest); |
| |
| |
| |
| |
| @@ -16,8 +16,8 @@ |
| #include "sentencepiece_processor.h" |
| #include "sentencepiece_trainer.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_join.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_join.h" |
| #include "unigram_model_trainer.h" |
| #include "util.h" |
| |
| |
| |
| |
| |
| @@ -30,7 +30,8 @@ |
| |
| #include "common.h" |
| #include "sentencepiece_processor.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| |
| #ifdef SPM_NO_THREADLOCAL |
| #include <pthread.h> |
| @@ -359,14 +360,14 @@ std::string StrError(int errnum); |
| |
| std::vector<std::string> StrSplitAsCSV(absl::string_view text); |
| |
| -inline Status OkStatus() { return Status(); } |
| +inline absl::Status OkStatus() { return absl::Status(); } |
| |
| #define DECLARE_ERROR(FUNC) \ |
| - inline util::Status FUNC##Error(absl::string_view str) { \ |
| - return util::Status(StatusCode::k##FUNC, str.data()); \ |
| + inline absl::Status FUNC##Error(absl::string_view str) { /* Status with code k##FUNC and message `str` */ \ |
| + return absl::Status(absl::StatusCode::k##FUNC, str); /* pass the view itself: str.data() need not be NUL-terminated */ \ |
| } \ |
| - inline bool Is##FUNC(const util::Status &status) { \ |
| - return status.code() == StatusCode::k##FUNC; \ |
| + inline bool Is##FUNC(const absl::Status &status) { /* true iff `status` carries code k##FUNC */ \ |
| + return status.code() == absl::StatusCode::k##FUNC; \ |
| } |
| |
| DECLARE_ERROR(Cancelled) |
| @@ -390,8 +391,8 @@ DECLARE_ERROR(Unauthenticated) |
| |
| class StatusBuilder { |
| public: |
| - explicit StatusBuilder(StatusCode code) : code_(code) {} |
| - explicit StatusBuilder(StatusCode code, int loc) : code_(code) {} |
| + explicit StatusBuilder(absl::StatusCode code) : code_(code) {} |
| + explicit StatusBuilder(absl::StatusCode code, int loc) : code_(code) {} |
| |
| template <typename T> |
| StatusBuilder &operator<<(const T &value) { |
| @@ -399,10 +400,10 @@ class StatusBuilder { |
| return *this; |
| } |
| |
| - operator Status() const { return Status(code_, os_.str()); } |
| + operator absl::Status() const { return absl::Status(code_, os_.str()); } |
| |
| private: |
| - StatusCode code_; |
| + absl::StatusCode code_; |
| std::ostringstream os_; |
| }; |
| |
| @@ -410,7 +411,7 @@ class StatusBuilder { |
| if (condition) { \ |
| } else /* NOLINT */ \ |
| return ::sentencepiece::util::StatusBuilder( \ |
| - ::sentencepiece::util::StatusCode::kInternal) \ |
| + ::absl::StatusCode::kInternal) \ |
| << __FILE__ << "(" << __LINE__ << ") [" << #condition << "] " |
| |
| #define CHECK_EQ_OR_RETURN(a, b) CHECK_OR_RETURN((a) == (b)) |
| |
| |
| |
| |
| @@ -16,7 +16,8 @@ |
| |
| #include "filesystem.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/status/status.h" |
| #include "util.h" |
| |
| namespace sentencepiece { |
| @@ -376,27 +377,27 @@ TEST(UtilTest, STLDeleteELementsTest) { |
| } |
| |
| TEST(UtilTest, StatusTest) { |
| - const util::Status ok; |
| + const absl::Status ok; |
| EXPECT_TRUE(ok.ok()); |
| - EXPECT_EQ(util::StatusCode::kOk, ok.code()); |
| + EXPECT_EQ(absl::StatusCode::kOk, ok.code()); |
| EXPECT_EQ(std::string(""), ok.message()); |
| |
| - const util::Status s1(util::StatusCode::kUnknown, "unknown"); |
| - const util::Status s2(util::StatusCode::kUnknown, std::string("unknown")); |
| + const absl::Status s1(absl::StatusCode::kUnknown, "unknown"); |
| + const absl::Status s2(absl::StatusCode::kUnknown, std::string("unknown")); |
| |
| - EXPECT_EQ(util::StatusCode::kUnknown, s1.code()); |
| - EXPECT_EQ(util::StatusCode::kUnknown, s2.code()); |
| + EXPECT_EQ(absl::StatusCode::kUnknown, s1.code()); |
| + EXPECT_EQ(absl::StatusCode::kUnknown, s2.code()); |
| EXPECT_EQ(std::string("unknown"), s1.message()); |
| EXPECT_EQ(std::string("unknown"), s2.message()); |
| |
| auto ok2 = util::OkStatus(); |
| EXPECT_TRUE(ok2.ok()); |
| - EXPECT_EQ(util::StatusCode::kOk, ok2.code()); |
| + EXPECT_EQ(absl::StatusCode::kOk, ok2.code()); |
| EXPECT_EQ(std::string(""), ok2.message()); |
| |
| util::OkStatus().IgnoreError(); |
| for (int i = 1; i <= 16; ++i) { |
| - util::Status s(static_cast<util::StatusCode>(i), "message"); |
| + absl::Status s(static_cast<absl::StatusCode>(i), "message"); |
| EXPECT_TRUE(s.ToString().find("message") != std::string::npos) |
| << s.ToString(); |
| } |
| |
| |
| |
| |
| @@ -15,8 +15,9 @@ |
| #include <cmath> |
| #include <string> |
| |
| -#include "third_party/absl/container/flat_hash_map.h" |
| -#include "third_party/absl/strings/string_view.h" |
| +#include "absl/container/flat_hash_map.h" |
| +#include "absl/strings/string_view.h" |
| +#include "absl/status/status.h" |
| #include "util.h" |
| #include "word_model.h" |
| #include "word_model_trainer.h" |
| @@ -24,7 +25,7 @@ |
| namespace sentencepiece { |
| namespace word { |
| |
| -util::Status Trainer::Train() { |
| +absl::Status Trainer::Train() { |
| RETURN_IF_ERROR(status()); |
| |
| CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); |
| |
| |
| |
| |
| @@ -17,6 +17,7 @@ |
| |
| #include "sentencepiece_model.pb.h" |
| #include "trainer_interface.h" |
| +#include "absl/status/status.h" |
| |
| namespace sentencepiece { |
| namespace word { |
| @@ -34,7 +35,7 @@ class Trainer : public TrainerInterface { |
| : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, |
| denormalizer_spec) {} |
| |
| - util::Status Train() override; |
| + absl::Status Train() override; |
| }; |
| } // namespace word |
| } // namespace sentencepiece |
| |
| |
| |
| |
| @@ -18,8 +18,8 @@ |
| #include "filesystem.h" |
| #include "sentencepiece_processor.h" |
| #include "testharness.h" |
| -#include "third_party/absl/strings/str_cat.h" |
| -#include "third_party/absl/strings/str_join.h" |
| +#include "absl/strings/str_cat.h" |
| +#include "absl/strings/str_join.h" |
| #include "util.h" |
| #include "word_model_trainer.h" |
| |