| #pragma once |
|
|
| #include <boost/unordered_set.hpp> |
| #include <boost/unordered_map.hpp> |
| #include <cstdio> |
| #include <sstream> |
| #include <fstream> |
| #include <iostream> |
| #include <string> |
| #include <queue> |
| #include <sys/stat.h> |
|
|
| #include "hash.h" |
| #include "probing_hash_utils.h" |
| #include "vocabid.h" |
|
|
| #include "util/file_piece.hh" |
| #include "util/file.hh" |
|
|
| namespace probingpt |
| { |
/** A source phrase stored as a sequence of 64-bit values.
 *  NOTE(review): presumably one vocabulary id per source word - confirm against the callers. */
| typedef std::vector<uint64_t> SourcePhrase; |
|
|
|
|
| class Node |
| { |
| typedef boost::unordered_map<uint64_t, Node> Children; |
| Children m_children; |
|
|
| public: |
| uint64_t key; |
| bool done; |
|
|
| Node() |
| :done(false) |
| {} |
|
|
| void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0); |
| void Write(Table &table); |
| }; |
|
|
|
|
/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably builds the probing phrase table found at
 * phrasetable_path and writes it under basepath - confirm against the
 * definition.
 *
 * @param phrasetable_path  path of the input phrase table
 * @param basepath          base path handed to the implementation
 * @param num_scores        number of scores
 * @param num_lex_scores    number of lexical scores
 * @param log_prob          NOTE(review): presumably selects log-probability handling - confirm
 * @param max_cache_size    maximum cache size
 * @param scfg              NOTE(review): presumably enables SCFG-style rules - confirm
 */
void createProbingPT(
  const std::string &phrasetable_path,
  const std::string &basepath,
  int num_scores,
  int num_lex_scores,
  bool log_prob,
  int max_cache_size,
  bool scfg);
/**
 * Declaration only; the definition is not part of this header.
 *
 * @param source_phrase  sequence of 64-bit ids
 * @return a single 64-bit key derived from source_phrase (the derivation is not visible here)
 */
uint64_t getKey(
  const std::vector<uint64_t> &source_phrase);
|
|
/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably returns the leading part of vocabid_source up to
 * endPos - whether endPos is inclusive must be checked in the definition.
 *
 * @param vocabid_source  sequence of 64-bit ids
 * @param endPos          end position within vocabid_source
 * @return a new vector of 64-bit ids
 */
std::vector<uint64_t> CreatePrefix(
  const std::vector<uint64_t> &vocabid_source,
  size_t endPos);
|
|
/**
 * Renders a vector as text for debugging.
 *
 * Each element is streamed with operator<< and followed by a single space,
 * so a non-empty result always ends with a trailing space and an empty
 * vector yields an empty string.
 *
 * @param vec  elements to print; T must be streamable to std::ostream
 * @return the space-terminated elements, concatenated in order
 */
template<typename T>
std::string Debug(const std::vector<T> &vec)
{
  typedef typename std::vector<T>::const_iterator ConstIter;

  std::stringstream out;
  for (ConstIter it = vec.begin(); it != vec.end(); ++it) {
    out << *it << " ";
  }
  return out.str();
}
|
|
/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably counts the distinct source phrases in the file at
 * `path` - confirm against the definition.
 *
 * @param path  path handed to the implementation
 * @return a count
 */
size_t countUniqueSource(
  const std::string &path);
|
|
/**
 * A cache candidate: a source string, its 64-bit key and a count.
 */
class CacheItem
{
public:
  std::string source;
  uint64_t sourceKey;
  float count;

  /** Stores copies of the three arguments in the matching members. */
  CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
    : source(vSource), sourceKey(vSourceKey), count(vCount)
  {}

  /**
   * Reversed ordering: this item compares "less" than `other` when its count
   * is the LARGER of the two. Under a less-than based std::priority_queue
   * this puts the item with the smallest count on top.
   */
  bool operator<(const CacheItem &other) const
  {
    return other.count < count;
  }
};
|
|
| class CacheItemOrderer |
| { |
| public: |
| bool operator()(const CacheItem* a, const CacheItem* b) const { |
| return (*a) < (*b); |
| } |
| }; |
|
|
| void serialize_cache( |
| std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache, |
| const std::string &path, float totalSourceCount); |
|
|
| } |
|
|
|
|