#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
#include "moses/FF/InternalTree.h"
#include "moses/Word.h"

#include <boost/thread/tss.hpp>
#include <boost/array.hpp>

#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#endif
| |
|
| |
|
| | |
| | |
| | |
| |
|
// Forward declaration only: this header refers to nplm::neuralTM exclusively
// through pointers, so the full class definition is not required here.
namespace nplm { class neuralTM; }
| |
|
| | namespace Moses |
| | { |
| |
|
| | namespace rdlm |
| | { |
| |
|
| | |
| | |
| | |
| | class ThreadLocal |
| | { |
| | public: |
| | std::vector<int> ancestor_heads; |
| | std::vector<int> ancestor_labels; |
| | std::vector<int> ngram; |
| | std::vector<int> heads; |
| | std::vector<int> labels; |
| | std::vector<int> heads_output; |
| | std::vector<int> labels_output; |
| | std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack; |
| | nplm::neuralTM* lm_head; |
| | nplm::neuralTM* lm_label; |
| |
|
| | ThreadLocal(nplm::neuralTM *lm_head_base_instance_, nplm::neuralTM *lm_label_base_instance_, bool normalizeHeadLM, bool normalizeLabelLM, int cacheSize); |
| | ~ThreadLocal(); |
| | }; |
| | } |
| |
|
| | class RDLMState : public TreeState |
| | { |
| | float m_approx_head; |
| | float m_approx_label; |
| | size_t m_hash; |
| | public: |
| | RDLMState(TreePointer tree, float approx_head, float approx_label, size_t hash) |
| | : TreeState(tree) |
| | , m_approx_head(approx_head) |
| | , m_approx_label(approx_label) |
| | , m_hash(hash) |
| | {} |
| |
|
| | float GetApproximateScoreHead() const { |
| | return m_approx_head; |
| | } |
| |
|
| | float GetApproximateScoreLabel() const { |
| | return m_approx_label; |
| | } |
| |
|
| | size_t GetHash() const { |
| | return m_hash; |
| | } |
| |
|
| | int Compare(const FFState& other) const { |
| | if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0; |
| | else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1; |
| | else return -1; |
| | } |
| | }; |
| |
|
| | class RDLM : public StatefulFeatureFunction |
| | { |
| | typedef std::map<InternalTree*,TreePointer> TreePointerMap; |
| |
|
| | nplm::neuralTM* lm_head_base_instance_; |
| | nplm::neuralTM* lm_label_base_instance_; |
| |
|
| | mutable boost::thread_specific_ptr<rdlm::ThreadLocal> thread_objects_backend_; |
| |
|
| | std::string m_glueSymbolString; |
| | Word dummy_head; |
| | Word m_glueSymbol; |
| | Word m_startSymbol; |
| | Word m_endSymbol; |
| | Word m_endTag; |
| | std::string m_path_head_lm; |
| | std::string m_path_label_lm; |
| | bool m_isPretermBackoff; |
| | size_t m_context_left; |
| | size_t m_context_right; |
| | size_t m_context_up; |
| | bool m_premultiply; |
| | bool m_rerank; |
| | bool m_normalizeHeadLM; |
| | bool m_normalizeLabelLM; |
| | bool m_sharedVocab; |
| | std::string m_debugPath; |
| | int m_binarized; |
| | int m_cacheSize; |
| |
|
| | size_t offset_up_head; |
| | size_t offset_up_label; |
| |
|
| | size_t size_head; |
| | size_t size_label; |
| | std::vector<int> static_label_null; |
| | std::vector<int> static_head_null; |
| | int static_dummy_head; |
| | int static_start_head; |
| | int static_start_label; |
| | int static_stop_head; |
| | int static_stop_label; |
| | int static_head_head; |
| | int static_head_label; |
| | int static_root_head; |
| | int static_root_label; |
| |
|
| | int static_head_label_output; |
| | int static_stop_label_output; |
| | int static_start_label_output; |
| |
|
| | FactorType m_factorType; |
| |
|
| | static const int LABEL_INPUT = 0; |
| | static const int LABEL_OUTPUT = 1; |
| | static const int HEAD_INPUT = 2; |
| | static const int HEAD_OUTPUT = 3; |
| | mutable std::vector<int> factor2id_label_input; |
| | mutable std::vector<int> factor2id_label_output; |
| | mutable std::vector<int> factor2id_head_input; |
| | mutable std::vector<int> factor2id_head_output; |
| |
|
| | #ifdef WITH_THREADS |
| | |
| | mutable boost::shared_mutex m_accessLock; |
| | #endif |
| |
|
| | public: |
| | RDLM(const std::string &line) |
| | : StatefulFeatureFunction(2, line) |
| | , m_glueSymbolString("Q") |
| | , m_isPretermBackoff(true) |
| | , m_context_left(3) |
| | , m_context_right(0) |
| | , m_context_up(2) |
| | , m_premultiply(true) |
| | , m_rerank(false) |
| | , m_normalizeHeadLM(false) |
| | , m_normalizeLabelLM(false) |
| | , m_sharedVocab(false) |
| | , m_binarized(0) |
| | , m_cacheSize(1000000) |
| | , m_factorType(0) { |
| | ReadParameters(); |
| | std::vector<FactorType> factors; |
| | factors.push_back(0); |
| | dummy_head.CreateFromString(Output, factors, "<dummy_head>", false); |
| | m_glueSymbol.CreateFromString(Output, factors, m_glueSymbolString, true); |
| | m_startSymbol.CreateFromString(Output, factors, "SSTART", true); |
| | m_endSymbol.CreateFromString(Output, factors, "SEND", true); |
| | m_endTag.CreateFromString(Output, factors, "</s>", false); |
| | } |
| |
|
| | ~RDLM(); |
| |
|
| | virtual const FFState* EmptyHypothesisState(const InputType &input) const { |
| | return new RDLMState(TreePointer(), 0, 0, 0); |
| | } |
| |
|
| | void Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float,4> &score, size_t &boundary_hash, rdlm::ThreadLocal &thread_objects, int num_virtual = 0, int rescoring_levels = 0) const; |
| | bool GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs) const; |
| | void GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, rdlm::ThreadLocal &thread_objects) const; |
| | void GetIDs(const Word & head, const Word & preterminal, std::pair<int,int> & IDs) const; |
| | int Factor2ID(const Factor * const factor, int model_type) const; |
| | void ScoreFile(std::string &path); |
| | void PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const; |
| |
|
| | TreePointerMap AssociateLeafNTs(InternalTree* root, const std::vector<TreePointer> &previous) const; |
| |
|
| | bool IsUseable(const FactorMask &mask) const { |
| | return true; |
| | } |
| |
|
| | void SetParameter(const std::string& key, const std::string& value); |
| |
|
| | FFState* EvaluateWhenApplied( |
| | const Hypothesis& cur_hypo, |
| | const FFState* prev_state, |
| | ScoreComponentCollection* accumulator) const { |
| | UTIL_THROW(util::Exception, "Not implemented"); |
| | }; |
| | FFState* EvaluateWhenApplied( |
| | const ChartHypothesis& , |
| | int , |
| | ScoreComponentCollection* accumulator) const; |
| |
|
| | void Load(AllOptions::ptr const& opts); |
| |
|
| | |
| | class UnbinarizedChildren |
| | { |
| | private: |
| | std::vector<TreePointer>::const_iterator iter; |
| | std::vector<TreePointer>::const_iterator _begin; |
| | bool _ended; |
| | InternalTree* current; |
| | const TreePointerMap & back_pointers; |
| | bool binarized; |
| | std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > &stack; |
| |
|
| | public: |
| | UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary, std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > & persistent_stack): |
| | current(root), |
| | back_pointers(pointers), |
| | binarized(binary), |
| | stack(persistent_stack) { |
| | stack.resize(0); |
| | _ended = current->GetChildren().empty(); |
| | iter = current->GetChildren().begin(); |
| | |
| | while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') { |
| | stack.push_back(std::make_pair(current, iter)); |
| | |
| | if ((*iter)->IsLeafNT()) { |
| | current = back_pointers.find(iter->get())->second.get(); |
| | } else { |
| | current = iter->get(); |
| | } |
| | iter = current->GetChildren().begin(); |
| | } |
| | _begin = iter; |
| | } |
| |
|
| | std::vector<TreePointer>::const_iterator begin() const { |
| | return _begin; |
| | } |
| | bool ended() const { |
| | return _ended; |
| | } |
| |
|
| | std::vector<TreePointer>::const_iterator operator++() { |
| | iter++; |
| | if (iter == current->GetChildren().end()) { |
| | while (!stack.empty()) { |
| | std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back(); |
| | current = active.first; |
| | iter = ++active.second; |
| | stack.pop_back(); |
| | if (iter != current->GetChildren().end()) { |
| | break; |
| | } |
| | } |
| | if (iter == current->GetChildren().end()) { |
| | _ended = true; |
| | return iter; |
| | } |
| | } |
| | |
| | while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') { |
| | stack.push_back(std::make_pair(current, iter)); |
| | |
| | if ((*iter)->IsLeafNT()) { |
| | current = back_pointers.find(iter->get())->second.get(); |
| | } else { |
| | current = iter->get(); |
| | } |
| | iter = current->GetChildren().begin(); |
| | } |
| | return iter; |
| | } |
| | }; |
| |
|
| | }; |
| |
|
| | } |
| |
|