| |
| |
| |
| #pragma once |
| #define PROVIDES_RANKED_SAMPLING 0 |
|
|
| #include <boost/thread.hpp> |
| #include <boost/scoped_ptr.hpp> |
| #include <boost/intrusive_ptr.hpp> |
|
|
| #include "moses/TypeDef.h" |
| #include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h" |
| #include "moses/TranslationModel/UG/generic/sampling/Sampling.h" |
| #include "moses/TranslationModel/UG/generic/file_io/ug_stream.h" |
| #include "moses/TranslationModel/UG/generic/threading/ug_thread_pool.h" |
|
|
| #include "moses/TranslationModel/UG/mm/ug_mm_ttrack.h" |
| #include "moses/TranslationModel/UG/mm/ug_mm_tsa.h" |
| #include "moses/TranslationModel/UG/mm/tpt_tokenindex.h" |
| #include "moses/TranslationModel/UG/mm/ug_corpus_token.h" |
| #include "moses/TranslationModel/UG/mm/ug_typedefs.h" |
| #include "moses/TranslationModel/UG/mm/tpt_pickler.h" |
| #include "moses/TranslationModel/UG/mm/ug_bitext.h" |
| #include "moses/TranslationModel/UG/mm/ug_bitext_sampler.h" |
| #include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h" |
|
|
| #include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h" |
|
|
| #ifndef NO_MOSES |
| #include "moses/FF/LexicalReordering/LexicalReordering.h" |
| #endif |
|
|
| #include "moses/InputFileStream.h" |
| #include "moses/FactorTypeSet.h" |
| #include "moses/TargetPhrase.h" |
| #include <boost/dynamic_bitset.hpp> |
| #include "moses/TargetPhraseCollection.h" |
| #include "util/usage.hh" |
| #include <map> |
|
|
| #include "moses/TranslationModel/PhraseDictionary.h" |
| #include "sapt_phrase_scorers.h" |
|
|
| |
| |
| |
| |
| |
|
|
| namespace Moses |
| { /* Mmsapt: judging by its members, a phrase table that builds target phrase collections from bitexts on demand -- TODO confirm against the implementation file. */ |
| class Mmsapt /* Derives from PhraseDictionary unless NO_MOSES is defined; with NO_MOSES it has no base class. */ |
| #ifndef NO_MOSES |
| : public PhraseDictionary |
| #endif |
| { |
| class TPCOllCache; /* NOTE(review): spelled differently from the TPCollCache used for m_cache below, so this declares an unrelated nested type. Do not simply fix the spelling: a nested Mmsapt::TPCollCache would hide the type m_cache currently resolves to (presumably from TargetPhraseCollectionCache.h). */ |
| friend class Alignment; |
| std::map<std::string,std::string> param; /* string-to-string settings; presumably the parsed configuration -- confirm */ |
| std::string m_name; |
| #ifndef NO_MOSES |
| /* PhraseDictionaryGroup is a friend only in Moses builds (NO_MOSES undefined). */ |
| friend class PhraseDictionaryGroup; |
| #endif |
| public: /* Token type plus short aliases for the sapt:: templates instantiated on it. */ |
| typedef sapt::L2R_Token<sapt::SimpleWordId> Token; |
| typedef sapt::mmBitext<Token> mmbitext; |
| typedef sapt::imBitext<Token> imbitext; |
| typedef sapt::Bitext<Token> bitext; |
| typedef sapt::TSA<Token> tsa; |
| typedef sapt::PhraseScorer<Token> pscorer; |
| private: |
| /* Two bitexts: btfix is an mmBitext, btdyn an imBitext (presumably the static memory-mapped corpus and a dynamically extended in-memory one -- confirm in ug_bitext.h). */ |
| SPTR<mmbitext> btfix; |
| SPTR<imbitext> btdyn; |
| std::string m_bname, m_extra_data, m_bias_file,m_bias_server; |
| std::string L1; /* NOTE(review): L1 and L2 look like tags for the two sides of the bitext -- confirm */ |
| std::string L2; |
| float m_lbop_conf; |
| float m_lex_alpha; |
| /* Sampling, threading and bias-logging settings follow; how each value is interpreted is decided in the implementation file and is not visible here. */ |
| |
| size_t m_default_sample_size; |
| size_t m_min_sample_size; |
| size_t m_workers; |
| std::vector<std::string> m_feature_set_names; |
| std::string m_bias_logfile; |
| boost::scoped_ptr<std::ofstream> m_bias_logger; |
| std::ostream* m_bias_log; /* raw stream pointer; presumably non-owning, with m_bias_logger owning any file stream -- confirm */ |
| int m_bias_loglevel; |
| #ifndef NO_MOSES |
| LexicalReordering* m_lr_func; /* raw pointer; ownership is not visible in this header */ |
| #endif |
| std::string m_lr_func_name; |
| sapt::sampling_method m_sampling_method; |
| boost::scoped_ptr<ug::ThreadPool> m_thread_pool; |
| public: /* Three public const pointers (void* const): they cannot be reassigned after construction. Presumably opaque lookup keys for bias, cache and context data -- confirm. */ |
| void* const bias_key; |
| void* const cache_key; |
| void* const context_key; |
| private: |
| boost::shared_ptr<sapt::SamplingBias> m_bias; |
| boost::shared_ptr<TPCollCache> m_cache; |
| size_t m_cache_size; |
| |
| |
|
|
| |
| |
|
|
|
|
| /* Per-feature metadata; m_is_logval and m_is_integer are presumably indexed like m_feature_names (see isLogVal / isInteger below) -- confirm. */ |
| std::vector<std::string> m_feature_names; |
| std::vector<bool> m_is_logval; |
| std::vector<bool> m_is_integer; |
|
|
| std::vector<SPTR<pscorer > > m_active_ff_fix; |
| std::vector<SPTR<pscorer > > m_active_ff_dyn; |
| std::vector<SPTR<pscorer > > m_active_ff_common; |
| /* The three vectors above are scorer registries (fix / dyn / common); which scorer lands in which one is decided by the implementation. */ |
|
|
| bool m_track_coord; |
| /* Coordinate data; presumably only meaningful when m_track_coord is set. The layout of m_sid_coord_list is not described in this header -- TODO document. */ |
| |
| std::vector<std::vector<SPTR<std::vector<float> > > > m_sid_coord_list; |
| std::vector<size_t> m_coord_spaces; |
|
|
| void |
| parse_factor_spec(std::vector<FactorType>& flist, std::string const key); /* flist is a non-const reference (output); key is taken by value */ |
|
|
| void |
| register_ff(SPTR<pscorer> const& ff, std::vector<SPTR<pscorer> > & registry); |
|
|
| template<typename fftype> |
| void |
| check_ff(std::string const ffname,std::vector<SPTR<pscorer> >* registry = NULL); |
| /* check_ff<fftype> above: registry defaults to NULL; what a null registry selects is defined in the implementation. */ |
|
|
| template<typename fftype> |
| void |
| check_ff(std::string const ffname, float const xtra, |
| std::vector<SPTR<pscorer> >* registry = NULL); |
| /* Overload of check_ff taking one extra float (xtra); presumably a scorer parameter -- confirm. */ |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| /* init() takes a single string, like the constructor; presumably the constructor forwards its configuration line here -- confirm. */ |
| void init(std::string const& line); |
| mutable boost::shared_mutex m_lock; /* mutable, so it can be locked from const member functions */ |
| |
| /* Flags and factor lists; poolCounts presumably selects between pool_pstats and combine_pstats below -- confirm. */ |
| bool withPbwd; |
| bool poolCounts; |
| std::vector<FactorType> m_ifactor, m_ofactor; |
|
|
| void setup_local_feature_functions(); |
| void setup_bias(ttasksptr const& ttask); |
|
|
| #if PROVIDES_RANKED_SAMPLING |
| void |
| set_bias_for_ranking(ttasksptr const& ttask, SPTR<sapt::Bitext<Token> const> bt); /* declared only when PROVIDES_RANKED_SAMPLING is non-zero; this header defines it as 0 */ |
| #endif |
| private: /* redundant: the enclosing section is already private */ |
|
|
| void read_config_file(std::string fname, std::map<std::string,std::string>& param); |
|
|
| /* Feature weights as a flat float vector; presumably filled through setWeights() below -- confirm. */ |
| std::vector<float> feature_weights; |
|
|
| std::vector<std::vector<tpt::id_type> > wlex21; |
| /* mm2dtable_t: sapt::mm2dTable instantiated with two tpt::id_type and two uint32_t arguments; COOCraw presumably holds raw co-occurrence counts -- confirm. */ |
| typedef sapt::mm2dTable<tpt::id_type,tpt::id_type,uint32_t,uint32_t> mm2dtable_t; |
| mm2dtable_t COOCraw; |
|
|
| TargetPhrase* /* raw pointer return: whether the caller takes ownership is not visible here -- check call sites */ |
| mkTPhrase(ttasksptr const& ttask, |
| Phrase const& src, |
| sapt::PhrasePair<Token>* fix, |
| sapt::PhrasePair<Token>* dyn, |
| SPTR<sapt::Bitext<Token> > const& dynbt) const; |
|
|
| void |
| process_pstats |
| (Phrase const& src, |
| uint64_t const pid1, |
| sapt::pstats const& stats, |
| sapt::Bitext<Token> const & bt, |
| TargetPhraseCollection::shared_ptr tpcoll |
| ) const; |
|
|
| bool |
| pool_pstats |
| (Phrase const& src, |
| uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta, |
| uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, |
| TargetPhraseCollection::shared_ptr tpcoll) const; |
|
|
| bool /* same parameter list as pool_pstats above: statsa is non-const, statsb is const */ |
| combine_pstats |
| (Phrase const& src, |
| uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta, |
| uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, |
| TargetPhraseCollection::shared_ptr tpcoll) const; |
|
|
| void load_extra_data(std::string bname, bool locking); |
| void load_bias(std::string bname); |
|
|
| public: |
| /* Constructs from a single string (line). NOTE(review): the constructor is not explicit, so a std::string converts implicitly to Mmsapt. */ |
| Mmsapt(std::string const& line); |
|
|
| void Load(AllOptions::ptr const& opts); |
| void Load(AllOptions::ptr const& opts, bool with_checks); |
| size_t SetTableLimit(size_t limit); |
| std::string const& GetName() const; |
|
|
| #ifndef NO_MOSES |
| TargetPhraseCollection::shared_ptr |
| GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const; |
|
|
| /* Everything from GetTargetPhraseCollectionLEGACY through the CreateRuleLookupManager overloads is declared only in Moses builds (NO_MOSES undefined). */ |
| |
|
|
| void |
| GetTargetPhraseCollectionBatch |
| (ttasksptr const& ttask, InputPathList const& inputPathQueue) const; |
|
|
| /* Two CreateRuleLookupManager overloads; the second takes an additional std::size_t. Neither is const-qualified. */ |
| ChartRuleLookupManager* |
| CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &); |
|
|
| ChartRuleLookupManager* |
| CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &, |
| std::size_t); |
| #endif |
|
|
| void add(std::string const& s1, std::string const& s2, std::string const& a); |
| /* add(s1, s2, a) takes three strings; presumably source sentence, target sentence and their word alignment -- confirm. */ |
|
|
| void setWeights(std::vector<float> const& w); |
|
|
|
|
| |
| |
| |
|
|
|
|
| bool ProvidesPrefixCheck() const; |
| /* PrefixExists takes the task handle and a phrase; presumably only meaningful when ProvidesPrefixCheck() returns true -- confirm. */ |
| bool PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const; |
|
|
| bool isLogVal(int i) const; |
| bool isInteger(int i) const; |
|
|
| /* The next two hooks take a ttasksptr; their names indicate they bracket the processing of one input. */ |
| void InitializeForInput(ttasksptr const& ttask); |
| |
| void CleanUpAfterSentenceProcessing(ttasksptr const& ttask); |
|
|
| /* align() returns a shared pointer to a vector<int>; how the alignment is encoded in that vector is not visible in this header. */ |
| SPTR<std::vector<int> > |
| align(std::string const& src, std::string const& trg) const; |
|
|
| std::vector<std::string> const& |
| GetFeatureNames() const; |
|
|
| SPTR<sapt::DocumentBias> |
| setupDocumentBias(std::map<std::string,float> const& bias) const; |
|
|
| std::vector<float> DefaultWeights() const; |
| }; |
| } |
|
|
|
|