| | |
| | |
| | |
| | #pragma once |
| | #define PROVIDES_RANKED_SAMPLING 0 |
| |
|
| | #include <boost/thread.hpp> |
| | #include <boost/scoped_ptr.hpp> |
| | #include <boost/intrusive_ptr.hpp> |
| |
|
| | #include "moses/TypeDef.h" |
| | #include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h" |
| | #include "moses/TranslationModel/UG/generic/sampling/Sampling.h" |
| | #include "moses/TranslationModel/UG/generic/file_io/ug_stream.h" |
| | #include "moses/TranslationModel/UG/generic/threading/ug_thread_pool.h" |
| |
|
| | #include "moses/TranslationModel/UG/mm/ug_mm_ttrack.h" |
| | #include "moses/TranslationModel/UG/mm/ug_mm_tsa.h" |
| | #include "moses/TranslationModel/UG/mm/tpt_tokenindex.h" |
| | #include "moses/TranslationModel/UG/mm/ug_corpus_token.h" |
| | #include "moses/TranslationModel/UG/mm/ug_typedefs.h" |
| | #include "moses/TranslationModel/UG/mm/tpt_pickler.h" |
| | #include "moses/TranslationModel/UG/mm/ug_bitext.h" |
| | #include "moses/TranslationModel/UG/mm/ug_bitext_sampler.h" |
| | #include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h" |
| |
|
| | #include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h" |
| |
|
| | #ifndef NO_MOSES |
| | #include "moses/FF/LexicalReordering/LexicalReordering.h" |
| | #endif |
| |
|
| | #include "moses/InputFileStream.h" |
| | #include "moses/FactorTypeSet.h" |
| | #include "moses/TargetPhrase.h" |
| | #include <boost/dynamic_bitset.hpp> |
| | #include "moses/TargetPhraseCollection.h" |
| | #include "util/usage.hh" |
| | #include <map> |
| |
|
| | #include "moses/TranslationModel/PhraseDictionary.h" |
| | #include "sapt_phrase_scorers.h" |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | namespace Moses |
| | { |
| | class Mmsapt |
| | #ifndef NO_MOSES |
| | : public PhraseDictionary |
| | #endif |
| | { |
| | class TPCOllCache; |
| | friend class Alignment; |
| | std::map<std::string,std::string> param; |
| | std::string m_name; |
| | #ifndef NO_MOSES |
| | |
| | friend class PhraseDictionaryGroup; |
| | #endif |
| | public: |
| | typedef sapt::L2R_Token<sapt::SimpleWordId> Token; |
| | typedef sapt::mmBitext<Token> mmbitext; |
| | typedef sapt::imBitext<Token> imbitext; |
| | typedef sapt::Bitext<Token> bitext; |
| | typedef sapt::TSA<Token> tsa; |
| | typedef sapt::PhraseScorer<Token> pscorer; |
| | private: |
| | |
| | SPTR<mmbitext> btfix; |
| | SPTR<imbitext> btdyn; |
| | std::string m_bname, m_extra_data, m_bias_file,m_bias_server; |
| | std::string L1; |
| | std::string L2; |
| | float m_lbop_conf; |
| | float m_lex_alpha; |
| | |
| | |
| | size_t m_default_sample_size; |
| | size_t m_min_sample_size; |
| | size_t m_workers; |
| | std::vector<std::string> m_feature_set_names; |
| | std::string m_bias_logfile; |
| | boost::scoped_ptr<std::ofstream> m_bias_logger; |
| | std::ostream* m_bias_log; |
| | int m_bias_loglevel; |
| | #ifndef NO_MOSES |
| | LexicalReordering* m_lr_func; |
| | #endif |
| | std::string m_lr_func_name; |
| | sapt::sampling_method m_sampling_method; |
| | boost::scoped_ptr<ug::ThreadPool> m_thread_pool; |
| | public: |
| | void* const bias_key; |
| | void* const cache_key; |
| | void* const context_key; |
| | private: |
| | boost::shared_ptr<sapt::SamplingBias> m_bias; |
| | boost::shared_ptr<TPCollCache> m_cache; |
| | size_t m_cache_size; |
| | |
| | |
| |
|
| | |
| | |
| |
|
| |
|
| | |
| | std::vector<std::string> m_feature_names; |
| | std::vector<bool> m_is_logval; |
| | std::vector<bool> m_is_integer; |
| |
|
| | std::vector<SPTR<pscorer > > m_active_ff_fix; |
| | std::vector<SPTR<pscorer > > m_active_ff_dyn; |
| | std::vector<SPTR<pscorer > > m_active_ff_common; |
| | |
| |
|
| | bool m_track_coord; |
| | |
| | |
| | std::vector<std::vector<SPTR<std::vector<float> > > > m_sid_coord_list; |
| | std::vector<size_t> m_coord_spaces; |
| |
|
| | void |
| | parse_factor_spec(std::vector<FactorType>& flist, std::string const key); |
| |
|
| | void |
| | register_ff(SPTR<pscorer> const& ff, std::vector<SPTR<pscorer> > & registry); |
| |
|
| | template<typename fftype> |
| | void |
| | check_ff(std::string const ffname,std::vector<SPTR<pscorer> >* registry = NULL); |
| | |
| |
|
| | template<typename fftype> |
| | void |
| | check_ff(std::string const ffname, float const xtra, |
| | std::vector<SPTR<pscorer> >* registry = NULL); |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void init(std::string const& line); |
| | mutable boost::shared_mutex m_lock; |
| | |
| | |
| | bool withPbwd; |
| | bool poolCounts; |
| | std::vector<FactorType> m_ifactor, m_ofactor; |
| |
|
| | void setup_local_feature_functions(); |
| | void setup_bias(ttasksptr const& ttask); |
| |
|
| | #if PROVIDES_RANKED_SAMPLING |
| | void |
| | set_bias_for_ranking(ttasksptr const& ttask, SPTR<sapt::Bitext<Token> const> bt); |
| | #endif |
| | private: |
| |
|
| | void read_config_file(std::string fname, std::map<std::string,std::string>& param); |
| |
|
| | |
| | std::vector<float> feature_weights; |
| |
|
| | std::vector<std::vector<tpt::id_type> > wlex21; |
| | |
| | typedef sapt::mm2dTable<tpt::id_type,tpt::id_type,uint32_t,uint32_t> mm2dtable_t; |
| | mm2dtable_t COOCraw; |
| |
|
| | TargetPhrase* |
| | mkTPhrase(ttasksptr const& ttask, |
| | Phrase const& src, |
| | sapt::PhrasePair<Token>* fix, |
| | sapt::PhrasePair<Token>* dyn, |
| | SPTR<sapt::Bitext<Token> > const& dynbt) const; |
| |
|
| | void |
| | process_pstats |
| | (Phrase const& src, |
| | uint64_t const pid1, |
| | sapt::pstats const& stats, |
| | sapt::Bitext<Token> const & bt, |
| | TargetPhraseCollection::shared_ptr tpcoll |
| | ) const; |
| |
|
| | bool |
| | pool_pstats |
| | (Phrase const& src, |
| | uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta, |
| | uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, |
| | TargetPhraseCollection::shared_ptr tpcoll) const; |
| |
|
| | bool |
| | combine_pstats |
| | (Phrase const& src, |
| | uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta, |
| | uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, |
| | TargetPhraseCollection::shared_ptr tpcoll) const; |
| |
|
| | void load_extra_data(std::string bname, bool locking); |
| | void load_bias(std::string bname); |
| |
|
| | public: |
| | |
| | Mmsapt(std::string const& line); |
| |
|
| | void Load(AllOptions::ptr const& opts); |
| | void Load(AllOptions::ptr const& opts, bool with_checks); |
| | size_t SetTableLimit(size_t limit); |
| | std::string const& GetName() const; |
| |
|
| | #ifndef NO_MOSES |
| | TargetPhraseCollection::shared_ptr |
| | GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const; |
| |
|
| | |
| | |
| |
|
| | void |
| | GetTargetPhraseCollectionBatch |
| | (ttasksptr const& ttask, InputPathList const& inputPathQueue) const; |
| |
|
| | |
| | ChartRuleLookupManager* |
| | CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &); |
| |
|
| | ChartRuleLookupManager* |
| | CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &, |
| | std::size_t); |
| | #endif |
| |
|
| | void add(std::string const& s1, std::string const& s2, std::string const& a); |
| | |
| |
|
| | void setWeights(std::vector<float> const& w); |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | bool ProvidesPrefixCheck() const; |
| | |
| | bool PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const; |
| |
|
| | bool isLogVal(int i) const; |
| | bool isInteger(int i) const; |
| |
|
| | |
| | void InitializeForInput(ttasksptr const& ttask); |
| | |
| | void CleanUpAfterSentenceProcessing(ttasksptr const& ttask); |
| |
|
| | |
| | SPTR<std::vector<int> > |
| | align(std::string const& src, std::string const& trg) const; |
| |
|
| | std::vector<std::string> const& |
| | GetFeatureNames() const; |
| |
|
| | SPTR<sapt::DocumentBias> |
| | setupDocumentBias(std::map<std::string,float> const& bias) const; |
| |
|
| | std::vector<float> DefaultWeights() const; |
| | }; |
| | } |
| |
|
| |
|