| #ifndef BLUESCOREFEATURE_H |
| #define BLUESCOREFEATURE_H |
|
|
| #include <utility> |
| #include <string> |
| #include <vector> |
|
|
| #include <boost/unordered_map.hpp> |
|
|
| #include "StatefulFeatureFunction.h" |
|
|
| #include "moses/FF/FFState.h" |
| #include "moses/Phrase.h" |
| #include "moses/ChartHypothesis.h" |
|
|
| namespace Moses |
| { |
|
|
| class BleuScoreFeature; |
|
|
| class BleuScoreState : public FFState |
| { |
| public: |
| friend class BleuScoreFeature; |
| static size_t bleu_order; |
|
|
| BleuScoreState(bool is_syntax); |
| size_t hash() const; |
| virtual bool operator==(const FFState& other) const; |
|
|
| void print(std::ostream& out) const; |
|
|
| private: |
| Phrase m_words; |
| size_t m_source_length; |
| size_t m_target_length; |
| bool m_is_syntax; |
| |
| float m_scaled_ref_length; |
|
|
| std::vector< size_t > m_ngram_counts; |
| std::vector< size_t > m_ngram_matches; |
|
|
| void AddNgramCountAndMatches(std::vector< size_t >& counts, std::vector< size_t >& matches); |
| }; |
|
|
|
|
| std::ostream& operator<<(std::ostream& out, const BleuScoreState& state); |
|
|
| typedef boost::unordered_map< Phrase, size_t > NGrams; |
|
|
| class RefValue : public std::pair<std::vector<size_t>,NGrams> |
| { |
| public: |
| RefValue& operator=( const RefValue& rhs ) { |
| first = rhs.first; |
| second = rhs.second; |
| return *this; |
| } |
| }; |
|
|
|
|
| class BleuScoreFeature : public StatefulFeatureFunction |
| { |
| public: |
| static const std::vector<BleuScoreFeature*>& GetColl() { |
| return s_staticColl; |
| } |
|
|
| typedef boost::unordered_map<size_t, RefValue > RefCounts; |
| typedef boost::unordered_map<size_t, NGrams> Matches; |
|
|
| BleuScoreFeature(const std::string &line); |
|
|
| void SetParameter(const std::string& key, const std::string& value); |
|
|
| std::vector<float> DefaultWeights() const; |
|
|
| void PrintHistory(std::ostream& out) const; |
| void LoadReferences(const std::vector< std::vector< std::string > > &); |
| void SetCurrSourceLength(size_t); |
| void SetCurrNormSourceLength(size_t); |
| void SetCurrShortestRefLength(size_t); |
| void SetCurrAvgRefLength(size_t sent_id); |
| void SetAvgInputLength (float l) { |
| m_avg_input_length = l; |
| } |
| void SetCurrReferenceNgrams(size_t sent_id); |
| size_t GetShortestRefIndex(size_t ref_id); |
| size_t GetClosestRefLength(size_t ref_id, int hypoLength); |
| void UpdateHistory(const std::vector< const Word* >&); |
| void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch); |
| void PrintRefLength(const std::vector<size_t>& ref_ids); |
| void SetBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength, |
| bool scaleByInverseLength, bool scaleByAvgInverseLength, |
| float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu); |
|
|
| void GetNgramMatchCounts(Phrase&, |
| const NGrams&, |
| std::vector< size_t >&, |
| std::vector< size_t >&, |
| size_t skip = 0) const; |
| void GetNgramMatchCounts_prefix(Phrase&, |
| const NGrams&, |
| std::vector< size_t >&, |
| std::vector< size_t >&, |
| size_t new_start_indices, |
| size_t last_end_index) const; |
| void GetNgramMatchCounts_overlap(Phrase& phrase, |
| const NGrams& ref_ngram_counts, |
| std::vector< size_t >& ret_counts, |
| std::vector< size_t >& ret_matches, |
| size_t overlap_index) const; |
| void GetClippedNgramMatchesAndCounts(Phrase&, |
| const NGrams&, |
| std::vector< size_t >&, |
| std::vector< size_t >&, |
| size_t skip = 0) const; |
|
|
| FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, |
| const FFState* prev_state, |
| ScoreComponentCollection* accumulator) const; |
| FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, |
| int featureID, |
| ScoreComponentCollection* accumulator) const; |
|
|
| bool Enabled() const { |
| return m_enabled; |
| } |
|
|
| bool IsUseable(const FactorMask &mask) const; |
|
|
| float CalculateBleu(BleuScoreState*) const; |
| float CalculateBleu(Phrase translation) const; |
| const FFState* EmptyHypothesisState(const InputType&) const; |
|
|
| float GetSourceLengthHistory() { |
| return m_source_length_history; |
| } |
| float GetTargetLengthHistory() { |
| return m_target_length_history; |
| } |
| float GetAverageInputLength() { |
| return m_avg_input_length; |
| } |
|
|
| void Load(AllOptions::ptr const& opts); |
|
|
| private: |
| static std::vector<BleuScoreFeature*> s_staticColl; |
|
|
| bool m_enabled; |
| bool m_sentence_bleu; |
| bool m_simple_history_bleu; |
| bool m_is_syntax; |
| |
| std::vector< float > m_count_history; |
| std::vector< float > m_match_history; |
| float m_source_length_history; |
| float m_target_length_history; |
| float m_ref_length_history; |
|
|
| size_t m_cur_source_length; |
| size_t m_cur_norm_source_length; |
| RefCounts m_refs; |
| NGrams m_cur_ref_ngrams; |
| float m_cur_ref_length; |
|
|
| |
| bool m_scale_by_input_length; |
| bool m_scale_by_avg_input_length; |
|
|
| |
| bool m_scale_by_inverse_length; |
| bool m_scale_by_avg_inverse_length; |
|
|
| float m_avg_input_length; |
|
|
| float m_scale_by_x; |
|
|
| |
| float m_historySmoothing; |
|
|
| enum SmoothingScheme { PLUS_ONE = 1, PLUS_POINT_ONE = 2, PAPINENI = 3 }; |
| SmoothingScheme m_smoothing_scheme; |
| }; |
|
|
| } |
|
|
| #endif |
|
|
|
|