File size: 5,996 Bytes
fd49381 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
#ifndef BLUESCOREFEATURE_H
#define BLUESCOREFEATURE_H
#include <utility>
#include <string>
#include <vector>
#include <boost/unordered_map.hpp>
#include "StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
#include "moses/Phrase.h"
#include "moses/ChartHypothesis.h"
namespace Moses
{
class BleuScoreFeature;
/**
 * FFState subclass used as the state object of BleuScoreFeature.
 *
 * It stores a Phrase, a source and a target length, a scaled reference
 * length and two vectors of size_t (n-gram counts and n-gram matches).
 * Every data member is private; BleuScoreFeature is declared a friend and
 * can therefore read and write them directly.
 */
class BleuScoreState : public FFState
{
public:
  friend class BleuScoreFeature;

  // Static member shared by all states; its definition lives outside this
  // header. NOTE(review): presumably the maximum n-gram order - confirm.
  static size_t bleu_order;

  BleuScoreState(bool is_syntax);

  // Hashing and equality comparison against another FFState
  // (declared here, implemented elsewhere).
  size_t hash() const;
  virtual bool operator==(const FFState& rhs) const;

  // Writes this state to the given stream (implemented elsewhere).
  void print(std::ostream& os) const;

private:
  Phrase m_words;
  size_t m_source_length;
  size_t m_target_length;
  bool m_is_syntax;
  // The scaled reference length is required in order to score incomplete
  // hypotheses against the reference translation.
  float m_scaled_ref_length;
  std::vector<size_t> m_ngram_counts;
  std::vector<size_t> m_ngram_matches;

  // Takes two size_t vectors by non-const reference (implemented elsewhere).
  // NOTE(review): presumably accumulates them into m_ngram_counts and
  // m_ngram_matches - confirm against the definition.
  void AddNgramCountAndMatches(std::vector<size_t>& ngramCounts,
                               std::vector<size_t>& ngramMatches);
};
// Stream-insertion operator for BleuScoreState; only declared here, the
// definition is outside this header.
std::ostream& operator<<(std::ostream& os, const BleuScoreState& bleuState);
// Hash map from a Phrase to a size_t value.
// NOTE(review): presumably n-gram -> occurrence count, judging by its use as
// `ref_ngram_counts` in BleuScoreFeature - confirm.
typedef boost::unordered_map< Phrase, size_t > NGrams;
class RefValue : public std::pair<std::vector<size_t>,NGrams>
{
public:
RefValue& operator=( const RefValue& rhs ) {
first = rhs.first;
second = rhs.second;
return *this;
}
};
/**
 * Stateful feature function for BLEU scoring.
 *
 * This header only declares the interface; apart from a few inline
 * accessors, every member function is defined in the implementation file.
 * The class keeps reference data (m_refs), history counts for a
 * pseudo-document, and several scaling / smoothing settings.
 */
class BleuScoreFeature : public StatefulFeatureFunction
{
public:
  // Read-only access to the static collection of BleuScoreFeature pointers.
  static const std::vector<BleuScoreFeature*>& GetColl() {
    return s_staticColl;
  }

  // Maps keyed by size_t. NOTE(review): the key is presumably a sentence /
  // reference id - confirm against LoadReferences.
  typedef boost::unordered_map<size_t, RefValue > RefCounts;
  typedef boost::unordered_map<size_t, NGrams> Matches;

  BleuScoreFeature(const std::string &line);

  void SetParameter(const std::string& key, const std::string& value);

  std::vector<float> DefaultWeights() const;

  void PrintHistory(std::ostream& out) const;
  void LoadReferences(const std::vector< std::vector< std::string > > &);
  void SetCurrSourceLength(size_t);
  void SetCurrNormSourceLength(size_t);
  void SetCurrShortestRefLength(size_t);
  void SetCurrAvgRefLength(size_t sent_id);
  // Stores `l` in m_avg_input_length.
  void SetAvgInputLength (float l) {
    m_avg_input_length = l;
  }
  void SetCurrReferenceNgrams(size_t sent_id);
  size_t GetShortestRefIndex(size_t ref_id);
  size_t GetClosestRefLength(size_t ref_id, int hypoLength);

  // Two overloads: one takes a single word sequence, the other a batch of
  // word sequences together with per-hypothesis source lengths and
  // reference ids plus a rank and an epoch.
  void UpdateHistory(const std::vector< const Word* >&);
  void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
  void PrintRefLength(const std::vector<size_t>& ref_ids);
  void SetBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
                         bool scaleByInverseLength, bool scaleByAvgInverseLength,
                         float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu);

  // N-gram counting helpers. Each takes a phrase, a reference n-gram table
  // and two output vectors by non-const reference.
  // NOTE(review): for the overloads with unnamed parameters, the order
  // presumably mirrors GetNgramMatchCounts_overlap (phrase, ref_ngram_counts,
  // ret_counts, ret_matches) - confirm against the definitions.
  void GetNgramMatchCounts(Phrase&,
                           const NGrams&,
                           std::vector< size_t >&,
                           std::vector< size_t >&,
                           size_t skip = 0) const;
  void GetNgramMatchCounts_prefix(Phrase&,
                                  const NGrams&,
                                  std::vector< size_t >&,
                                  std::vector< size_t >&,
                                  size_t new_start_indices,
                                  size_t last_end_index) const;
  void GetNgramMatchCounts_overlap(Phrase& phrase,
                                   const NGrams& ref_ngram_counts,
                                   std::vector< size_t >& ret_counts,
                                   std::vector< size_t >& ret_matches,
                                   size_t overlap_index) const;
  void GetClippedNgramMatchesAndCounts(Phrase&,
                                       const NGrams&,
                                       std::vector< size_t >&,
                                       std::vector< size_t >&,
                                       size_t skip = 0) const;

  // Scoring entry points: one overload for Hypothesis (with the previous
  // state), one for ChartHypothesis (with a feature id). Both return an
  // FFState pointer and receive a ScoreComponentCollection accumulator.
  FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
                                const FFState* prev_state,
                                ScoreComponentCollection* accumulator) const;
  FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
                               int featureID,
                               ScoreComponentCollection* accumulator) const;

  // Returns the m_enabled flag.
  bool Enabled() const {
    return m_enabled;
  }

  bool IsUseable(const FactorMask &mask) const;

  float CalculateBleu(BleuScoreState*) const;
  float CalculateBleu(Phrase translation) const;
  const FFState* EmptyHypothesisState(const InputType&) const;

  // Read-only accessors. They are const-qualified (like Enabled()) so they
  // can be called through const references and pointers.
  float GetSourceLengthHistory() const {
    return m_source_length_history;
  }
  float GetTargetLengthHistory() const {
    return m_target_length_history;
  }
  float GetAverageInputLength() const {
    return m_avg_input_length;
  }

  void Load(AllOptions::ptr const& opts);

private:
  static std::vector<BleuScoreFeature*> s_staticColl;

  bool m_enabled;
  bool m_sentence_bleu;
  bool m_simple_history_bleu;
  bool m_is_syntax;

  // counts for pseudo-document
  std::vector< float > m_count_history;
  std::vector< float > m_match_history;
  float m_source_length_history;
  float m_target_length_history;
  float m_ref_length_history;

  size_t m_cur_source_length;
  size_t m_cur_norm_source_length; // length without <s>, </s>
  RefCounts m_refs;
  NGrams m_cur_ref_ngrams;
  float m_cur_ref_length;

  // scale BLEU score by history of input length
  bool m_scale_by_input_length;
  bool m_scale_by_avg_input_length;

  // scale by the inverse of the input length * 100
  bool m_scale_by_inverse_length;
  bool m_scale_by_avg_inverse_length;

  float m_avg_input_length;

  float m_scale_by_x;

  // smoothing factor for history counts
  float m_historySmoothing;

  enum SmoothingScheme { PLUS_ONE = 1, PLUS_POINT_ONE = 2, PAPINENI = 3 };
  SmoothingScheme m_smoothing_scheme;
};
} // Namespace.
#endif //BLUESCOREFEATURE_H
|