sleepyhead111 committed on
Commit
bc7bc6f
·
verified ·
1 Parent(s): c1c19ef

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. mosesdecoder/moses/FF/BleuScoreFeature.cpp +892 -0
  2. mosesdecoder/moses/FF/CountNonTerms.h +50 -0
  3. mosesdecoder/moses/FF/DeleteRules.h +49 -0
  4. mosesdecoder/moses/FF/Diffs.h +150 -0
  5. mosesdecoder/moses/FF/Dsg-Feature/DsgModel.cpp +156 -0
  6. mosesdecoder/moses/FF/ExampleStatelessFF.h +43 -0
  7. mosesdecoder/moses/FF/GlobalLexicalModel.cpp +199 -0
  8. mosesdecoder/moses/FF/HyperParameterAsWeight.h +55 -0
  9. mosesdecoder/moses/FF/LexicalReordering/HReorderingBackwardState.cpp +50 -0
  10. mosesdecoder/moses/FF/LexicalReordering/HReorderingBackwardState.h +33 -0
  11. mosesdecoder/moses/FF/LexicalReordering/HReorderingForwardState.cpp +78 -0
  12. mosesdecoder/moses/FF/LexicalReordering/HReorderingForwardState.h +33 -0
  13. mosesdecoder/moses/FF/LexicalReordering/LRModel.cpp +219 -0
  14. mosesdecoder/moses/FF/LexicalReordering/LRModel.h +133 -0
  15. mosesdecoder/moses/FF/LexicalReordering/LRState.cpp +88 -0
  16. mosesdecoder/moses/FF/LexicalReordering/LRState.h +81 -0
  17. mosesdecoder/moses/FF/LexicalReordering/LexicalReordering.cpp +170 -0
  18. mosesdecoder/moses/FF/LexicalReordering/LexicalReordering.h +106 -0
  19. mosesdecoder/moses/FF/LexicalReordering/PhraseBasedReorderingState.cpp +72 -0
  20. mosesdecoder/moses/FF/LexicalReordering/ReorderingStack.cpp +87 -0
  21. mosesdecoder/moses/FF/LexicalReordering/ReorderingStack.h +40 -0
  22. mosesdecoder/moses/FF/LexicalReordering/SparseReordering.cpp +315 -0
  23. mosesdecoder/moses/FF/LexicalReordering/SparseReordering.h +139 -0
  24. mosesdecoder/moses/FF/MaxSpanFreeNonTermSource.h +53 -0
  25. mosesdecoder/moses/FF/NieceTerminal.cpp +110 -0
  26. mosesdecoder/moses/FF/NieceTerminal.h +54 -0
  27. mosesdecoder/moses/FF/OSM-Feature/OpSequenceModel.h +68 -0
  28. mosesdecoder/moses/FF/PhraseBoundaryFeature.cpp +118 -0
  29. mosesdecoder/moses/FF/PhraseLengthFeature.h +54 -0
  30. mosesdecoder/moses/FF/PhraseOrientationFeature.h +431 -0
  31. mosesdecoder/moses/FF/RulePairUnlexicalizedSource.cpp +90 -0
  32. mosesdecoder/moses/FF/RulePairUnlexicalizedSource.h +61 -0
  33. mosesdecoder/moses/FF/SetSourcePhrase.cpp +21 -0
  34. mosesdecoder/moses/FF/SourceWordDeletionFeature.cpp +107 -0
  35. mosesdecoder/moses/FF/StatefulFeatureFunction.h +96 -0
  36. mosesdecoder/moses/FF/TargetNgramFeature.h +239 -0
  37. mosesdecoder/moses/FF/UnknownWordPenaltyProducer.h +64 -0
  38. mosesdecoder/moses/FF/VW/AlignmentConstraint.h +40 -0
  39. mosesdecoder/moses/FF/VW/ThreadLocalByFeatureStorage.h +82 -0
  40. mosesdecoder/moses/FF/VW/VWFeatureBase.h +160 -0
  41. mosesdecoder/moses/FF/VW/VWFeatureContext.h +116 -0
  42. mosesdecoder/moses/FF/VW/VWFeatureContextBilingual.h +45 -0
  43. mosesdecoder/moses/FF/VW/VWFeatureSourceBagOfWords.h +34 -0
  44. mosesdecoder/moses/FF/VW/VWFeatureSourceBigrams.h +34 -0
  45. mosesdecoder/moses/FF/VW/VWFeatureSourceExternalFeatures.h +64 -0
  46. mosesdecoder/moses/FF/VW/VWFeatureSourceIndicator.h +42 -0
  47. mosesdecoder/moses/FF/VW/VWFeatureSourcePhraseInternal.h +39 -0
  48. mosesdecoder/moses/FF/VW/VWFeatureSourceSenseWindow.h +141 -0
  49. mosesdecoder/moses/FF/VW/VWFeatureTargetBigrams.h +33 -0
  50. mosesdecoder/moses/FF/VW/VWFeatureTargetIndicator.h +31 -0
mosesdecoder/moses/FF/BleuScoreFeature.cpp ADDED
@@ -0,0 +1,892 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "BleuScoreFeature.h"
2
+
3
+ #include "moses/StaticData.h"
4
+ #include "moses/Hypothesis.h"
5
+ #include "moses/FactorCollection.h"
6
+ #include "util/exception.hh"
7
+
8
+ using namespace std;
9
+
10
+ namespace Moses
11
+ {
12
+
13
+ size_t BleuScoreState::bleu_order = 4;
14
+ std::vector<BleuScoreFeature*> BleuScoreFeature::s_staticColl;
15
+
16
+ BleuScoreState::BleuScoreState(bool is_syntax)
17
+ : m_words(1),
18
+ m_source_length(0),
19
+ m_target_length(0),
20
+ m_is_syntax(false),
21
+ m_scaled_ref_length(0),
22
+ m_ngram_counts(bleu_order),
23
+ m_ngram_matches(bleu_order)
24
+ { }
25
+
26
+ size_t BleuScoreState::hash() const
27
+ {
28
+ if (m_is_syntax)
29
+ return 0;
30
+
31
+ size_t ret = hash_value(m_words);
32
+ return ret;
33
+ }
34
+
35
+ bool BleuScoreState::operator==(const FFState& o) const
36
+ {
37
+ if (&o == this)
38
+ return true;
39
+
40
+ if (m_is_syntax)
41
+ return true;
42
+
43
+ const BleuScoreState& other = static_cast<const BleuScoreState&>(o);
44
+ return m_words == other.m_words;
45
+ }
46
+
47
+ std::ostream& operator<<(std::ostream& out, const BleuScoreState& state)
48
+ {
49
+ state.print(out);
50
+ return out;
51
+ }
52
+
53
+ void BleuScoreState::print(std::ostream& out) const
54
+ {
55
+ out << "ref=" << m_scaled_ref_length
56
+ << ";source=" << m_source_length
57
+ << ";target=" << m_target_length << ";counts=";
58
+ for (size_t i = 0; i < bleu_order; ++i) {
59
+ out << m_ngram_matches[i] << "/" << m_ngram_counts[i] << ",";
60
+ }
61
+ out << "ctxt=" << m_words;
62
+
63
+ }
64
+
65
+ void BleuScoreState::AddNgramCountAndMatches(std::vector< size_t >& counts,
66
+ std::vector< size_t >& matches)
67
+ {
68
+ for (size_t order = 0; order < BleuScoreState::bleu_order; ++order) {
69
+ m_ngram_counts[order] += counts[order];
70
+ m_ngram_matches[order] += matches[order];
71
+ }
72
+ }
73
+
74
+
75
// Construct the BLEU feature from a moses.ini configuration line.
// Registers the instance in the static collection s_staticColl so other
// components can find it, marks the feature non-tuneable, and then reads
// its parameters (including reference files) via ReadParameters().
BleuScoreFeature::BleuScoreFeature(const std::string &line)
  :StatefulFeatureFunction(1, line),
   m_enabled(true),                  // active unless explicitly disabled later
   m_sentence_bleu(true),            // default: per-sentence BLEU
   m_simple_history_bleu(false),
   m_count_history(BleuScoreState::bleu_order),
   m_match_history(BleuScoreState::bleu_order),
   m_source_length_history(0),
   m_target_length_history(0),
   m_ref_length_history(0),
   m_scale_by_input_length(true),
   m_scale_by_avg_input_length(false),
   m_scale_by_inverse_length(false),
   m_scale_by_avg_inverse_length(false),
   m_scale_by_x(1),
   m_historySmoothing(0.9),          // decay factor for the pseudo-document history
   m_smoothing_scheme(PLUS_POINT_ONE)
{
  std::cerr << "Initializing BleuScoreFeature." << std::endl;
  s_staticColl.push_back(this);

  // weight is managed by the training procedure, not by the tuner
  m_tuneable = false;

  ReadParameters();
  std::cerr << "Finished initializing BleuScoreFeature." << std::endl;
}
101
+
102
// Handle feature-specific configuration keys.
// "references": comma-separated list of reference files, one sentence per
// line; all files must contain the same number of sentences. The parsed
// references are handed to LoadReferences(). Unknown keys are delegated
// to the base class.
void BleuScoreFeature::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "references") {
    vector<string> referenceFiles = Tokenize(value, ",");
    UTIL_THROW_IF2(referenceFiles.size() == 0, "No reference file");
    vector<vector<string> > references(referenceFiles.size());

    for (size_t i =0; i < referenceFiles.size(); ++i) {
      ifstream in(referenceFiles[i].c_str());
      if (!in) {
        UTIL_THROW2("Unable to load references from " << referenceFiles[i]);
      }
      string line;
      while (getline(in,line)) {
        /* if (GetSearchAlgorithm() == CYKPlus) {
        stringstream tmp;
        tmp << "<s> " << line << " </s>";
        line = tmp.str();
        }
        */
        references[i].push_back(line);
      }
      // parallel reference files must align sentence-by-sentence
      if (i > 0) {
        if (references[i].size() != references[i-1].size()) {
          UTIL_THROW2("Reference files are of different lengths");
        }
      }
      in.close();
    } // for (size_t i =0; i < referenceFiles.size(); ++i) {

    //Set the references in the bleu feature
    LoadReferences(references);

  } else {
    StatefulFeatureFunction::SetParameter(key, value);
  }

}
140
+
141
+ std::vector<float> BleuScoreFeature::DefaultWeights() const
142
+ {
143
+ std::vector<float> ret(m_numScoreComponents, 1);
144
+ return ret;
145
+ }
146
+
147
+ void BleuScoreFeature::PrintHistory(std::ostream& out) const
148
+ {
149
+ out << "source length history=" << m_source_length_history << endl;
150
+ out << "target length history=" << m_target_length_history << endl;
151
+ out << "ref length history=" << m_ref_length_history << endl;
152
+
153
+ for (size_t i = 0; i < BleuScoreState::bleu_order; ++i) {
154
+ out << "match history/count history (" << i << "):" << m_match_history[i] << "/" << m_count_history[i] << endl;
155
+ }
156
+ }
157
+
158
+ void BleuScoreFeature::SetBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
159
+ bool scaleByInverseLength, bool scaleByAvgInverseLength,
160
+ float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu)
161
+ {
162
+ m_enabled = !disable;
163
+ m_sentence_bleu = sentenceBleu;
164
+ m_simple_history_bleu = simpleHistoryBleu;
165
+ m_scale_by_input_length = scaleByInputLength;
166
+ m_scale_by_avg_input_length = scaleByAvgInputLength;
167
+ m_scale_by_inverse_length = scaleByInverseLength;
168
+ m_scale_by_avg_inverse_length = scaleByAvgInverseLength;
169
+ m_scale_by_x = scaleByX;
170
+ m_historySmoothing = historySmoothing;
171
+ m_smoothing_scheme = (SmoothingScheme)scheme;
172
+ }
173
+
174
// Incoming references (refs) are stored as refs[file_id][[sent_id][reference]]
// This data structure: m_refs[sent_id][[vector<length>][ngrams]]
//
// For every sentence id, collects the token length of each reference and
// pools the n-grams (orders 1..bleu_order) of ALL references for that
// sentence into one shared count map.
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
{
  m_refs.clear();
  FactorCollection& fc = FactorCollection::Instance();
  for (size_t file_id = 0; file_id < refs.size(); file_id++) {
    for (size_t sent_id = 0; sent_id < refs[file_id].size(); sent_id++) {
      const string& ref = refs[file_id][sent_id];
      vector<string> refTokens = Tokenize(ref);
      // first reference file creates the entry; later files extend it
      if (file_id == 0)
        m_refs[sent_id] = RefValue();
      pair<vector<size_t>,NGrams>& ref_pair = m_refs[sent_id];
      (ref_pair.first).push_back(refTokens.size());
      // enumerate every n-gram of each order over the reference tokens
      for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
        for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
          Phrase ngram(1);
          for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) {
            const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]);
            Word w;
            w.SetFactor(0, f);
            ngram.AddWord(w);
          }
          ref_pair.second[ngram] += 1;
        }
      }
    }
  }

  // cerr << "Number of ref files: " << refs.size() << endl;
  // for (size_t i = 0; i < m_refs.size(); ++i) {
  // cerr << "Sent id " << i << ", number of references: " << (m_refs[i].first).size() << endl;
  // }
}
208
+
209
+ void BleuScoreFeature::SetCurrSourceLength(size_t source_length)
210
+ {
211
+ m_cur_source_length = source_length;
212
+ }
213
+ void BleuScoreFeature::SetCurrNormSourceLength(size_t source_length)
214
+ {
215
+ m_cur_norm_source_length = source_length;
216
+ }
217
+
218
+ // m_refs[sent_id][[vector<length>][ngrams]]
219
+ void BleuScoreFeature::SetCurrShortestRefLength(size_t sent_id)
220
+ {
221
+ // look for shortest reference
222
+ int shortestRef = -1;
223
+ for (size_t i = 0; i < (m_refs[sent_id].first).size(); ++i) {
224
+ if (shortestRef == -1 || (m_refs[sent_id].first)[i] < shortestRef)
225
+ shortestRef = (m_refs[sent_id].first)[i];
226
+ }
227
+ m_cur_ref_length = shortestRef;
228
+ // cerr << "Set shortest cur_ref_length: " << m_cur_ref_length << endl;
229
+ }
230
+
231
+ void BleuScoreFeature::SetCurrAvgRefLength(size_t sent_id)
232
+ {
233
+ // compute average reference length
234
+ size_t sum = 0;
235
+ size_t numberRefs = (m_refs[sent_id].first).size();
236
+ for (size_t i = 0; i < numberRefs; ++i) {
237
+ sum += (m_refs[sent_id].first)[i];
238
+ }
239
+ m_cur_ref_length = (float)sum/numberRefs;
240
+ // cerr << "Set average cur_ref_length: " << m_cur_ref_length << endl;
241
+ }
242
+
243
+ void BleuScoreFeature::SetCurrReferenceNgrams(size_t sent_id)
244
+ {
245
+ m_cur_ref_ngrams = m_refs[sent_id].second;
246
+ }
247
+
248
+ size_t BleuScoreFeature::GetShortestRefIndex(size_t ref_id)
249
+ {
250
+ // look for shortest reference
251
+ int shortestRef = -1;
252
+ size_t shortestRefIndex = 0;
253
+ for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) {
254
+ if (shortestRef == -1 || (m_refs[ref_id].first)[i] < shortestRef) {
255
+ shortestRef = (m_refs[ref_id].first)[i];
256
+ shortestRefIndex = i;
257
+ }
258
+ }
259
+ return shortestRefIndex;
260
+ }
261
+
262
/*
* Update the pseudo-document O after each translation of a source sentence.
* (O is an exponentially-weighted moving average of vectors c(e;{r_k}))
* O = m_historySmoothing * (O + c(e_oracle))
* O_f = m_historySmoothing * (O_f + |f|) input length of pseudo-document
*
* Uses the current-sentence data previously set via SetCurrSourceLength /
* SetCurrReferenceNgrams / SetCurr*RefLength.
*/
void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo)
{
  Phrase phrase(hypo);
  std::vector< size_t > ngram_counts(BleuScoreState::bleu_order);
  std::vector< size_t > ngram_matches(BleuScoreState::bleu_order);

  // compute vector c(e;{r_k}):
  // vector of effective reference length, number of ngrams in e, number of ngram matches between e and r_k
  GetNgramMatchCounts(phrase, m_cur_ref_ngrams, ngram_counts, ngram_matches, 0);

  // update counts and matches for every ngram length with counts from hypo
  for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
    m_count_history[i] = m_historySmoothing * (m_count_history[i] + ngram_counts[i]);
    m_match_history[i] = m_historySmoothing * (m_match_history[i] + ngram_matches[i]);
  }

  // update counts for reference and target length
  m_source_length_history = m_historySmoothing * (m_source_length_history + m_cur_source_length);
  m_target_length_history = m_historySmoothing * (m_target_length_history + hypo.size());
  m_ref_length_history = m_historySmoothing * (m_ref_length_history + m_cur_ref_length);
}
289
+
290
+ /*
291
+ * Update history with a batch of translations
292
+ */
293
+ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypos, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch)
294
+ {
295
+ for (size_t ref_id = 0; ref_id < hypos.size(); ++ref_id) {
296
+ Phrase phrase(hypos[ref_id]);
297
+ std::vector< size_t > ngram_counts(BleuScoreState::bleu_order);
298
+ std::vector< size_t > ngram_matches(BleuScoreState::bleu_order);
299
+
300
+ // set current source and reference information for each oracle in the batch
301
+ size_t cur_source_length = sourceLengths[ref_id];
302
+ size_t hypo_length = hypos[ref_id].size();
303
+ size_t cur_ref_length = GetClosestRefLength(ref_ids[ref_id], hypo_length);
304
+ NGrams cur_ref_ngrams = m_refs[ref_ids[ref_id]].second;
305
+ cerr << "reference length: " << cur_ref_length << endl;
306
+
307
+ // compute vector c(e;{r_k}):
308
+ // vector of effective reference length, number of ngrams in e, number of ngram matches between e and r_k
309
+ GetNgramMatchCounts(phrase, cur_ref_ngrams, ngram_counts, ngram_matches, 0);
310
+
311
+ // update counts and matches for every ngram length with counts from hypo
312
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
313
+ m_count_history[i] += ngram_counts[i];
314
+ m_match_history[i] += ngram_matches[i];
315
+
316
+ // do this for last position in batch
317
+ if (ref_id == hypos.size() - 1) {
318
+ m_count_history[i] *= m_historySmoothing;
319
+ m_match_history[i] *= m_historySmoothing;
320
+ }
321
+ }
322
+
323
+ // update counts for reference and target length
324
+ m_source_length_history += cur_source_length;
325
+ m_target_length_history += hypos[ref_id].size();
326
+ m_ref_length_history += cur_ref_length;
327
+
328
+ // do this for last position in batch
329
+ if (ref_id == hypos.size() - 1) {
330
+ cerr << "Rank " << rank << ", epoch " << epoch << " ,source length history: " << m_source_length_history << " --> " << m_source_length_history * m_historySmoothing << endl;
331
+ cerr << "Rank " << rank << ", epoch " << epoch << " ,target length history: " << m_target_length_history << " --> " << m_target_length_history * m_historySmoothing << endl;
332
+ m_source_length_history *= m_historySmoothing;
333
+ m_target_length_history *= m_historySmoothing;
334
+ m_ref_length_history *= m_historySmoothing;
335
+ }
336
+ }
337
+ }
338
+
339
+ /*
340
+ * Print batch of reference translations
341
+ */
342
+ /*void BleuScoreFeature::PrintReferenceLength(const vector<size_t>& ref_ids) {
343
+ for (size_t ref_id = 0; ref_id < ref_ids.size(); ++ref_id){
344
+ size_t cur_ref_length = (m_refs[ref_ids[ref_id]].first)[0]; // TODO!!
345
+ cerr << "reference length: " << cur_ref_length << endl;
346
+ }
347
+ }*/
348
+
349
+ size_t BleuScoreFeature::GetClosestRefLength(size_t ref_id, int hypoLength)
350
+ {
351
+ // look for closest reference
352
+ int currentDist = -1;
353
+ int closestRefLength = -1;
354
+ for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) {
355
+ if (closestRefLength == -1 || abs(hypoLength - (int)(m_refs[ref_id].first)[i]) < currentDist) {
356
+ closestRefLength = (m_refs[ref_id].first)[i];
357
+ currentDist = abs(hypoLength - (int)(m_refs[ref_id].first)[i]);
358
+ }
359
+ }
360
+ return (size_t)closestRefLength;
361
+ }
362
+
363
/*
* Given a phrase (current translation) calculate its ngram counts and
* its ngram matches against the ngrams in the reference translation
*
* ret_counts[k] / ret_matches[k] are INCREMENTED (not reset) for each
* (k+1)-gram ending at positions >= skip_first; skip_first lets callers
* avoid re-scoring n-grams already counted by a previous state.
*/
void BleuScoreFeature::GetNgramMatchCounts(Phrase& phrase,
    const NGrams& ref_ngram_counts,
    std::vector< size_t >& ret_counts,
    std::vector< size_t >& ret_matches,
    size_t skip_first) const
{
  NGrams::const_iterator ref_ngram_counts_iter;
  size_t ngram_start_idx, ngram_end_idx;

  // Chiang et al (2008) use unclipped counts of ngram matches
  for (size_t end_idx = skip_first; end_idx < phrase.GetSize(); end_idx++) {
    for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
      // an (order+1)-gram cannot start before position 0
      if (order > end_idx) break;

      ngram_end_idx = end_idx;
      ngram_start_idx = end_idx - order;

      Phrase ngram = phrase.GetSubString(Range(ngram_start_idx, ngram_end_idx), 0);
      ret_counts[order]++;

      // unclipped: every occurrence counts as a match if the ngram is in the reference
      ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
      if (ref_ngram_counts_iter != ref_ngram_counts.end())
        ret_matches[order]++;
    }
  }
}
393
+
394
// score ngrams of words that have been added before the previous word span
//
// Scores n-grams STARTING at positions [0, new_start_indices), i.e. newly
// prepended words, capping the n-gram end at last_end_index so that spans
// already scored elsewhere are not double-counted.
void BleuScoreFeature::GetNgramMatchCounts_prefix(Phrase& phrase,
    const NGrams& ref_ngram_counts,
    std::vector< size_t >& ret_counts,
    std::vector< size_t >& ret_matches,
    size_t new_start_indices,
    size_t last_end_index) const
{
  NGrams::const_iterator ref_ngram_counts_iter;
  size_t ngram_start_idx, ngram_end_idx;

  // Chiang et al (2008) use unclipped counts of ngram matches
  for (size_t start_idx = 0; start_idx < new_start_indices; start_idx++) {
    for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
      ngram_start_idx = start_idx;
      ngram_end_idx = start_idx + order;
      // NOTE(review): this guard can never fire (ngram_end_idx = start_idx + order
      // is always >= order); it appears copied from GetNgramMatchCounts — confirm
      if (order > ngram_end_idx) break;
      // stop before crossing into the region scored by other calls
      if (ngram_end_idx > last_end_index) break;

      Phrase ngram = phrase.GetSubString(Range(ngram_start_idx, ngram_end_idx), 0);
      ret_counts[order]++;

      ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
      if (ref_ngram_counts_iter != ref_ngram_counts.end())
        ret_matches[order]++;
    }
  }
}
422
+
423
// score ngrams around the overlap of two previously scored phrases
//
// Only n-grams that SPAN the junction at overlap_index are counted: the
// n-gram must start before overlap_index and end at or after it, within a
// window of (bleu_order - 1) positions past the junction.
void BleuScoreFeature::GetNgramMatchCounts_overlap(Phrase& phrase,
    const NGrams& ref_ngram_counts,
    std::vector< size_t >& ret_counts,
    std::vector< size_t >& ret_matches,
    size_t overlap_index) const
{
  NGrams::const_iterator ref_ngram_counts_iter;
  size_t ngram_start_idx, ngram_end_idx;

  // Chiang et al (2008) use unclipped counts of ngram matches
  for (size_t end_idx = overlap_index; end_idx < phrase.GetSize(); end_idx++) {
    // beyond this window no n-gram of order <= bleu_order can cross the junction
    if (end_idx >= (overlap_index+BleuScoreState::bleu_order-1)) break;
    for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
      if (order > end_idx) break;

      ngram_end_idx = end_idx;
      ngram_start_idx = end_idx - order;
      if (ngram_start_idx >= overlap_index) continue; // only score ngrams that span the overlap point

      Phrase ngram = phrase.GetSubString(Range(ngram_start_idx, ngram_end_idx), 0);
      ret_counts[order]++;

      ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
      if (ref_ngram_counts_iter != ref_ngram_counts.end())
        ret_matches[order]++;
    }
  }
}
452
+
453
// Like GetNgramMatchCounts, but with standard BLEU clipping: each distinct
// n-gram's match count is capped at its occurrence count in the reference.
// First pass builds a per-order histogram of matched n-grams; second pass
// clips and accumulates into ret_matches.
void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
    const NGrams& ref_ngram_counts,
    std::vector< size_t >& ret_counts,
    std::vector< size_t >& ret_matches,
    size_t skip_first) const
{
  NGrams::const_iterator ref_ngram_counts_iter;
  size_t ngram_start_idx, ngram_end_idx;

  // pass 1: count how often each matched n-gram occurs in the hypothesis
  Matches ngram_matches;
  for (size_t end_idx = skip_first; end_idx < phrase.GetSize(); end_idx++) {
    for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
      if (order > end_idx) break;

      ngram_end_idx = end_idx;
      ngram_start_idx = end_idx - order;

      Phrase ngram = phrase.GetSubString(Range(ngram_start_idx, ngram_end_idx), 0);
      ret_counts[order]++;

      ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
      if (ref_ngram_counts_iter != ref_ngram_counts.end()) {
        ngram_matches[order][ngram]++;
      }
    }
  }

  // clip ngram matches
  for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
    NGrams::const_iterator iter;

    // iterate over ngram counts for every ngram order
    for (iter=ngram_matches[order].begin(); iter != ngram_matches[order].end(); ++iter) {
      // cap at the reference count (entries here are guaranteed present in the reference)
      ref_ngram_counts_iter = ref_ngram_counts.find(iter->first);
      if (iter->second > ref_ngram_counts_iter->second) {
        ret_matches[order] += ref_ngram_counts_iter->second;
      } else {
        ret_matches[order] += iter->second;
      }
    }
  }
}
495
+
496
+ /*
497
+ * Given a previous state, compute Bleu score for the updated state with an additional target
498
+ * phrase translated.
499
+ */
500
+ FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
501
+ const FFState* prev_state,
502
+ ScoreComponentCollection* accumulator) const
503
+ {
504
+ if (!m_enabled) return new BleuScoreState(m_is_syntax);
505
+
506
+ NGrams::const_iterator reference_ngrams_iter;
507
+ const BleuScoreState& ps = static_cast<const BleuScoreState&>(*prev_state);
508
+ BleuScoreState* new_state = new BleuScoreState(ps);
509
+
510
+ float old_bleu, new_bleu;
511
+ size_t num_new_words, ctx_start_idx, ctx_end_idx;
512
+
513
+ // Calculate old bleu;
514
+ old_bleu = CalculateBleu(new_state);
515
+
516
+ // Get context and append new words.
517
+ num_new_words = cur_hypo.GetCurrTargetLength();
518
+ if (num_new_words == 0) {
519
+ return new_state;
520
+ }
521
+
522
+ Phrase new_words = ps.m_words;
523
+ new_words.Append(cur_hypo.GetCurrTargetPhrase());
524
+ //cerr << "NW: " << new_words << endl;
525
+
526
+ // get ngram matches for new words
527
+ GetNgramMatchCounts(new_words,
528
+ m_cur_ref_ngrams,
529
+ new_state->m_ngram_counts,
530
+ new_state->m_ngram_matches,
531
+ new_state->m_words.GetSize()); // number of words in previous states
532
+
533
+ // Update state variables
534
+ ctx_end_idx = new_words.GetSize()-1;
535
+ size_t bleu_context_length = BleuScoreState::bleu_order -1;
536
+ if (ctx_end_idx > bleu_context_length) {
537
+ ctx_start_idx = ctx_end_idx - bleu_context_length;
538
+ } else {
539
+ ctx_start_idx = 0;
540
+ }
541
+
542
+ const Bitmap &coverageVector = cur_hypo.GetWordsBitmap();
543
+ new_state->m_source_length = coverageVector.GetNumWordsCovered();
544
+
545
+ new_state->m_words = new_words.GetSubString(Range(ctx_start_idx,
546
+ ctx_end_idx));
547
+ new_state->m_target_length += cur_hypo.GetCurrTargetLength();
548
+
549
+ // we need a scaled reference length to compare the current target phrase to the corresponding reference phrase
550
+ new_state->m_scaled_ref_length = m_cur_ref_length *
551
+ ((float)coverageVector.GetNumWordsCovered()/coverageVector.GetSize());
552
+
553
+ // Calculate new bleu.
554
+ new_bleu = CalculateBleu(new_state);
555
+
556
+ // Set score to new Bleu score
557
+ accumulator->PlusEquals(this, new_bleu - old_bleu);
558
+ return new_state;
559
+ }
560
+
561
// Chart (hierarchical/syntax) decoding path. Combines up to two previous
// hypothesis states, scores only the NEWLY created n-grams (left context,
// right context, or the overlap around a glue point, depending on how the
// rule combined its children), and adds the BLEU delta to the accumulator.
FFState* BleuScoreFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID,
    ScoreComponentCollection* accumulator ) const
{
  if (!m_enabled) return new BleuScoreState(m_is_syntax);

  NGrams::const_iterator reference_ngrams_iter;

  const Phrase& curr_target_phrase = static_cast<const Phrase&>(cur_hypo.GetCurrTargetPhrase());
  // cerr << "\nCur target phrase: " << cur_hypo.GetTargetLHS() << " --> " << curr_target_phrase << endl;

  // Calculate old bleu of previous states
  float old_bleu = 0, new_bleu = 0;
  size_t num_old_words = 0, num_words_first_prev = 0;
  size_t num_words_added_left = 0, num_words_added_right = 0;

  // double-check cases where more than two previous hypotheses were combined
  assert(cur_hypo.GetPrevHypos().size() <= 2);
  BleuScoreState* new_state;
  if (cur_hypo.GetPrevHypos().size() == 0)
    new_state = new BleuScoreState(m_is_syntax);
  else {
    // seed the new state from the first child, then fold in the others
    const FFState* prev_state_zero = cur_hypo.GetPrevHypo(0)->GetFFState(featureID);
    const BleuScoreState& ps_zero = static_cast<const BleuScoreState&>(*prev_state_zero);
    new_state = new BleuScoreState(ps_zero);
    num_words_first_prev = ps_zero.m_target_length;

    for (size_t i = 0; i < cur_hypo.GetPrevHypos().size(); ++i) {
      const FFState* prev_state = cur_hypo.GetPrevHypo(i)->GetFFState(featureID);
      const BleuScoreState* ps = static_cast<const BleuScoreState*>(prev_state);
      BleuScoreState* ps_nonConst = const_cast<BleuScoreState*>(ps);
      // cerr << "prev phrase: " << cur_hypo.GetPrevHypo(i)->GetOutputPhrase()
      // << " ( " << cur_hypo.GetPrevHypo(i)->GetTargetLHS() << ")" << endl;

      old_bleu += CalculateBleu(ps_nonConst);
      num_old_words += ps->m_target_length;

      if (i > 0)
        // add ngram matches from other previous states
        new_state->AddNgramCountAndMatches(ps_nonConst->m_ngram_counts, ps_nonConst->m_ngram_matches);
    }
  }

  // check if we are already done (don't add <s> and </s>)
  size_t numWordsCovered = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();
  if (numWordsCovered == m_cur_source_length) {
    // Bleu score stays the same, do not need to add anything
    //accumulator->PlusEquals(this, 0);
    return new_state;
  }

  // set new context
  Phrase new_words = cur_hypo.GetOutputPhrase();
  new_state->m_words = new_words;
  size_t num_curr_words = new_words.GetSize();

  // get ngram matches for new words
  if (num_old_words == 0) {
    // no children: the whole phrase is new, score everything
    // cerr << "compute right ngram context" << endl;
    GetNgramMatchCounts(new_words,
                        m_cur_ref_ngrams,
                        new_state->m_ngram_counts,
                        new_state->m_ngram_matches,
                        0);
  } else if (new_words.GetSize() == num_old_words) {
    // two hypotheses were glued together, compute new ngrams on the basis of first hypothesis
    num_words_added_right = num_curr_words - num_words_first_prev;
    // score around overlap point
    // cerr << "compute overlap ngram context (" << (num_words_first_prev) << ")" << endl;
    GetNgramMatchCounts_overlap(new_words,
                                m_cur_ref_ngrams,
                                new_state->m_ngram_counts,
                                new_state->m_ngram_matches,
                                num_words_first_prev);
  } else if (num_old_words + curr_target_phrase.GetNumTerminals() == num_curr_words) {
    assert(curr_target_phrase.GetSize() == curr_target_phrase.GetNumTerminals()+1);
    // previous hypothesis + rule with 1 non-terminal were combined (NT substituted by Ts)
    // locate the non-terminal to split the rule into left/right terminal runs
    for (size_t i = 0; i < curr_target_phrase.GetSize(); ++i)
      if (curr_target_phrase.GetWord(i).IsNonTerminal()) {
        num_words_added_left = i;
        num_words_added_right = curr_target_phrase.GetSize() - (i+1);
        break;
      }

    // left context
    // cerr << "compute left ngram context" << endl;
    if (num_words_added_left > 0)
      GetNgramMatchCounts_prefix(new_words,
                                 m_cur_ref_ngrams,
                                 new_state->m_ngram_counts,
                                 new_state->m_ngram_matches,
                                 num_words_added_left,
                                 num_curr_words - num_words_added_right - 1);

    // right context
    // cerr << "compute right ngram context" << endl;
    if (num_words_added_right > 0)
      GetNgramMatchCounts(new_words,
                          m_cur_ref_ngrams,
                          new_state->m_ngram_counts,
                          new_state->m_ngram_matches,
                          num_words_added_left + num_old_words);
  } else {
    cerr << "undefined state.. " << endl;
    exit(1);
  }

  // Update state variables
  // keep only the trailing (bleu_order - 1) words as recombination context
  size_t ctx_start_idx = 0;
  size_t ctx_end_idx = new_words.GetSize()-1;
  size_t bleu_context_length = BleuScoreState::bleu_order -1;
  if (ctx_end_idx > bleu_context_length) {
    ctx_start_idx = ctx_end_idx - bleu_context_length;
  }

  new_state->m_source_length = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();
  new_state->m_words = new_words.GetSubString(Range(ctx_start_idx, ctx_end_idx));
  new_state->m_target_length = cur_hypo.GetOutputPhrase().GetSize();

  // we need a scaled reference length to compare the current target phrase to the corresponding
  // reference phrase
  size_t cur_source_length = m_cur_source_length;
  new_state->m_scaled_ref_length = m_cur_ref_length * (float(new_state->m_source_length)/cur_source_length);

  // Calculate new bleu.
  new_bleu = CalculateBleu(new_state);

  // Set score to new Bleu score
  accumulator->PlusEquals(this, new_bleu - old_bleu);
  return new_state;
}
691
+
692
+ /**
693
+ * Calculate real sentence Bleu score of complete translation
694
+ */
695
+ float BleuScoreFeature::CalculateBleu(Phrase translation) const
696
+ {
697
+ if (translation.GetSize() == 0)
698
+ return 0.0;
699
+
700
+ Phrase normTranslation = translation;
701
+ // remove start and end symbol for chart decoding
702
+ if (m_cur_source_length != m_cur_norm_source_length) {
703
+ Range* range = new Range(1, translation.GetSize()-2);
704
+ normTranslation = translation.GetSubString(*range);
705
+ }
706
+
707
+ // get ngram matches for translation
708
+ BleuScoreState* state = new BleuScoreState(m_is_syntax);
709
+ GetClippedNgramMatchesAndCounts(normTranslation,
710
+ m_cur_ref_ngrams,
711
+ state->m_ngram_counts,
712
+ state->m_ngram_matches,
713
+ 0); // number of words in previous states
714
+
715
+ // set state variables
716
+ state->m_words = normTranslation;
717
+ state->m_source_length = m_cur_norm_source_length;
718
+ state->m_target_length = normTranslation.GetSize();
719
+ state->m_scaled_ref_length = m_cur_ref_length;
720
+
721
+ // Calculate bleu.
722
+ return CalculateBleu(state);
723
+ }
724
+
725
+ /*
726
+ * Calculate Bleu score for a partial hypothesis given as state.
727
+ */
728
+ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const
729
+ {
730
+ if (!state->m_ngram_counts[0]) return 0;
731
+ if (!state->m_ngram_matches[0]) return 0; // if we have no unigram matches, score should be 0
732
+
733
+ float precision = 1.0;
734
+ float smooth = 1;
735
+ float smoothed_count, smoothed_matches;
736
+
737
+ if (m_sentence_bleu || m_simple_history_bleu) {
738
+ // Calculate geometric mean of modified ngram precisions
739
+ // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
740
+ // = BP * 4th root(PRODUCT_1_4 p_n)
741
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
742
+ if (state->m_ngram_counts[i]) {
743
+ smoothed_matches = state->m_ngram_matches[i];
744
+ smoothed_count = state->m_ngram_counts[i];
745
+
746
+ switch (m_smoothing_scheme) {
747
+ case PLUS_ONE:
748
+ default:
749
+ if (i > 0) {
750
+ // smoothing for all n > 1
751
+ smoothed_matches += 1;
752
+ smoothed_count += 1;
753
+ }
754
+ break;
755
+ case PLUS_POINT_ONE:
756
+ if (i > 0) {
757
+ // smoothing for all n > 1
758
+ smoothed_matches += 0.1;
759
+ smoothed_count += 0.1;
760
+ }
761
+ break;
762
+ case PAPINENI:
763
+ if (state->m_ngram_matches[i] == 0) {
764
+ smooth *= 0.5;
765
+ smoothed_matches += smooth;
766
+ smoothed_count += smooth;
767
+ }
768
+ break;
769
+ }
770
+
771
+ if (m_simple_history_bleu) {
772
+ smoothed_matches += m_match_history[i];
773
+ smoothed_count += m_count_history[i];
774
+ }
775
+
776
+ precision *= smoothed_matches/smoothed_count;
777
+ }
778
+ }
779
+
780
+ // take geometric mean
781
+ precision = pow(precision, (float)1/4);
782
+
783
+ // Apply brevity penalty if applicable.
784
+ // BP = 1 if c > r
785
+ // BP = e^(1- r/c)) if c <= r
786
+ // where
787
+ // c: length of the candidate translation
788
+ // r: effective reference length (sum of best match lengths for each candidate sentence)
789
+ if (m_simple_history_bleu) {
790
+ if ((m_target_length_history + state->m_target_length) < (m_ref_length_history + state->m_scaled_ref_length)) {
791
+ float smoothed_target_length = m_target_length_history + state->m_target_length;
792
+ float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
793
+ precision *= exp(1 - (smoothed_ref_length/smoothed_target_length));
794
+ }
795
+ } else {
796
+ if (state->m_target_length < state->m_scaled_ref_length) {
797
+ float target_length = state->m_target_length;
798
+ float ref_length = state->m_scaled_ref_length;
799
+ precision *= exp(1 - (ref_length/target_length));
800
+ }
801
+ }
802
+
803
+ //cerr << "precision: " << precision << endl;
804
+
805
+ // Approximate bleu score as of Chiang/Resnik is scaled by the size of the input:
806
+ // B(e;f,{r_k}) = (O_f + |f|) * BLEU(O + c(e;{r_k}))
807
+ // where c(e;) is a vector of reference length, ngram counts and ngram matches
808
+ if (m_scale_by_input_length) {
809
+ precision *= m_cur_norm_source_length;
810
+ } else if (m_scale_by_avg_input_length) {
811
+ precision *= m_avg_input_length;
812
+ } else if (m_scale_by_inverse_length) {
813
+ precision *= (100/m_cur_norm_source_length);
814
+ } else if (m_scale_by_avg_inverse_length) {
815
+ precision *= (100/m_avg_input_length);
816
+ }
817
+
818
+ return precision * m_scale_by_x;
819
+ } else {
820
+ // Revised history BLEU: compute Bleu in the context of the pseudo-document
821
+ // B(b) = size_of_oracle_doc * (Bleu(B_hist + b) - Bleu(B_hist))
822
+ // Calculate geometric mean of modified ngram precisions
823
+ // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
824
+ // = BP * 4th root(PRODUCT_1_4 p_n)
825
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
826
+ if (state->m_ngram_counts[i]) {
827
+ smoothed_matches = m_match_history[i] + state->m_ngram_matches[i] + 0.1;
828
+ smoothed_count = m_count_history[i] + state->m_ngram_counts[i] + 0.1;
829
+ precision *= smoothed_matches/smoothed_count;
830
+ }
831
+ }
832
+
833
+ // take geometric mean
834
+ precision = pow(precision, (float)1/4);
835
+
836
+ // Apply brevity penalty if applicable.
837
+ if ((m_target_length_history + state->m_target_length) < (m_ref_length_history + state->m_scaled_ref_length))
838
+ precision *= exp(1 - ((m_ref_length_history + state->m_scaled_ref_length)/(m_target_length_history + state->m_target_length)));
839
+
840
+ cerr << "precision: " << precision << endl;
841
+
842
+ // **BLEU score of pseudo-document**
843
+ float precision_pd = 1.0;
844
+ if (m_target_length_history > 0) {
845
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++)
846
+ if (m_count_history[i] != 0)
847
+ precision_pd *= (m_match_history[i] + 0.1)/(m_count_history[i] + 0.1);
848
+
849
+ // take geometric mean
850
+ precision_pd = pow(precision_pd, (float)1/4);
851
+
852
+ // Apply brevity penalty if applicable.
853
+ if (m_target_length_history < m_ref_length_history)
854
+ precision_pd *= exp(1 - (m_ref_length_history/m_target_length_history));
855
+ } else
856
+ precision_pd = 0;
857
+ // **end BLEU of pseudo-document**
858
+
859
+ cerr << "precision pd: " << precision_pd << endl;
860
+
861
+ float sentence_impact;
862
+ if (m_target_length_history > 0)
863
+ sentence_impact = m_target_length_history * (precision - precision_pd);
864
+ else
865
+ sentence_impact = precision;
866
+
867
+ cerr << "sentence impact: " << sentence_impact << endl;
868
+ return sentence_impact * m_scale_by_x;
869
+ }
870
+ }
871
+
872
+ const FFState* BleuScoreFeature::EmptyHypothesisState(const InputType& input) const
873
+ {
874
+ return new BleuScoreState(m_is_syntax);
875
+ }
876
+
877
+ bool BleuScoreFeature::IsUseable(const FactorMask &mask) const
878
+ {
879
+ // TODO: Was this meant to return mask[0]!?
880
+ bool ret = mask[0];
881
+ return 0;
882
+ }
883
+
884
+ void
885
+ BleuScoreFeature::
886
+ Load(AllOptions::ptr const& opts)
887
+ {
888
+ m_is_syntax = is_syntax(opts->search.algo);
889
+ }
890
+
891
+ } // namespace.
892
+
mosesdecoder/moses/FF/CountNonTerms.h ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "StatelessFeatureFunction.h"
4
+
5
+ namespace Moses
6
+ {
7
+
8
+ class CountNonTerms : public StatelessFeatureFunction
9
+ {
10
+ public:
11
+ CountNonTerms(const std::string &line);
12
+ bool IsUseable(const FactorMask &mask) const {
13
+ return true;
14
+ }
15
+
16
+ void EvaluateInIsolation(const Phrase &source
17
+ , const TargetPhrase &targetPhrase
18
+ , ScoreComponentCollection &scoreBreakdown
19
+ , ScoreComponentCollection &estimatedScores) const;
20
+
21
+ void EvaluateWithSourceContext(const InputType &input
22
+ , const InputPath &inputPath
23
+ , const TargetPhrase &targetPhrase
24
+ , const StackVec *stackVec
25
+ , ScoreComponentCollection &scoreBreakdown
26
+ , ScoreComponentCollection *estimatedScores = NULL) const {
27
+ }
28
+
29
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
30
+ , const TranslationOptionList &translationOptionList) const {
31
+ }
32
+
33
+ void EvaluateWhenApplied(const Hypothesis& hypo,
34
+ ScoreComponentCollection* accumulator) const {
35
+ }
36
+
37
+ void EvaluateWhenApplied(
38
+ const ChartHypothesis& hypo,
39
+ ScoreComponentCollection* accumulator) const {
40
+ }
41
+
42
+ void SetParameter(const std::string& key, const std::string& value);
43
+
44
+ void Load(AllOptions::ptr const& opts);
45
+ protected:
46
+ bool m_all, m_sourceSyntax, m_targetSyntax;
47
+ };
48
+
49
+ }
50
+
mosesdecoder/moses/FF/DeleteRules.h ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <boost/unordered_set.hpp>
5
+ #include "StatelessFeatureFunction.h"
6
+
7
+ namespace Moses
8
+ {
9
+
10
+ class DeleteRules : public StatelessFeatureFunction
11
+ {
12
+ protected:
13
+ std::string m_path;
14
+ boost::unordered_set<size_t> m_ruleHashes;
15
+ public:
16
+ DeleteRules(const std::string &line);
17
+
18
+ void Load(AllOptions::ptr const& opts);
19
+
20
+ bool IsUseable(const FactorMask &mask) const {
21
+ return true;
22
+ }
23
+
24
+ void EvaluateInIsolation(const Phrase &source
25
+ , const TargetPhrase &targetPhrase
26
+ , ScoreComponentCollection &scoreBreakdown
27
+ , ScoreComponentCollection &estimatedScores) const;
28
+ void EvaluateWithSourceContext(const InputType &input
29
+ , const InputPath &inputPath
30
+ , const TargetPhrase &targetPhrase
31
+ , const StackVec *stackVec
32
+ , ScoreComponentCollection &scoreBreakdown
33
+ , ScoreComponentCollection *estimatedScores = NULL) const;
34
+
35
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
36
+ , const TranslationOptionList &translationOptionList) const;
37
+
38
+ void EvaluateWhenApplied(const Hypothesis& hypo,
39
+ ScoreComponentCollection* accumulator) const;
40
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
41
+ ScoreComponentCollection* accumulator) const;
42
+
43
+
44
+ void SetParameter(const std::string& key, const std::string& value);
45
+
46
+ };
47
+
48
+ }
49
+
mosesdecoder/moses/FF/Diffs.h ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef moses_Diffs_h
2
+ #define moses_Diffs_h
3
+
4
+ #include <cmath>
5
+
6
+ namespace Moses
7
+ {
8
+
9
+ typedef char Diff;
10
+ typedef std::vector<Diff> Diffs;
11
+
12
+ template <class Sequence, class Pred>
13
+ void CreateDiffRec(size_t** c,
14
+ const Sequence &s1,
15
+ const Sequence &s2,
16
+ size_t start,
17
+ size_t i,
18
+ size_t j,
19
+ Diffs& diffs,
20
+ Pred pred)
21
+ {
22
+ if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
23
+ CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
24
+ diffs.push_back(Diff('m'));
25
+ } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
26
+ CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
27
+ diffs.push_back(Diff('i'));
28
+ } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
29
+ CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
30
+ diffs.push_back(Diff('d'));
31
+ }
32
+ }
33
+
34
+ template <class Sequence, class Pred>
35
+ Diffs CreateDiff(const Sequence& s1,
36
+ const Sequence& s2,
37
+ Pred pred)
38
+ {
39
+
40
+ Diffs diffs;
41
+
42
+ size_t n = s2.size();
43
+
44
+ int start = 0;
45
+ int m_end = s1.size() - 1;
46
+ int n_end = s2.size() - 1;
47
+
48
+ while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) {
49
+ diffs.push_back(Diff('m'));
50
+ start++;
51
+ }
52
+ while(start <= m_end && start <= n_end && pred(s1[m_end], s2[n_end])) {
53
+ m_end--;
54
+ n_end--;
55
+ }
56
+
57
+ size_t m_new = m_end - start + 1;
58
+ size_t n_new = n_end - start + 1;
59
+
60
+ size_t** c = new size_t*[m_new + 1];
61
+ for(size_t i = 0; i <= m_new; ++i) {
62
+ c[i] = new size_t[n_new + 1];
63
+ c[i][0] = 0;
64
+ }
65
+ for(size_t j = 0; j <= n_new; ++j)
66
+ c[0][j] = 0;
67
+ for(size_t i = 1; i <= m_new; ++i)
68
+ for(size_t j = 1; j <= n_new; ++j)
69
+ if(pred(s1[i - 1 + start], s2[j - 1 + start]))
70
+ c[i][j] = c[i-1][j-1] + 1;
71
+ else
72
+ c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j];
73
+
74
+ CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred);
75
+
76
+ for(size_t i = 0; i <= m_new; ++i)
77
+ delete[] c[i];
78
+ delete[] c;
79
+
80
+ for (size_t i = n_end + 1; i < n; ++i)
81
+ diffs.push_back(Diff('m'));
82
+
83
+ return diffs;
84
+ }
85
+
86
+ template <class Sequence>
87
+ Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
88
+ {
89
+ return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
90
+ }
91
+
92
+ template <class Sequence, class Sig, class Stats>
93
+ void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
94
+ {
95
+ if(sig.size() != stats.size())
96
+ throw "Signature size differs from score array size.";
97
+
98
+ size_t m = 0, d = 0, i = 0, s = 0;
99
+ Diffs diff = CreateDiff(s1, s2);
100
+
101
+ for(int j = 0; j < (int)diff.size(); ++j) {
102
+ if(diff[j] == 'm')
103
+ m++;
104
+ else if(diff[j] == 'd') {
105
+ d++;
106
+ int k = 0;
107
+ while(j - k >= 0 && j + 1 + k < (int)diff.size() &&
108
+ diff[j - k] == 'd' && diff[j + 1 + k] == 'i') {
109
+ d--;
110
+ s++;
111
+ k++;
112
+ }
113
+ j += k;
114
+ } else if(diff[j] == 'i')
115
+ i++;
116
+ }
117
+
118
+ for(size_t j = 0; j < sig.size(); ++j) {
119
+ switch (sig[j]) {
120
+ case 'l':
121
+ stats[j] += d + i + s;
122
+ break;
123
+ case 'm':
124
+ stats[j] += m;
125
+ break;
126
+ case 'd':
127
+ stats[j] += d;
128
+ break;
129
+ case 'i':
130
+ stats[j] += i;
131
+ break;
132
+ case 's':
133
+ stats[j] += s;
134
+ break;
135
+ case 'r':
136
+ float macc = 1;
137
+ if (d + i + s + m)
138
+ macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m);
139
+ if(macc > 0)
140
+ stats[j] += log(macc);
141
+ else
142
+ stats[j] += log(1.0/(float)(d + i + s + m + 1));
143
+ break;
144
+ }
145
+ }
146
+ }
147
+
148
+ }
149
+
150
+ #endif
mosesdecoder/moses/FF/Dsg-Feature/DsgModel.cpp ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <fstream>
2
+ #include "DsgModel.h"
3
+ #include "dsgHyp.h"
4
+ #include "moses/Util.h"
5
+ #include "util/exception.hh"
6
+
7
+ using namespace std;
8
+ using namespace lm::ngram;
9
+
10
+ namespace Moses
11
+ {
12
+
13
+ DesegModel::DesegModel(const std::string &line)
14
+ :StatefulFeatureFunction(5, line )
15
+ {
16
+ tFactor = 0;
17
+ order=5;
18
+ numFeatures = 5;
19
+ optimistic = 1;
20
+ ReadParameters();
21
+ }
22
+
23
+ DesegModel::~DesegModel()
24
+ {
25
+ delete DSGM;
26
+ }
27
+
28
+ void DesegModel :: readLanguageModel(const char *lmFile)
29
+ {
30
+ DSGM = ConstructDsgLM(m_lmPath.c_str());
31
+ State startState = DSGM->NullContextState();
32
+ desegT=new Desegmenter(m_desegPath,m_simple);// Desegmentation Table
33
+ }
34
+
35
+
36
+ void DesegModel::Load(AllOptions::ptr const& opts)
37
+ {
38
+ m_options = opts;
39
+ readLanguageModel(m_lmPath.c_str());
40
+ }
41
+
42
+
43
+
44
+ void DesegModel:: EvaluateInIsolation(const Phrase &source
45
+ , const TargetPhrase &targetPhrase
46
+ , ScoreComponentCollection &scoreBreakdown
47
+ , ScoreComponentCollection &estimatedScores) const
48
+ {
49
+
50
+ dsgHypothesis obj;
51
+ vector <string> myTargetPhrase;
52
+ vector<float> scores;
53
+ vector<string> targ_phrase; //stores the segmented tokens in the target phrase
54
+ const AlignmentInfo &align = targetPhrase.GetAlignTerm();
55
+
56
+ for (int i = 0; i < targetPhrase.GetSize(); i++) {
57
+ targ_phrase.push_back(targetPhrase.GetWord(i).GetFactor(tFactor)->GetString().as_string());
58
+ }
59
+
60
+ obj.setState(DSGM->NullContextState());
61
+ obj.setPhrases(targ_phrase);
62
+ obj.calculateDsgProbinIsol(*DSGM,*desegT,align);
63
+ obj.populateScores(scores,numFeatures);
64
+ estimatedScores.PlusEquals(this, scores);
65
+ }
66
+
67
+
68
+ FFState* DesegModel::EvaluateWhenApplied(
69
+ const Hypothesis& cur_hypo,
70
+ const FFState* prev_state,
71
+ ScoreComponentCollection* accumulator) const
72
+ {
73
+ const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
74
+ const Range &src_rng =cur_hypo.GetCurrSourceWordsRange();
75
+ const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
76
+ size_t sourceOffset = src_rng.GetStartPos();
77
+
78
+ dsgHypothesis obj;
79
+ vector<float> scores;
80
+ vector<string> targ_phrase; //stores the segmented tokens in the target phrase
81
+ bool isCompleted;
82
+
83
+ isCompleted=cur_hypo.IsSourceCompleted();
84
+ for (int i = 0; i < cur_hypo.GetCurrTargetLength(); i++) {
85
+ targ_phrase.push_back(target.GetWord(i).GetFactor(tFactor)->GetString().as_string());
86
+ }
87
+
88
+ obj.setState(prev_state);
89
+ obj.setPhrases( targ_phrase );
90
+ obj.calculateDsgProb(*DSGM,*desegT,isCompleted,align, sourceOffset, optimistic);
91
+ obj.populateScores(scores,numFeatures);
92
+ accumulator->PlusEquals(this, scores);
93
+ return obj.saveState();
94
+
95
+ }
96
+
97
+ FFState* DesegModel::EvaluateWhenApplied(
98
+ const ChartHypothesis& /* cur_hypo */,
99
+ int /* featureID - used to index the state in the previous hypotheses */,
100
+ ScoreComponentCollection* accumulator) const
101
+ {
102
+ UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
103
+ }
104
+
105
+ const FFState* DesegModel::EmptyHypothesisState(const InputType &input) const
106
+ {
107
+ VERBOSE(3,"DesegModel::EmptyHypothesisState()" << endl);
108
+ State startState = DSGM->BeginSentenceState();
109
+ dsgState ss= dsgState(startState);
110
+ return new dsgState(ss);
111
+ }
112
+
113
+ std::string DesegModel::GetScoreProducerWeightShortName(unsigned idx) const
114
+ {
115
+ return "dsg";
116
+ }
117
+
118
+
119
+ void DesegModel::SetParameter(const std::string& key, const std::string& value)
120
+ {
121
+
122
+ if (key == "path") {
123
+ m_lmPath = value;
124
+ } else if (key == "contiguity-features") {
125
+ if(value == "no")
126
+ numFeatures = 1;
127
+ else
128
+ numFeatures = 5;
129
+ } else if (key == "output-factor") {
130
+ tFactor = Scan<int>(value);
131
+ } else if (key == "optimistic") {
132
+ if (value == "n")
133
+ optimistic = 0;
134
+ else
135
+ optimistic = 1;
136
+ } else if (key == "deseg-path") {
137
+ m_desegPath = Scan<int>(value);
138
+ } else if (key == "deseg-scheme") {
139
+ if(value == "s")
140
+ m_simple = 1;
141
+ else
142
+ m_simple = 0;
143
+ } else if (key == "order") {
144
+ order = Scan<int>(value);
145
+ } else {
146
+ StatefulFeatureFunction::SetParameter(key, value);
147
+ }
148
+ }
149
+
150
+ bool DesegModel::IsUseable(const FactorMask &mask) const
151
+ {
152
+ bool ret = mask[0];
153
+ return ret;
154
+ }
155
+
156
+ } // namespace
mosesdecoder/moses/FF/ExampleStatelessFF.h ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "StatelessFeatureFunction.h"
5
+
6
+ namespace Moses
7
+ {
8
+
9
+ class ExampleStatelessFF : public StatelessFeatureFunction
10
+ {
11
+ public:
12
+ ExampleStatelessFF(const std::string &line);
13
+
14
+ bool IsUseable(const FactorMask &mask) const {
15
+ return true;
16
+ }
17
+
18
+ void EvaluateInIsolation(const Phrase &source
19
+ , const TargetPhrase &targetPhrase
20
+ , ScoreComponentCollection &scoreBreakdown
21
+ , ScoreComponentCollection &estimatedScores) const;
22
+ void EvaluateWithSourceContext(const InputType &input
23
+ , const InputPath &inputPath
24
+ , const TargetPhrase &targetPhrase
25
+ , const StackVec *stackVec
26
+ , ScoreComponentCollection &scoreBreakdown
27
+ , ScoreComponentCollection *estimatedScores = NULL) const;
28
+
29
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
30
+ , const TranslationOptionList &translationOptionList) const;
31
+
32
+ void EvaluateWhenApplied(const Hypothesis& hypo,
33
+ ScoreComponentCollection* accumulator) const;
34
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
35
+ ScoreComponentCollection* accumulator) const;
36
+
37
+
38
+ void SetParameter(const std::string& key, const std::string& value);
39
+
40
+ };
41
+
42
+ }
43
+
mosesdecoder/moses/FF/GlobalLexicalModel.cpp ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <fstream>
2
+ #include "GlobalLexicalModel.h"
3
+ #include "moses/StaticData.h"
4
+ #include "moses/InputFileStream.h"
5
+ #include "moses/TranslationOption.h"
6
+ #include "moses/TranslationTask.h"
7
+ #include "moses/FactorCollection.h"
8
+ #include "util/exception.hh"
9
+
10
+ using namespace std;
11
+
12
+ namespace Moses
13
+ {
14
+ GlobalLexicalModel::GlobalLexicalModel(const std::string &line)
15
+ : StatelessFeatureFunction(1, line)
16
+ {
17
+ std::cerr << "Creating global lexical model...\n";
18
+ ReadParameters();
19
+
20
+ // define bias word
21
+ FactorCollection &factorCollection = FactorCollection::Instance();
22
+ m_bias = new Word();
23
+ const Factor* factor = factorCollection.AddFactor( Input, m_inputFactorsVec[0], "**BIAS**" );
24
+ m_bias->SetFactor( m_inputFactorsVec[0], factor );
25
+
26
+ }
27
+
28
+ void GlobalLexicalModel::SetParameter(const std::string& key, const std::string& value)
29
+ {
30
+ if (key == "path") {
31
+ m_filePath = value;
32
+ } else if (key == "input-factor") {
33
+ m_inputFactorsVec = Tokenize<FactorType>(value,",");
34
+ } else if (key == "output-factor") {
35
+ m_outputFactorsVec = Tokenize<FactorType>(value,",");
36
+ } else {
37
+ StatelessFeatureFunction::SetParameter(key, value);
38
+ }
39
+ }
40
+
41
+ GlobalLexicalModel::~GlobalLexicalModel()
42
+ {
43
+ // delete words in the hash data structure
44
+ DoubleHash::const_iterator iter;
45
+ for(iter = m_hash.begin(); iter != m_hash.end(); iter++ ) {
46
+ boost::unordered_map< const Word*, float, UnorderedComparer<Word>, UnorderedComparer<Word> >::const_iterator iter2;
47
+ for(iter2 = iter->second.begin(); iter2 != iter->second.end(); iter2++ ) {
48
+ delete iter2->first; // delete input word
49
+ }
50
+ delete iter->first; // delete output word
51
+ }
52
+ }
53
+
54
+ void GlobalLexicalModel::Load(AllOptions::ptr const& opts)
55
+ {
56
+ m_options = opts;
57
+ FactorCollection &factorCollection = FactorCollection::Instance();
58
+ const std::string& oFactorDelimiter = opts->output.factor_delimiter;
59
+ const std::string& iFactorDelimiter = opts->input.factor_delimiter;
60
+
61
+
62
+ VERBOSE(2, "Loading global lexical model from file " << m_filePath << endl);
63
+
64
+ m_inputFactors = FactorMask(m_inputFactorsVec);
65
+ m_outputFactors = FactorMask(m_outputFactorsVec);
66
+ InputFileStream inFile(m_filePath);
67
+
68
+ // reading in data one line at a time
69
+ size_t lineNum = 0;
70
+ string line;
71
+ while(getline(inFile, line)) {
72
+ ++lineNum;
73
+ vector<string> token = Tokenize<string>(line, " ");
74
+
75
+ if (token.size() != 3) { // format checking
76
+ UTIL_THROW2("Syntax error at " << m_filePath << ":" << lineNum << ":" << line);
77
+ }
78
+
79
+ // create the output word
80
+ Word *outWord = new Word();
81
+ vector<string> factorString = Tokenize( token[0], oFactorDelimiter );
82
+ for (size_t i=0 ; i < m_outputFactorsVec.size() ; i++) {
83
+ const FactorDirection& direction = Output;
84
+ const FactorType& factorType = m_outputFactorsVec[i];
85
+ const Factor* factor
86
+ = factorCollection.AddFactor( direction, factorType, factorString[i] );
87
+ outWord->SetFactor( factorType, factor );
88
+ }
89
+
90
+ // create the input word
91
+ Word *inWord = new Word();
92
+ factorString = Tokenize( token[1], iFactorDelimiter );
93
+ for (size_t i=0 ; i < m_inputFactorsVec.size() ; i++) {
94
+ const FactorDirection& direction = Input;
95
+ const FactorType& factorType = m_inputFactorsVec[i];
96
+ const Factor* factor
97
+ = factorCollection.AddFactor( direction, factorType, factorString[i] );
98
+ inWord->SetFactor( factorType, factor );
99
+ }
100
+
101
+ // maximum entropy feature score
102
+ float score = Scan<float>(token[2]);
103
+
104
+ // std::cerr << "storing word " << *outWord << " " << *inWord << " " << score << endl;
105
+
106
+ // store feature in hash
107
+ DoubleHash::iterator keyOutWord = m_hash.find( outWord );
108
+ if( keyOutWord == m_hash.end() ) {
109
+ m_hash[outWord][inWord] = score;
110
+ } else { // already have hash for outword, delete the word to avoid leaks
111
+ (keyOutWord->second)[inWord] = score;
112
+ delete outWord;
113
+ }
114
+ }
115
+ }
116
+
117
+ void GlobalLexicalModel::InitializeForInput(ttasksptr const& ttask)
118
+ {
119
+ UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
120
+ "GlobalLexicalModel works only with sentence input.");
121
+ Sentence const* s = reinterpret_cast<Sentence const*>(ttask->GetSource().get());
122
+ m_local.reset(new ThreadLocalStorage);
123
+ m_local->input = s;
124
+ }
125
+
126
+ float GlobalLexicalModel::ScorePhrase( const TargetPhrase& targetPhrase ) const
127
+ {
128
+ const Sentence& input = *(m_local->input);
129
+ float score = 0;
130
+ for(size_t targetIndex = 0; targetIndex < targetPhrase.GetSize(); targetIndex++ ) {
131
+ float sum = 0;
132
+ const Word& targetWord = targetPhrase.GetWord( targetIndex );
133
+ VERBOSE(2,"glm " << targetWord << ": ");
134
+ const DoubleHash::const_iterator targetWordHash = m_hash.find( &targetWord );
135
+ if( targetWordHash != m_hash.end() ) {
136
+ SingleHash::const_iterator inputWordHash = targetWordHash->second.find( m_bias );
137
+ if( inputWordHash != targetWordHash->second.end() ) {
138
+ VERBOSE(2,"*BIAS* " << inputWordHash->second);
139
+ sum += inputWordHash->second;
140
+ }
141
+
142
+ boost::unordered_set< const Word*, UnorderedComparer<Word>, UnorderedComparer<Word> > alreadyScored; // do not score a word twice
143
+ for(size_t inputIndex = 0; inputIndex < input.GetSize(); inputIndex++ ) {
144
+ const Word& inputWord = input.GetWord( inputIndex );
145
+ if ( alreadyScored.find( &inputWord ) == alreadyScored.end() ) {
146
+ SingleHash::const_iterator inputWordHash = targetWordHash->second.find( &inputWord );
147
+ if( inputWordHash != targetWordHash->second.end() ) {
148
+ VERBOSE(2," " << inputWord << " " << inputWordHash->second);
149
+ sum += inputWordHash->second;
150
+ }
151
+ alreadyScored.insert( &inputWord );
152
+ }
153
+ }
154
+ }
155
+ // Hal Daume says: 1/( 1 + exp [ - sum_i w_i * f_i ] )
156
+ VERBOSE(2," p=" << FloorScore( log(1/(1+exp(-sum))) ) << endl);
157
+ score += FloorScore( log(1/(1+exp(-sum))) );
158
+ }
159
+ return score;
160
+ }
161
+
162
+ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const
163
+ {
164
+ LexiconCache& m_cache = m_local->cache;
165
+ const LexiconCache::const_iterator query = m_cache.find( &targetPhrase );
166
+ if ( query != m_cache.end() ) {
167
+ return query->second;
168
+ }
169
+
170
+ float score = ScorePhrase( targetPhrase );
171
+ m_cache.insert( pair<const TargetPhrase*, float>(&targetPhrase, score) );
172
+ //VERBOSE(2, "add to cache " << targetPhrase << ": " << score << endl);
173
+ return score;
174
+ }
175
+
176
+ void GlobalLexicalModel::EvaluateWithSourceContext(const InputType &input
177
+ , const InputPath &inputPath
178
+ , const TargetPhrase &targetPhrase
179
+ , const StackVec *stackVec
180
+ , ScoreComponentCollection &scoreBreakdown
181
+ , ScoreComponentCollection *estimatedScores) const
182
+ {
183
+ scoreBreakdown.PlusEquals( this, GetFromCacheOrScorePhrase(targetPhrase) );
184
+ }
185
+
186
+ bool GlobalLexicalModel::IsUseable(const FactorMask &mask) const
187
+ {
188
+ for (size_t i = 0; i < m_outputFactors.size(); ++i) {
189
+ if (m_outputFactors[i]) {
190
+ if (!mask[i]) {
191
+ return false;
192
+ }
193
+ }
194
+ }
195
+
196
+ return true;
197
+ }
198
+
199
+ }
mosesdecoder/moses/FF/HyperParameterAsWeight.h ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "StatelessFeatureFunction.h"
4
+
5
+ namespace Moses
6
+ {
7
+ class DecodeStep;
8
+
9
+ /**
10
+ * Baseclass for phrase-table or generation table feature function
11
+ **/
12
+ class HyperParameterAsWeight : public StatelessFeatureFunction
13
+ {
14
+ public:
15
+ HyperParameterAsWeight(const std::string &line);
16
+
17
+ virtual bool IsUseable(const FactorMask &mask) const {
18
+ return true;
19
+ }
20
+
21
+ virtual void EvaluateInIsolation(const Phrase &source
22
+ , const TargetPhrase &targetPhrase
23
+ , ScoreComponentCollection &scoreBreakdown
24
+ , ScoreComponentCollection &estimatedScores) const {
25
+ }
26
+
27
+ virtual void EvaluateWithSourceContext(const InputType &input
28
+ , const InputPath &inputPath
29
+ , const TargetPhrase &targetPhrase
30
+ , const StackVec *stackVec
31
+ , ScoreComponentCollection &scoreBreakdown
32
+ , ScoreComponentCollection *estimatedScores = NULL) const {
33
+ }
34
+
35
+ virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
36
+ , const TranslationOptionList &translationOptionList) const {
37
+ }
38
+
39
+ virtual void EvaluateWhenApplied(const Hypothesis& hypo,
40
+ ScoreComponentCollection* accumulator) const {
41
+ }
42
+
43
+ /**
44
+ * Same for chart-based features.
45
+ **/
46
+ virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
47
+ ScoreComponentCollection* accumulator) const {
48
+ }
49
+
50
+ };
51
+
52
+ } // namespace
53
+
54
+
55
+
mosesdecoder/moses/FF/LexicalReordering/HReorderingBackwardState.cpp ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "HReorderingBackwardState.h"
2
+
3
+ namespace Moses
4
+ {
5
+
6
+ ///////////////////////////
7
+ //HierarchicalReorderingBackwardState
8
+
9
+ HReorderingBackwardState::
10
+ HReorderingBackwardState(const HReorderingBackwardState *prev,
11
+ const TranslationOption &topt,
12
+ ReorderingStack reoStack)
13
+ : LRState(prev, topt), m_reoStack(reoStack)
14
+ { }
15
+
16
+ HReorderingBackwardState::
17
+ HReorderingBackwardState(const LRModel &config, size_t offset)
18
+ : LRState(config, LRModel::Backward, offset)
19
+ { }
20
+
21
+ size_t HReorderingBackwardState::hash() const
22
+ {
23
+ size_t ret = m_reoStack.hash();
24
+ return ret;
25
+ }
26
+
27
+ bool HReorderingBackwardState::operator==(const FFState& o) const
28
+ {
29
+ const HReorderingBackwardState& other
30
+ = static_cast<const HReorderingBackwardState&>(o);
31
+ bool ret = m_reoStack == other.m_reoStack;
32
+ return ret;
33
+ }
34
+
35
+ LRState*
36
+ HReorderingBackwardState::
37
+ Expand(const TranslationOption& topt, const InputType& input,
38
+ ScoreComponentCollection* scores) const
39
+ {
40
+ HReorderingBackwardState* nextState;
41
+ nextState = new HReorderingBackwardState(this, topt, m_reoStack);
42
+ Range swrange = topt.GetSourceWordsRange();
43
+ int reoDistance = nextState->m_reoStack.ShiftReduce(swrange);
44
+ ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
45
+ CopyScores(scores, topt, input, reoType);
46
+ return nextState;
47
+ }
48
+
49
+ }
50
+
mosesdecoder/moses/FF/LexicalReordering/HReorderingBackwardState.h ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#pragma once
#include "LRState.h"
#include "ReorderingStack.h"

namespace Moses
{

//! State for a hierarchical reordering model (see Galley and Manning, A
//! Simple and Effective Hierarchical Phrase Reordering Model, EMNLP 2008)
//! backward state (conditioned on the previous phrase)
class HReorderingBackwardState : public LRState
{
private:
  // Shift-reduce stack of covered source spans; the distance to its top
  // determines the orientation of the next expansion.
  ReorderingStack m_reoStack;
public:
  // Initial (empty-hypothesis) state. 'offset' is this model's first score
  // index within the producer's score vector.
  HReorderingBackwardState(const LRModel &config, size_t offset);
  // Successor state: extends 'prev' with 'topt', carrying a copy of the
  // reordering stack (passed by value intentionally).
  HReorderingBackwardState(const HReorderingBackwardState *prev,
                           const TranslationOption &topt,
                           ReorderingStack reoStack);
  virtual size_t hash() const;
  virtual bool operator==(const FFState& other) const;

  // Create the successor state for 'hypo' and add the orientation score
  // to 'scores'.
  virtual LRState* Expand(const TranslationOption& hypo, const InputType& input,
                          ScoreComponentCollection* scores) const;

private:
  // NOTE(review): these helpers appear to be declared but never defined in
  // the accompanying .cpp (orientation mapping is done by
  // LRModel::GetOrientation) -- confirm before calling them.
  ReorderingType GetOrientationTypeMSD(int reoDistance) const;
  ReorderingType GetOrientationTypeMSLR(int reoDistance) const;
  ReorderingType GetOrientationTypeMonotonic(int reoDistance) const;
  ReorderingType GetOrientationTypeLeftRight(int reoDistance) const;
};

}
mosesdecoder/moses/FF/LexicalReordering/HReorderingForwardState.cpp ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#include "HReorderingForwardState.h"

namespace Moses
{

///////////////////////////
//HReorderingForwardState

// Initial state for the empty hypothesis. 'size' is the source sentence
// length (used to size the coverage bitmap); 'offset' is this model's first
// score index within the producer's score vector.
HReorderingForwardState::
HReorderingForwardState(const LRModel &config,
                        size_t size, size_t offset)
  : LRState(config, LRModel::Forward, offset)
  , m_first(true)
  , m_prevRange(NOT_FOUND,NOT_FOUND)
  , m_coverage(size)
{ }

// Successor state: remembers the source range of 'topt' (forward scores are
// conditioned on the previous option, stored by the LRState base) and
// extends the coverage bitmap with the newly translated span.
HReorderingForwardState::
HReorderingForwardState(const HReorderingForwardState *prev,
                        const TranslationOption &topt)
  : LRState(prev, topt)
  , m_first(false)
  , m_prevRange(topt.GetSourceWordsRange())
  , m_coverage(prev->m_coverage, topt.GetSourceWordsRange())
{
}

// Hash only on the previous source range; finer distinctions (previous
// option's cached scores) are handled by operator==.
size_t HReorderingForwardState::hash() const
{
  size_t ret;
  ret = hash_value(m_prevRange);
  return ret;
}

bool HReorderingForwardState::operator==(const FFState& o) const
{
  if (&o == this) return true;

  HReorderingForwardState const& other
  = static_cast<HReorderingForwardState const&>(o);

  // Equal ranges: states are recombinable iff the cached reordering scores
  // of the previous options compare equal.
  int compareScores = ((m_prevRange == other.m_prevRange)
                       ? ComparePrevScores(other.m_prevOption)
                       : (m_prevRange < other.m_prevRange) ? -1 : 1);
  return compareScores == 0;
}

// For compatibility with the phrase-based reordering model, scoring is one
// step delayed.
// The forward model takes determines orientations heuristically as follows:
//  mono:   if the next phrase comes after the conditioning phrase and
//          - there is a gap to the right of the conditioning phrase, or
//          - the next phrase immediately follows it
//  swap:   if the next phrase goes before the conditioning phrase and
//          - there is a gap to the left of the conditioning phrase, or
//          - the next phrase immediately precedes it
//  dright: if the next phrase follows the conditioning phrase and other
//          stuff comes in between
//  dleft:  if the next phrase precedes the conditioning phrase and other
//          stuff comes in between

LRState*
HReorderingForwardState::
Expand(TranslationOption const& topt, InputType const& input,
       ScoreComponentCollection* scores) const
{
  const Range cur = topt.GetSourceWordsRange();
  // keep track of the current coverage ourselves so we don't need the hypothesis
  Bitmap cov(m_coverage, cur);
  if (!m_first) {
    LRModel::ReorderingType reoType;
    reoType = m_configuration.GetOrientation(m_prevRange,cur,cov);
    CopyScores(scores, topt, input, reoType);
  }
  return new HReorderingForwardState(this, topt);
}

}
+ }
mosesdecoder/moses/FF/LexicalReordering/HReorderingForwardState.h ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#pragma once

#include "LRState.h"
#include "moses/Range.h"
#include "moses/Bitmap.h"

namespace Moses
{

//!forward state (conditioned on the next phrase)
class HReorderingForwardState : public LRState
{
private:
  bool m_first;      // true only for the empty-hypothesis state
  Range m_prevRange; // source range of the conditioning (previous) phrase
  Bitmap m_coverage; // source coverage accumulated so far

public:
  // Initial state; 'sentenceLength' sizes the coverage bitmap and 'offset'
  // is the model's first score index in the producer's score vector.
  HReorderingForwardState(const LRModel &config, size_t sentenceLength,
                          size_t offset);
  // Successor state extending 'prev' with 'topt'.
  HReorderingForwardState(const HReorderingForwardState *prev,
                          const TranslationOption &topt);

  virtual size_t hash() const;
  virtual bool operator==(const FFState& other) const;

  // Score the (one-step-delayed) orientation of 'hypo' relative to the
  // previous phrase and return the successor state.
  virtual LRState* Expand(const TranslationOption& hypo,
                          const InputType& input,
                          ScoreComponentCollection* scores) const;
};

}
+
mosesdecoder/moses/FF/LexicalReordering/LRModel.cpp ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "LRModel.h"
2
+ #include "moses/Range.h"
3
+ #include "moses/Bitmap.h"
4
+ #include "moses/InputType.h"
5
+ #include "HReorderingForwardState.h"
6
+ #include "HReorderingBackwardState.h"
7
+ #include "PhraseBasedReorderingState.h"
8
+ #include "BidirectionalReorderingState.h"
9
+ #include "SparseReordering.h"
10
+
11
+ namespace Moses
12
+ {
13
+
14
+ bool
15
+ IsMonotonicStep(Range const& prev, // words range of last source phrase
16
+ Range const& cur, // words range of current source phrase
17
+ Bitmap const& cov) // coverage bitmap
18
+ {
19
+ size_t e = prev.GetEndPos() + 1;
20
+ size_t s = cur.GetStartPos();
21
+ return (s == e || (s >= e && !cov.GetValue(e)));
22
+ }
23
+
24
+ bool
25
+ IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
26
+ {
27
+ size_t s = prev.GetStartPos();
28
+ size_t e = cur.GetEndPos();
29
+ return (e+1 == s || (e < s && !cov.GetValue(s-1)));
30
+ }
31
+
32
+ size_t
33
+ LRModel::
34
+ GetNumberOfTypes() const
35
+ {
36
+ return ((m_modelType == MSD) ? 3 :
37
+ (m_modelType == MSLR) ? 4 : 2);
38
+ }
39
+
40
+ size_t
41
+ LRModel::
42
+ GetNumScoreComponents() const
43
+ {
44
+ size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
45
+ return ((m_direction == Bidirectional)
46
+ ? 2 * score_per_dir + m_additionalScoreComponents
47
+ : score_per_dir + m_additionalScoreComponents);
48
+ }
49
+
50
+ void
51
+ LRModel::
52
+ ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
53
+ const LexicalReordering* producer)
54
+ {
55
+ if (sparseArgs.size()) {
56
+ m_sparse.reset(new SparseReordering(sparseArgs, producer));
57
+ }
58
+ }
59
+
60
+ void
61
+ LRModel::
62
+ SetAdditionalScoreComponents(size_t number)
63
+ {
64
+ m_additionalScoreComponents = number;
65
+ }
66
+
67
+ /// return orientation for the first phrase
68
+ LRModel::ReorderingType
69
+ LRModel::
70
+ GetOrientation(Range const& cur) const
71
+ {
72
+ UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
73
+ return ((m_modelType == LeftRight) ? R :
74
+ (cur.GetStartPos() == 0) ? M :
75
+ (m_modelType == MSD) ? D :
76
+ (m_modelType == MSLR) ? DR : NM);
77
+ }
78
+
79
+ LRModel::ReorderingType
80
+ LRModel::
81
+ GetOrientation(Range const& prev, Range const& cur) const
82
+ {
83
+ UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
84
+ return ((m_modelType == LeftRight)
85
+ ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
86
+ : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M
87
+ : (m_modelType == Monotonic) ? NM
88
+ : (prev.GetStartPos() == cur.GetEndPos() + 1) ? S
89
+ : (m_modelType == MSD) ? D
90
+ : (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
91
+ }
92
+
93
+ LRModel::ReorderingType
94
+ LRModel::
95
+ GetOrientation(int const reoDistance) const
96
+ {
97
+ // this one is for HierarchicalReorderingBackwardState
98
+ return ((m_modelType == LeftRight)
99
+ ? (reoDistance >= 1) ? R : L
100
+ : (reoDistance == 1) ? M
101
+ : (m_modelType == Monotonic) ? NM
102
+ : (reoDistance == -1) ? S
103
+ : (m_modelType == MSD) ? D
104
+ : (reoDistance > 1) ? DR : DL);
105
+ }
106
+
107
+ LRModel::ReorderingType
108
+ LRModel::
109
+ GetOrientation(Range const& prev, Range const& cur,
110
+ Bitmap const& cov) const
111
+ {
112
+ return ((m_modelType == LeftRight)
113
+ ? cur.GetStartPos() > prev.GetEndPos() ? R : L
114
+ : IsMonotonicStep(prev,cur,cov) ? M
115
+ : (m_modelType == Monotonic) ? NM
116
+ : IsSwap(prev,cur,cov) ? S
117
+ : (m_modelType == MSD) ? D
118
+ : cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
119
+ }
120
+
121
+ LRModel::
122
+ LRModel(const std::string &modelType)
123
+ : m_modelString(modelType)
124
+ , m_scoreProducer(NULL)
125
+ , m_modelType(None)
126
+ , m_phraseBased(true)
127
+ , m_collapseScores(false)
128
+ , m_direction(Backward)
129
+ , m_additionalScoreComponents(0)
130
+ {
131
+ std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
132
+
133
+ for (size_t i=0; i<config.size(); ++i) {
134
+ if (config[i] == "hier") {
135
+ m_phraseBased = false;
136
+ } else if (config[i] == "phrase") {
137
+ m_phraseBased = true;
138
+ } else if (config[i] == "wbe") {
139
+ m_phraseBased = true;
140
+ }
141
+ // no word-based decoding available, fall-back to phrase-based
142
+ // This is the old lexical reordering model combination of moses
143
+
144
+ else if (config[i] == "msd") {
145
+ m_modelType = MSD;
146
+ } else if (config[i] == "mslr") {
147
+ m_modelType = MSLR;
148
+ } else if (config[i] == "monotonicity") {
149
+ m_modelType = Monotonic;
150
+ } else if (config[i] == "leftright") {
151
+ m_modelType = LeftRight;
152
+ }
153
+
154
+ // unidirectional is deprecated, use backward instead
155
+ else if (config[i] == "unidirectional") {
156
+ m_direction = Backward;
157
+ } else if (config[i] == "backward") {
158
+ m_direction = Backward;
159
+ } else if (config[i] == "forward") {
160
+ m_direction = Forward;
161
+ } else if (config[i] == "bidirectional") {
162
+ m_direction = Bidirectional;
163
+ }
164
+
165
+ else if (config[i] == "f") {
166
+ m_condition = F;
167
+ } else if (config[i] == "fe") {
168
+ m_condition = FE;
169
+ }
170
+
171
+ else if (config[i] == "collapseff") {
172
+ m_collapseScores = true;
173
+ } else if (config[i] == "allff") {
174
+ m_collapseScores = false;
175
+ } else {
176
+ std::cerr
177
+ << "Illegal part in the lexical reordering configuration string: "
178
+ << config[i] << std::endl;
179
+ exit(1);
180
+ }
181
+ }
182
+
183
+ if (m_modelType == None) {
184
+ std::cerr
185
+ << "You need to specify the type of the reordering model "
186
+ << "(msd, monotonicity,...)" << std::endl;
187
+ exit(1);
188
+ }
189
+ }
190
+
191
+ LRState *
192
+ LRModel::
193
+ CreateLRState(const InputType &input) const
194
+ {
195
+ LRState *bwd = NULL, *fwd = NULL;
196
+ size_t offset = 0;
197
+
198
+ switch(m_direction) {
199
+ case Backward:
200
+ case Bidirectional:
201
+ if (m_phraseBased)
202
+ bwd = new PhraseBasedReorderingState(*this, Backward, offset);
203
+ else
204
+ bwd = new HReorderingBackwardState(*this, offset);
205
+ offset += m_collapseScores ? 1 : GetNumberOfTypes();
206
+ if (m_direction == Backward) return bwd; // else fall through
207
+ case Forward:
208
+ if (m_phraseBased)
209
+ fwd = new PhraseBasedReorderingState(*this, Forward, offset);
210
+ else
211
+ fwd = new HReorderingForwardState(*this, input.GetSize(), offset);
212
+ offset += m_collapseScores ? 1 : GetNumberOfTypes();
213
+ if (m_direction == Forward) return fwd;
214
+ }
215
+ return new BidirectionalReorderingState(*this, bwd, fwd, 0);
216
+ }
217
+
218
+ }
219
+
mosesdecoder/moses/FF/LexicalReordering/LRModel.h ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#pragma once
#include <string>
#include <map>
#include <boost/scoped_ptr.hpp>

namespace Moses
{
class Range;
class Bitmap;
class InputType;
class LRState;
class LexicalReordering;
class SparseReordering;

//! Factory class for lexical reordering states
class LRModel
{
public:
  friend class LexicalReordering;
  enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
  enum Direction { Forward, Backward, Bidirectional };
  enum Condition { F, E, FE };

  // constants for the different types of reordering
  // (correspond to indices in the respective table)
#if 0
  typedef int ReorderingType;
  static const ReorderingType M   = 0; // monotonic
  static const ReorderingType NM  = 1; // non-monotonic
  static const ReorderingType S   = 1; // swap
  static const ReorderingType D   = 2; // discontinuous
  static const ReorderingType DL  = 2; // discontinuous, left
  static const ReorderingType DR  = 3; // discontinuous, right
  static const ReorderingType R   = 0; // right
  static const ReorderingType L   = 1; // left
  static const ReorderingType MAX = 3; // largest possible
#else
  // Note: values deliberately overlap (e.g. NM == S) because they index the
  // same score table positions under different model types.
  enum ReorderingType {
    M    = 0, // monotonic
    NM   = 1, // non-monotonic
    S    = 1, // swap
    D    = 2, // discontinuous
    DL   = 2, // discontinuous, left
    DR   = 3, // discontinuous, right
    R    = 0, // right
    L    = 1, // left
    MAX  = 3, // largest possible
    NONE = 4  // largest possible
  };
#endif
  // determine orientation, depending on model:


  ReorderingType // for first phrase in phrase-based
  GetOrientation(Range const& cur) const;

  ReorderingType // for non-first phrases in phrase-based
  GetOrientation(Range const& prev, Range const& cur) const;

  ReorderingType // for HReorderingForwardState
  GetOrientation(Range const& prev, Range const& cur,
                 Bitmap const& cov) const;

  ReorderingType // for HReorderingBackwarddState
  GetOrientation(int const reoDistance) const;

  // Parse a model string such as "wbe-msd-bidirectional-fe-allff".
  LRModel(const std::string &modelType);

  // Instantiate the sparse sub-model from sparse-* feature arguments.
  void
  ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
                  const LexicalReordering* producer);

  // Build the initial decoder state (backward/forward/bidirectional).
  LRState*
  CreateLRState(const InputType &input) const;

  size_t GetNumberOfTypes() const;
  size_t GetNumScoreComponents() const;
  void SetAdditionalScoreComponents(size_t number);

  LexicalReordering*
  GetScoreProducer() const {
    return m_scoreProducer;
  }

  ModelType GetModelType() const {
    return m_modelType;
  }
  Direction GetDirection() const {
    return m_direction;
  }
  Condition GetCondition() const {
    return m_condition;
  }

  bool
  IsPhraseBased() const {
    return m_phraseBased;
  }

  // If true, all orientations share a single dense score component.
  bool
  CollapseScores() const {
    return m_collapseScores;
  }

  // May return NULL when no sparse-* arguments were configured.
  SparseReordering const*
  GetSparseReordering() const {
    return m_sparse.get();
  }

private:
  void
  SetScoreProducer(LexicalReordering* scoreProducer) {
    m_scoreProducer = scoreProducer;
  }

  std::string const&
  GetModelString() const {
    return m_modelString;
  }

  std::string m_modelString;       // original configuration string
  LexicalReordering *m_scoreProducer;
  ModelType m_modelType;
  bool m_phraseBased;              // false for hierarchical ("hier") models
  bool m_collapseScores;
  Direction m_direction;
  Condition m_condition;           // conditioning side(s): f, e, or fe
  size_t m_additionalScoreComponents;
  boost::scoped_ptr<SparseReordering> m_sparse;
};

}
mosesdecoder/moses/FF/LexicalReordering/LRState.cpp ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
// -*- c++ -*-
#include <vector>
#include <string>

#include "LRState.h"
#include "moses/FF/FFState.h"
#include "moses/Hypothesis.h"
#include "moses/Range.h"
#include "moses/TranslationOption.h"
#include "moses/Util.h"

#include "LexicalReordering.h"

namespace Moses
{

// Add the score for orientation 'reoType' to 'accum'. The value comes from
// the reordering-table entry cached on the relevant translation option, or
// from the feature's configured default scores when no entry exists.
void
LRState::
CopyScores(ScoreComponentCollection* accum,
           const TranslationOption &topt,
           const InputType& input,
           ReorderingType reoType) const
{
  // don't call this on a bidirectional object
  UTIL_THROW_IF2(m_direction != LRModel::Backward &&
                 m_direction != LRModel::Forward,
                 "Unknown direction: " << m_direction);

  // Backward models score the option being added; forward models score the
  // previously added option (scoring is one step delayed).
  TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward)
                                          ? &topt : m_prevOption);

  LexicalReordering* producer = m_configuration.GetScoreProducer();
  Scores const* cached = relevantOpt->GetLexReorderingScores(producer);

  // The approach here is bizarre! Why create a whole vector and do
  // vector addition (acumm->PlusEquals) to update a single value? - UG
  // off_remote: index into the cached table scores;
  // off_local: index into this feature's dense score vector (a single
  // collapsed slot per direction when CollapseScores() is set).
  size_t off_remote = m_offset + reoType;
  size_t off_local  = m_configuration.CollapseScores() ? m_offset : off_remote;

  UTIL_THROW_IF2(off_local >= producer->GetNumScoreComponents(),
                 "offset out of vector bounds!");

  // look up applicable score from vector of scores
  if(cached) {
    UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
    Scores scores(producer->GetNumScoreComponents(),0);
    scores[off_local ] = (*cached)[off_remote];
    accum->PlusEquals(producer, scores);
  }

  // else: use default scores (if specified)
  else if (producer->GetHaveDefaultScores()) {
    Scores scores(producer->GetNumScoreComponents(),0);
    scores[off_local] = producer->GetDefaultScore(off_remote);
    accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
  }
  // note: if no default score, no cost

  // Sparse features, if configured, are scored in addition to the dense ones.
  const SparseReordering* sparse = m_configuration.GetSparseReordering();
  if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
                                   m_direction, accum);
}


// Three-way comparison (-1/0/1) of the cached reordering scores of
// m_prevOption against those of 'other', restricted to this model's score
// range. Used by forward states to decide recombination.
int
LRState::
ComparePrevScores(const TranslationOption *other) const
{
  LexicalReordering* producer = m_configuration.GetScoreProducer();
  const Scores* myScores = m_prevOption->GetLexReorderingScores(producer);
  const Scores* yrScores = other->GetLexReorderingScores(producer);

  if(myScores == yrScores) return 0;

  // The pointers are NULL if a phrase pair isn't found in the reordering table.
  if(yrScores == NULL) return -1;
  if(myScores == NULL) return 1;

  size_t stop = m_offset + m_configuration.GetNumberOfTypes();
  for(size_t i = m_offset; i < stop; i++) {
    if((*myScores)[i] < (*yrScores)[i]) return -1;
    if((*myScores)[i] > (*yrScores)[i]) return 1;
  }
  return 0;
}

}
mosesdecoder/moses/FF/LexicalReordering/LRState.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <vector>
#include <string>

#include "moses/Hypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Range.h"
#include "moses/Bitmap.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"
#include "LRModel.h"

namespace Moses
{

//! Abstract class for lexical reordering model states
class LRState : public FFState
{
public:

  typedef LRModel::ReorderingType ReorderingType;

  // Create the successor state for 'hypo' and add this model's reordering
  // score(s) to 'scores'.
  virtual
  LRState*
  Expand(const TranslationOption& hypo, const InputType& input,
         ScoreComponentCollection* scores) const = 0;

  static
  LRState*
  CreateLRState(const std::vector<std::string>& config,
                LRModel::Direction dir,
                const InputType &input);

protected:

  // Owning model/factory: model type, direction, collapse flag, sparse
  // sub-model, score producer.
  const LRModel& m_configuration;

  // The following is the true direction of the object, which can be
  // Backward or Forward even if the Configuration has Bidirectional.
  LRModel::Direction m_direction;
  // Index of this state's first score within the producer's score vector.
  size_t m_offset;
  //forward scores are conditioned on prev option, so need to remember it
  const TranslationOption *m_prevOption;

  // Successor-state constructor: inherits configuration/direction/offset
  // from 'prev' and records 'topt' as the new previous option.
  inline
  LRState(const LRState *prev,
          const TranslationOption &topt)
    : m_configuration(prev->m_configuration)
    , m_direction(prev->m_direction)
    , m_offset(prev->m_offset)
    , m_prevOption(&topt)
  { }

  // Initial-state constructor (no previous option yet).
  inline
  LRState(const LRModel &config,
          LRModel::Direction dir,
          size_t offset)
    : m_configuration(config)
    , m_direction(dir)
    , m_offset(offset)
    , m_prevOption(NULL)
  { }

  // copy the right scores in the right places, taking into account
  // forward/backward, offset, collapse
  void
  CopyScores(ScoreComponentCollection* scores,
             const TranslationOption& topt,
             const InputType& input, ReorderingType reoType) const;

  // Three-way comparison of the cached reordering scores of m_prevOption
  // against those of 'other' (forward-state recombination).
  int
  ComparePrevScores(const TranslationOption *other) const;
};




}
mosesdecoder/moses/FF/LexicalReordering/LexicalReordering.cpp ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#include <sstream>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/foreach.hpp>

#include "moses/FF/FFState.h"
#include "moses/TranslationOptionList.h"
#include "LexicalReordering.h"
#include "LRState.h"
#include "moses/StaticData.h"
#include "moses/Util.h"
#include "moses/InputPath.h"

using namespace std;
using namespace boost::algorithm;

namespace Moses
{
// Parse the feature line. Recognized arguments:
//   type=...           model string, e.g. "wbe-msd-bidirectional-fe"
//   input-factor=...   source-side factor mask
//   output-factor=...  target-side factor mask
//   path=...           reordering table file
//   sparse-*           forwarded to the sparse reordering sub-model
//   default-scores=... comma-separated fallback scores (log-transformed)
LexicalReordering::
LexicalReordering(const std::string &line)
  : StatefulFeatureFunction(line,false)
{
  VERBOSE(1, "Initializing Lexical Reordering Feature.." << std::endl);

  map<string,string> sparseArgs;
  m_haveDefaultScores = false;
  for (size_t i = 0; i < m_args.size(); ++i) {
    const vector<string> &args = m_args[i];

    if (args[0] == "type") {
      m_configuration.reset(new LRModel(args[1]));
      m_configuration->SetScoreProducer(this);
      m_modelTypeString = m_configuration->GetModelString();
    } else if (args[0] == "input-factor")
      m_factorsF =Tokenize<FactorType>(args[1]);
    else if (args[0] == "output-factor")
      m_factorsE =Tokenize<FactorType>(args[1]);
    else if (args[0] == "path")
      m_filePath = args[1];
    else if (starts_with(args[0], "sparse-"))
      sparseArgs[args[0].substr(7)] = args[1];
    else if (args[0] == "default-scores") {
      vector<string> tokens = Tokenize(args[1],",");
      for(size_t i=0; i<tokens.size(); i++)
        m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i])));
      m_haveDefaultScores = true;
    } else UTIL_THROW2("Unknown argument " + args[0]);
  }

  // NOTE(review): assumes a "type" argument was present; m_configuration is
  // null otherwise -- confirm this is validated upstream.
  // Factor masks must be set for the side(s) we condition on;
  // FE/E fall through to the F check unless the condition is E only.
  switch(m_configuration->GetCondition()) {
  case LRModel::FE:
  case LRModel::E:
    UTIL_THROW_IF2(m_factorsE.empty(),
                   "TL factor mask for lexical reordering is "
                   << "unexpectedly empty");

    if(m_configuration->GetCondition() == LRModel::E)
      break; // else fall through
  case LRModel::F:
    UTIL_THROW_IF2(m_factorsF.empty(),
                   "SL factor mask for lexical reordering is "
                   << "unexpectedly empty");
    break;
  default:
    UTIL_THROW2("Unknown conditioning option!");
  }

  // sanity check: number of default scores
  size_t numScores
  = m_numScoreComponents
    = m_numTuneableComponents
      = m_configuration->GetNumScoreComponents();
  UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
                 "wrong number of default scores (" << m_defaultScores.size()
                 << ") for lexicalized reordering model (expected "
                 << m_configuration->GetNumScoreComponents() << ")");

  m_configuration->ConfigureSparse(sparseArgs, this);
  // this->Register();
}

LexicalReordering::
~LexicalReordering()
{ }

// Load the reordering table, if a path was configured.
void
LexicalReordering::
Load(AllOptions::ptr const& opts)
{
  m_options = opts;
  typedef LexicalReorderingTable LRTable;
  if (m_filePath.size())
    m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF,
                                         m_factorsE, std::vector<FactorType>()));
}

// Look up the reordering scores of a source/target phrase pair.
Scores
LexicalReordering::
GetProb(const Phrase& f, const Phrase& e) const
{
  return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR));
}

// Delegate scoring/state transition to the lexical-reordering state.
FFState*
LexicalReordering::
EvaluateWhenApplied(const Hypothesis& hypo,
                    const FFState* prev_state,
                    ScoreComponentCollection* out) const
{
  VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
  const LRState *prev = static_cast<const LRState *>(prev_state);
  LRState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);

  VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl);

  return next_state;
}

FFState const*
LexicalReordering::EmptyHypothesisState(const InputType &input) const
{
  return m_configuration->CreateLRState(input);
}

// Usable iff all required target-side factors are present in the mask.
bool
LexicalReordering::
IsUseable(const FactorMask &mask) const
{
  BOOST_FOREACH(FactorType const& f, m_factorsE) {
    if (!mask[f]) return false;
  }
  return true;
}


// Cache the reordering-table scores for this translation option (no-op if
// already cached, e.g. by a sampling phrase table, or if no table loaded).
void
LexicalReordering::
SetCache(TranslationOption& to) const
{
  if (to.GetLexReorderingScores(this)) return;
  // Scores were were set already (e.g., by sampling phrase table)

  if (m_table) {
    Phrase const& sphrase = to.GetInputPath().GetPhrase();
    Phrase const& tphrase = to.GetTargetPhrase();
    to.CacheLexReorderingScores(*this, this->GetProb(sphrase,tphrase));
  } else { // e.g. OOV with Mmsapt
    // Scores vals(GetNumScoreComponents(), 0);
    // to.CacheLexReorderingScores(*this, vals);
  }
}

LRModel const&
LexicalReordering
::GetModel() const
{
  return *m_configuration;
}


// Cache scores for every option in the list.
void
LexicalReordering::
SetCache(TranslationOptionList& tol) const
{
  BOOST_FOREACH(TranslationOption* to, tol)
  this->SetCache(*to);
}


}
mosesdecoder/moses/FF/LexicalReordering/LexicalReordering.h ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once

#include <string>
#include <vector>
#include <boost/scoped_ptr.hpp>
#include "moses/Factor.h"
#include "moses/Phrase.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Range.h"
#include "moses/TranslationOption.h"

#include "moses/FF/StatefulFeatureFunction.h"
#include "util/exception.hh"

#include "LRState.h"
#include "LexicalReorderingTable.h"
#include "SparseReordering.h"


namespace Moses
{
class Factor;
class Phrase;
class Hypothesis;
class InputType;

// implementation of lexical reordering (Tilman ...) for phrase-based
// decoding
class LexicalReordering : public StatefulFeatureFunction
{
public:
  LexicalReordering(const std::string &line);
  virtual ~LexicalReordering();
  // Loads the reordering table from the configured path (if any).
  void Load(AllOptions::ptr const& opts);

  virtual
  bool
  IsUseable(const FactorMask &mask) const;

  virtual
  FFState const*
  EmptyHypothesisState(const InputType &input) const;

  void
  InitializeForInput(ttasksptr const& ttask) {
    if (m_table) m_table->InitializeForInput(ttask);
  }

  // Reordering-table lookup for a source/target phrase pair.
  Scores
  GetProb(const Phrase& f, const Phrase& e) const;

  virtual
  FFState*
  EvaluateWhenApplied(const Hypothesis& cur_hypo,
                      const FFState* prev_state,
                      ScoreComponentCollection* accumulator) const;

  // Phrase-based only: the chart variant is unsupported and throws.
  virtual
  FFState*
  EvaluateWhenApplied(const ChartHypothesis&, int featureID,
                      ScoreComponentCollection*) const {
    UTIL_THROW2("LexicalReordering is not valid for chart decoder");
  }

  // True iff default-scores were configured as a fallback for phrase pairs
  // absent from the reordering table.
  bool
  GetHaveDefaultScores() {
    return m_haveDefaultScores;
  }

  // i-th default score; only meaningful when GetHaveDefaultScores() is true.
  float
  GetDefaultScore( size_t i ) {
    return m_defaultScores[i];
  }

  // Cache reordering scores on a single translation option.
  virtual
  void
  SetCache(TranslationOption& to) const;

  // Cache reordering scores on every option in the list.
  virtual
  void
  SetCache(TranslationOptionList& tol) const;

private:
  bool DecodeCondition(std::string s);
  bool DecodeDirection(std::string s);
  bool DecodeNumFeatureFunctions(std::string s);

  boost::scoped_ptr<LRModel> m_configuration;   // parsed "type" string
  std::string m_modelTypeString;
  std::vector<std::string> m_modelType;
  boost::scoped_ptr<LexicalReorderingTable> m_table;
  std::vector<LRModel::Condition> m_condition;
  std::vector<FactorType> m_factorsE, m_factorsF; // target/source factor masks
  std::string m_filePath;                       // reordering table path
  bool m_haveDefaultScores;
  Scores m_defaultScores;
public:
  LRModel const& GetModel() const;
};

}


mosesdecoder/moses/FF/LexicalReordering/PhraseBasedReorderingState.cpp ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "PhraseBasedReorderingState.h"
2
+
3
+ namespace Moses
4
+ {
5
+ // ===========================================================================
6
+ // PHRASE BASED REORDERING STATE
7
+ // ===========================================================================
8
+ bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
9
+
10
+ PhraseBasedReorderingState::
11
+ PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
12
+ const TranslationOption &topt)
13
+ : LRState(prev, topt)
14
+ , m_prevRange(topt.GetSourceWordsRange())
15
+ , m_first(false)
16
+ { }
17
+
18
+
19
+ PhraseBasedReorderingState::
20
+ PhraseBasedReorderingState(const LRModel &config,
21
+ LRModel::Direction dir, size_t offset)
22
+ : LRState(config, dir, offset)
23
+ , m_prevRange(NOT_FOUND,NOT_FOUND)
24
+ , m_first(true)
25
+ { }
26
+
27
+
28
+ size_t PhraseBasedReorderingState::hash() const
29
+ {
30
+ size_t ret;
31
+ ret = hash_value(m_prevRange);
32
+ boost::hash_combine(ret, m_direction);
33
+
34
+ return ret;
35
+ }
36
+
37
+ bool PhraseBasedReorderingState::operator==(const FFState& o) const
38
+ {
39
+ if (&o == this) return true;
40
+
41
+ const PhraseBasedReorderingState &other = static_cast<const PhraseBasedReorderingState&>(o);
42
+ if (m_prevRange == other.m_prevRange) {
43
+ if (m_direction == LRModel::Forward) {
44
+ int compareScore = ComparePrevScores(other.m_prevOption);
45
+ return compareScore == 0;
46
+ } else {
47
+ return true;
48
+ }
49
+ } else {
50
+ return false;
51
+ }
52
+ }
53
+
54
+ LRState*
55
+ PhraseBasedReorderingState::
56
+ Expand(const TranslationOption& topt, const InputType& input,
57
+ ScoreComponentCollection* scores) const
58
+ {
59
+ // const LRModel::ModelType modelType = m_configuration.GetModelType();
60
+
61
+ if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first) {
62
+ LRModel const& lrmodel = m_configuration;
63
+ Range const cur = topt.GetSourceWordsRange();
64
+ LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
65
+ : lrmodel.GetOrientation(m_prevRange,cur));
66
+ CopyScores(scores, topt, input, reoType);
67
+ }
68
+ return new PhraseBasedReorderingState(this, topt);
69
+ }
70
+
71
+ }
72
+
mosesdecoder/moses/FF/LexicalReordering/ReorderingStack.cpp ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ReorderingStack.cpp
3
+ ** Author: Ankit K. Srivastava
4
+ ** Date: Jan 26, 2010
5
+ */
6
+
7
+ #include "ReorderingStack.h"
8
+ #include <vector>
9
+
10
+ namespace Moses
11
+ {
12
+ size_t ReorderingStack::hash() const
13
+ {
14
+ std::size_t ret = boost::hash_range(m_stack.begin(), m_stack.end());
15
+ return ret;
16
+ }
17
+
18
+ bool ReorderingStack::operator==(const ReorderingStack& o) const
19
+ {
20
+ const ReorderingStack& other = static_cast<const ReorderingStack&>(o);
21
+ return m_stack == other.m_stack;
22
+ }
23
+
24
// Method to push (shift element into the stack and reduce if reqd).
// Returns the signed "distance" between input_span and the previously
// translated span: 1 = monotone, -1 = swap, anything else = discontinuous.
// NOTE(review): span positions are unsigned; the "-1" case relies on
// unsigned wrap-around being narrowed back to a signed int — confirm this
// is the intended idiom before touching the arithmetic.
int ReorderingStack::ShiftReduce(Range input_span)
{
  int distance; // value to return: the initial distance between this and previous span

  // stack is empty: distance is measured from the virtual position -1
  // before the sentence start
  if(m_stack.empty()) {
    m_stack.push_back(input_span);
    return input_span.GetStartPos() + 1; // - (-1)
  }

  // stack is non-empty
  Range prev_span = m_stack.back(); //access last element added

  //calculate the distance we are returning
  if(input_span.GetStartPos() > prev_span.GetStartPos()) {
    // new span is to the right of the previous one
    distance = input_span.GetStartPos() - prev_span.GetEndPos();
  } else {
    // new span is to the left of (or overlaps) the previous one
    distance = input_span.GetEndPos() - prev_span.GetStartPos();
  }

  if(distance == 1) { //monotone: merge [prev ... input] and keep reducing
    m_stack.pop_back();
    Range new_span(prev_span.GetStartPos(), input_span.GetEndPos());
    Reduce(new_span);
  } else if(distance == -1) { //swap: merge [input ... prev] and keep reducing
    m_stack.pop_back();
    Range new_span(input_span.GetStartPos(), prev_span.GetEndPos());
    Reduce(new_span);
  } else { // discontinuous: leave a gap on the stack
    m_stack.push_back(input_span);
  }

  return distance;
}
59
+
60
+ // Method to reduce, if possible the spans
61
+ void ReorderingStack::Reduce(Range current)
62
+ {
63
+ bool cont_loop = true;
64
+
65
+ while (cont_loop && m_stack.size() > 0) {
66
+
67
+ Range previous = m_stack.back();
68
+
69
+ if(current.GetStartPos() - previous.GetEndPos() == 1) { //mono&merge
70
+ m_stack.pop_back();
71
+ Range t(previous.GetStartPos(), current.GetEndPos());
72
+ current = t;
73
+ } else if(previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge
74
+ m_stack.pop_back();
75
+ Range t(current.GetStartPos(), previous.GetEndPos());
76
+ current = t;
77
+ } else { // discontinuous, no more merging
78
+ cont_loop=false;
79
+ }
80
+ } // finished reducing, exit
81
+
82
+ // add to stack
83
+ m_stack.push_back(current);
84
+ }
85
+
86
+ }
87
+
mosesdecoder/moses/FF/LexicalReordering/ReorderingStack.h ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * ReorderingStack.h
 ** Author: Ankit K. Srivastava
 ** Date: Jan 26, 2010
 */

#pragma once

//#include <string>
#include <vector>
//#include "Factor.h"
//#include "Phrase.h"
//#include "TypeDef.h"
//#include "Util.h"
#include "moses/Range.h"

namespace Moses
{

/** Shift-reduce stack of source-side ranges used by lexicalized reordering
 *  (see HReorderingBackwardState).  As phrases are translated their spans
 *  are shifted onto the stack and adjacent spans are merged (reduced); the
 *  value returned by ShiftReduce classifies each step: 1 = monotone,
 *  -1 = swap, any other value = discontinuous.
 */
class ReorderingStack
{
private:

  // spans translated so far, maximally merged where adjacent
  std::vector<Range> m_stack;

public:

  size_t hash() const;
  bool operator==(const ReorderingStack& other) const;

  // Push input_span (merging with the top where adjacent); returns the
  // signed distance to the previously translated span.
  int ShiftReduce(Range input_span);

private:
  // Merge input_span with any adjacent spans on top of the stack and push
  // the result.
  void Reduce(Range input_span);
};


}
mosesdecoder/moses/FF/LexicalReordering/SparseReordering.cpp ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <fstream>
2
+
3
+ #include "moses/FactorCollection.h"
4
+ #include "moses/InputPath.h"
5
+ #include "moses/Util.h"
6
+
7
+ #include "util/exception.hh"
8
+
9
+ #include "util/file_piece.hh"
10
+ #include "util/string_piece.hh"
11
+ #include "util/string_stream.hh"
12
+ #include "util/tokenize_piece.hh"
13
+
14
+ #include "LexicalReordering.h"
15
+ #include "SparseReordering.h"
16
+
17
+ #include <boost/algorithm/string/predicate.hpp>
18
+
19
+
20
+ using namespace std;
21
+ using namespace boost::algorithm;
22
+
23
+ namespace Moses
24
+ {
25
+
26
// Render this key as a human-readable feature name, e.g.
// "phr-src-first-<listid>-<word>-<reotype>".
// WARNING(review): returns a reference to a function-local static buffer,
// so the result is overwritten by the next call and the function is not
// thread-safe — callers must copy before calling Name() again.  The
// signature is fixed by the header, so this cannot be changed here alone.
const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
{
  static string kSep = "-";
  static string name;
  util::StringStream buf;
  // type side position id word reotype
  if (type == Phrase) {
    buf << "phr";
  } else if (type == Stack) {
    buf << "stk";
  } else if (type == Between) {
    buf << "btn";
  }
  buf << kSep;
  if (side == Source) {
    buf << "src";
  } else if (side == Target) {
    buf << "tgt";
  }
  buf << kSep;
  if (position == First) {
    buf << "first";
  } else if (position == Last) {
    buf << "last";
  }
  buf << kSep;
  buf << wordListId;
  buf << kSep;
  // cluster-based features are prefixed so they cannot collide with plain
  // word features that happen to share a surface form
  if (isCluster) buf << "cluster_";
  buf << word->GetString();
  buf << kSep;
  buf << reoType;
  name = buf.str();
  return name;
}
61
+
62
+ SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
63
+ : m_producer(producer)
64
+ , m_useWeightMap(false)
65
+ {
66
+ static const string kSource= "source";
67
+ static const string kTarget = "target";
68
+ for (map<string,string>::const_iterator i = config.begin(); i != config.end(); ++i) {
69
+ vector<string> fields = Tokenize(i->first, "-");
70
+ if (fields[0] == "words") {
71
+ UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering word list name should be sparse-words-(source|target)-<id>");
72
+ if (fields[1] == kSource) {
73
+ ReadWordList(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceWordLists);
74
+ } else if (fields[1] == kTarget) {
75
+ ReadWordList(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetWordLists);
76
+ } else {
77
+ UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
78
+ }
79
+ } else if (fields[0] == "clusters") {
80
+ UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering cluster name should be sparse-clusters-(source|target)-<id>");
81
+ if (fields[1] == kSource) {
82
+ ReadClusterMap(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceClusterMaps);
83
+ } else if (fields[1] == kTarget) {
84
+ ReadClusterMap(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetClusterMaps);
85
+ } else {
86
+ UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
87
+ }
88
+ } else if (fields[0] == "weights") {
89
+ ReadWeightMap(i->second);
90
+ m_useWeightMap = true;
91
+ for (int reoType=0; reoType<=LRModel::MAX; ++reoType) {
92
+ util::StringStream buf;
93
+ buf << reoType;
94
+ m_featureMap2.push_back(m_producer->GetFeatureName(buf.str()));
95
+ }
96
+
97
+ } else if (fields[0] == "phrase") {
98
+ m_usePhrase = true;
99
+ } else if (fields[0] == "stack") {
100
+ m_useStack = true;
101
+ } else if (fields[0] == "between") {
102
+ m_useBetween = true;
103
+ } else {
104
+ UTIL_THROW(util::Exception, "Unable to parse sparse reordering option: " << i->first);
105
+ }
106
+ }
107
+
108
+ }
109
+
110
+ void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster)
111
+ {
112
+ for (size_t type = SparseReorderingFeatureKey::Stack;
113
+ type <= SparseReorderingFeatureKey::Between; ++type) {
114
+ for (size_t position = SparseReorderingFeatureKey::First;
115
+ position <= SparseReorderingFeatureKey::Last; ++position) {
116
+ for (int reoType = 0; reoType <= LRModel::MAX; ++reoType) {
117
+ SparseReorderingFeatureKey
118
+ key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
119
+ factor, isCluster,
120
+ static_cast<SparseReorderingFeatureKey::Position>(position),
121
+ side, static_cast<LRModel::ReorderingType>(reoType));
122
+ m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
123
+ }
124
+ }
125
+ }
126
+ }
127
+
128
+ void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists)
129
+ {
130
+ ifstream fh(filename.c_str());
131
+ UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
132
+ string line;
133
+ pWordLists->push_back(WordList());
134
+ pWordLists->back().first = id;
135
+ while (getline(fh,line)) {
136
+ //TODO: StringPiece
137
+ const Factor* factor = FactorCollection::Instance().AddFactor(line);
138
+ pWordLists->back().second.insert(factor);
139
+ PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false);
140
+
141
+ }
142
+ }
143
+
144
// Load a word->cluster-id map (tab-separated "word<TAB>cluster" lines)
// into a new entry of *pClusterMaps, pre-computing feature names for each
// cluster id encountered.  Throws util::Exception on malformed lines.
void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps)
{
  pClusterMaps->push_back(ClusterMap());
  pClusterMaps->back().first = id;
  util::FilePiece file(filename.c_str());
  StringPiece line;
  // FilePiece signals EOF by throwing, hence the try/catch-driven loop
  while (true) {
    try {
      line = file.ReadLine();
    } catch (const util::EndOfFileException &e) {
      break;
    }
    util::TokenIter<util::SingleCharacter, true> lineIter(line,util::SingleCharacter('\t'));
    if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing word): '" << line << "'");
    const Factor* wordFactor = FactorCollection::Instance().AddFactor(*lineIter);
    ++lineIter;
    if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
    const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
    pClusterMaps->back().second[wordFactor] = idFactor;
    // note: pre-calculation is keyed on the cluster id, not the word, so
    // repeated cluster ids redo (idempotent) map insertions
    PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true);
  }
}
166
+
167
// Fire the pre-computed sparse features matching `word` for the given
// (type, side, position, reoType) slot.  Word-list features fire when the
// word's first factor appears in a configured list; cluster features fire
// via the word's cluster id.  When an external weight map is loaded, each
// match instead adds its mapped weight to the per-reordering-type core
// feature (m_featureMap2) rather than firing the sparse feature itself.
void SparseReordering::AddFeatures(
  SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
  const Word& word, SparseReorderingFeatureKey::Position position,
  LRModel::ReorderingType reoType,
  ScoreComponentCollection* scores) const
{

  // only factor 0 (the surface form) is consulted
  const Factor* wordFactor = word.GetFactor(0);

  const vector<WordList>* wordLists;
  const vector<ClusterMap>* clusterMaps;
  if (side == SparseReorderingFeatureKey::Source) {
    wordLists = &m_sourceWordLists;
    clusterMaps = &m_sourceClusterMaps;
  } else {
    wordLists = &m_targetWordLists;
    clusterMaps = &m_targetClusterMaps;
  }

  // word-list features
  for (size_t id = 0; id < wordLists->size(); ++id) {
    if ((*wordLists)[id].second.find(wordFactor) == (*wordLists)[id].second.end()) continue;
    SparseReorderingFeatureKey key(id, type, wordFactor, false, position, side, reoType);
    // every (list entry x slot) name was pre-inserted by
    // PreCalculateFeatureNames, so the lookup must succeed
    FeatureMap::const_iterator fmi = m_featureMap.find(key);
    assert(fmi != m_featureMap.end());
    if (m_useWeightMap) {
      WeightMap::const_iterator wmi = m_weightMap.find(fmi->second.name());
      if (wmi != m_weightMap.end()) {
        if (wmi->second != 0) {
          scores->SparsePlusEquals(m_featureMap2[reoType], wmi->second);
        }
      }
    } else {
      scores->SparsePlusEquals(fmi->second, 1.0);
    }
  }

  // cluster-map features (same scoring scheme as above)
  for (size_t id = 0; id < clusterMaps->size(); ++id) {
    const ClusterMap& clusterMap = (*clusterMaps)[id];
    boost::unordered_map<const Factor*, const Factor*>::const_iterator clusterIter
    = clusterMap.second.find(wordFactor);
    if (clusterIter != clusterMap.second.end()) {
      SparseReorderingFeatureKey key(id, type, clusterIter->second, true, position, side, reoType);
      FeatureMap::const_iterator fmi = m_featureMap.find(key);
      assert(fmi != m_featureMap.end());
      if (m_useWeightMap) {
        WeightMap::const_iterator wmi = m_weightMap.find(fmi->second.name());
        if (wmi != m_weightMap.end()) {
          if (wmi->second != 0) {
            scores->SparsePlusEquals(m_featureMap2[reoType], wmi->second);
          }
        }
      } else {
        scores->SparsePlusEquals(fmi->second, 1.0);
      }
    }
  }

}
225
+
226
// Fire all configured sparse reordering features for one reordering step:
// "between" features for the words in the gap on discontinuous backward
// steps, then phrase (backward) or stack (forward) features for the first
// and last words of the current option's source and target phrases.
void SparseReordering::CopyScores(
  const TranslationOption& currentOpt,
  const TranslationOption* previousOpt,
  const InputType& input,
  LRModel::ReorderingType reoType,
  LRModel::Direction direction,
  ScoreComponentCollection* scores) const
{
  if (m_useBetween && direction == LRModel::Backward &&
      (reoType == LRModel::D || reoType == LRModel::DL || reoType == LRModel::DR)) {
    size_t gapStart, gapEnd;
    //NB: Using a static cast for speed, but could be nasty if
    //using non-sentence input
    const Sentence& sentence = static_cast<const Sentence&>(input);
    const Range& currentRange = currentOpt.GetSourceWordsRange();
    if (previousOpt) {
      // gap is the half-open span between the two options, whichever order
      // they occur in on the source side
      const Range& previousRange = previousOpt->GetSourceWordsRange();
      if (previousRange < currentRange) {
        gapStart = previousRange.GetEndPos() + 1;
        gapEnd = currentRange.GetStartPos();
      } else {
        gapStart = currentRange.GetEndPos() + 1;
        gapEnd = previousRange.GetStartPos();
      }
    } else {
      //start of sentence
      gapStart = 0;
      gapEnd = currentRange.GetStartPos();
    }
    // a discontinuous step implies a non-empty gap
    assert(gapStart < gapEnd);
    for (size_t i = gapStart; i < gapEnd; ++i) {
      AddFeatures(SparseReorderingFeatureKey::Between,
                  SparseReorderingFeatureKey::Source, sentence.GetWord(i),
                  SparseReorderingFeatureKey::First, reoType, scores);
    }
  }
  //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
  //phrase (backward)
  //stack (forward)
  SparseReorderingFeatureKey::Type type;
  if (direction == LRModel::Forward) {
    if (!m_useStack) return;
    type = SparseReorderingFeatureKey::Stack;
  } else if (direction == LRModel::Backward) {
    if (!m_usePhrase) return;
    type = SparseReorderingFeatureKey::Phrase;
  } else {
    //Shouldn't be called for bidirectional
    //keep compiler happy
    type = SparseReorderingFeatureKey::Phrase;
    assert(!"Shouldn't call CopyScores() with bidirectional direction");
  }
  // first/last words of the current option's source and target phrases
  const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
              SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
  const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
              SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);


}
289
+
290
+
291
+ void SparseReordering::ReadWeightMap(const string& filename)
292
+ {
293
+ util::FilePiece file(filename.c_str());
294
+ StringPiece line;
295
+ while (true) {
296
+ try {
297
+ line = file.ReadLine();
298
+ } catch (const util::EndOfFileException &e) {
299
+ break;
300
+ }
301
+ util::TokenIter<util::SingleCharacter, true> lineIter(line,util::SingleCharacter(' '));
302
+ UTIL_THROW_IF2(!lineIter, "Malformed weight line: '" << line << "'");
303
+ const std::string& name = lineIter->as_string();
304
+ ++lineIter;
305
+ UTIL_THROW_IF2(!lineIter, "Malformed weight line: '" << line << "'");
306
+ float weight = Moses::Scan<float>(lineIter->as_string());
307
+
308
+ std::pair< WeightMap::iterator, bool> inserted = m_weightMap.insert( std::make_pair(name, weight) );
309
+ UTIL_THROW_IF2(!inserted.second, "Duplicate weight: '" << name << "'");
310
+ }
311
+ }
312
+
313
+
314
+ } //namespace
315
+
mosesdecoder/moses/FF/LexicalReordering/SparseReordering.h ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef moses_FF_LexicalReordering_SparseReordering_h
2
+ #define moses_FF_LexicalReordering_SparseReordering_h
3
+
4
+ /**
5
+ * Sparse reordering features for phrase-based MT, following Cherry (NAACL, 2013)
6
+ **/
7
+
8
+
9
+ #include <functional>
10
+ #include <map>
11
+ #include <string>
12
+ #include <vector>
13
+
14
+ #include <boost/unordered_set.hpp>
15
+
16
+ #include "util/murmur_hash.hh"
17
+ #include "util/pool.hh"
18
+ #include "util/string_piece.hh"
19
+
20
+ #include "moses/FeatureVector.h"
21
+ #include "moses/ScoreComponentCollection.h"
22
+ #include "LRState.h"
23
+
24
+ /**
25
+ Configuration of sparse reordering:
26
+
27
+ The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
28
+ sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
29
+ sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
30
+ of cluster file TBD
31
+ sparse-phrase -- Add features which depend on the current phrase (backward)
32
+ sparse-stack -- Add features which depend on the previous phrase, or
33
+ top of stack. (forward)
34
+ sparse-between -- Add features which depend on words between previous phrase
35
+ (or top of stack) and current phrase.
36
+ **/
37
+
38
+ namespace Moses
39
+ {
40
+
41
+ /**
42
+ * Used to store pre-calculated feature names.
43
+ **/
44
+ struct SparseReorderingFeatureKey {
45
+ size_t id;
46
+ enum Type {Stack, Phrase, Between} type;
47
+ const Factor* word;
48
+ bool isCluster;
49
+ enum Position {First, Last} position;
50
+ enum Side {Source, Target} side;
51
+ LRState::ReorderingType reoType;
52
+
53
+ SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
54
+ Position position_, Side side_, LRState::ReorderingType reoType_)
55
+ : id(id_), type(type_), word(word_), isCluster(isCluster_),
56
+ position(position_), side(side_), reoType(reoType_) {
57
+ }
58
+
59
+ const std::string& Name(const std::string& wordListId) ;
60
+ };
61
+
62
+ struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
63
+ std::size_t operator()(const SparseReorderingFeatureKey& key) const {
64
+ //TODO: can we just hash the memory?
65
+ //not sure, there could be random padding
66
+ std::size_t seed = 0;
67
+ seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
68
+ seed = util::MurmurHashNative(&key.type, sizeof(key.type), seed);
69
+ seed = util::MurmurHashNative(&key.word, sizeof(key.word), seed);
70
+ seed = util::MurmurHashNative(&key.isCluster, sizeof(key.isCluster), seed);
71
+ seed = util::MurmurHashNative(&key.position, sizeof(key.position), seed);
72
+ seed = util::MurmurHashNative(&key.side, sizeof(key.side), seed);
73
+ seed = util::MurmurHashNative(&key.reoType, sizeof(key.reoType), seed);
74
+ return seed;
75
+ }
76
+ };
77
+
78
+ struct EqualsSparseReorderingFeatureKey :
79
+ public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
80
+ bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
81
+ //TODO: Can we just compare the memory?
82
+ return left.id == right.id && left.type == right.type && left.word == right.word &&
83
+ left.position == right.position && left.side == right.side &&
84
+ left.reoType == right.reoType;
85
+ }
86
+ };
87
+
88
+ class SparseReordering
89
+ {
90
+ public:
91
+ SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
92
+
93
+ //If direction is backward the options will be different, for forward they will be the same
94
+ void CopyScores(const TranslationOption& currentOpt,
95
+ const TranslationOption* previousOpt,
96
+ const InputType& input,
97
+ LRModel::ReorderingType reoType,
98
+ LRModel::Direction direction,
99
+ ScoreComponentCollection* scores) const ;
100
+
101
+ private:
102
+ const LexicalReordering* m_producer;
103
+ typedef std::pair<std::string, boost::unordered_set<const Factor*> > WordList; //id and list
104
+ std::vector<WordList> m_sourceWordLists;
105
+ std::vector<WordList> m_targetWordLists;
106
+ typedef std::pair<std::string, boost::unordered_map<const Factor*, const Factor*> > ClusterMap; //id and map
107
+ std::vector<ClusterMap> m_sourceClusterMaps;
108
+ std::vector<ClusterMap> m_targetClusterMaps;
109
+ bool m_usePhrase;
110
+ bool m_useBetween;
111
+ bool m_useStack;
112
+ typedef boost::unordered_map<SparseReorderingFeatureKey, FName, HashSparseReorderingFeatureKey, EqualsSparseReorderingFeatureKey> FeatureMap;
113
+ FeatureMap m_featureMap;
114
+
115
+ typedef boost::unordered_map<std::string, float> WeightMap;
116
+ WeightMap m_weightMap;
117
+ bool m_useWeightMap;
118
+ std::vector<FName> m_featureMap2;
119
+
120
+ void ReadWordList(const std::string& filename, const std::string& id,
121
+ SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
122
+ void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
123
+ void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
124
+ void ReadWeightMap(const std::string& filename);
125
+
126
+ void AddFeatures(
127
+ SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
128
+ const Word& word, SparseReorderingFeatureKey::Position position,
129
+ LRModel::ReorderingType reoType,
130
+ ScoreComponentCollection* scores) const;
131
+
132
+ };
133
+
134
+
135
+
136
+ } //namespace
137
+
138
+
139
+ #endif
mosesdecoder/moses/FF/MaxSpanFreeNonTermSource.h ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#pragma once
#include <string>
#include "StatelessFeatureFunction.h"
#include "moses/Word.h"

namespace Moses
{

// -inf if left-most or right-most non-term is over a set span
// (hard constraint for chart decoding; m_maxSpan is the limit and
// m_glueTargetLHS presumably identifies glue rules to exempt — confirm
// against the .cpp implementation).
class MaxSpanFreeNonTermSource : public StatelessFeatureFunction
{
public:
  MaxSpanFreeNonTermSource(const std::string &line);

  // Works with any factor configuration.
  virtual bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  virtual void EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedScores) const;

  virtual void EvaluateWithSourceContext(const InputType &input
                                         , const InputPath &inputPath
                                         , const TargetPhrase &targetPhrase
                                         , const StackVec *stackVec
                                         , ScoreComponentCollection &scoreBreakdown
                                         , ScoreComponentCollection *estimatedScores = NULL) const;

  // No work at the option-list or per-hypothesis level.
  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {
  }

  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
                                   ScoreComponentCollection* accumulator) const {
  }

  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
                                   ScoreComponentCollection* accumulator) const {
  }

  void SetParameter(const std::string& key, const std::string& value);
  std::vector<float> DefaultWeights() const;

protected:
  int m_maxSpan;                    // maximum allowed source span for edge non-terminals
  std::string m_glueTargetLHSStr;   // raw config value for the glue LHS
  Word m_glueTargetLHS;             // parsed glue-rule target LHS
};

}
53
+
mosesdecoder/moses/FF/NieceTerminal.cpp ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <vector>
2
+ #include "NieceTerminal.h"
3
+ #include "moses/ScoreComponentCollection.h"
4
+ #include "moses/TargetPhrase.h"
5
+ #include "moses/ChartCellLabel.h"
6
+ #include "moses/InputType.h"
7
+
8
+ using namespace std;
9
+
10
+ namespace Moses
11
+ {
12
// Construct from a moses.ini feature line; hard-constraint defaults to
// off (soft +1 penalty) until SetParameter says otherwise.
NieceTerminal::NieceTerminal(const std::string &line)
  :StatelessFeatureFunction(line,true)
  ,m_hardConstraint(false)
{
  ReadParameters();
}
18
+
19
+ std::vector<float> NieceTerminal::DefaultWeights() const
20
+ {
21
+ UTIL_THROW_IF2(m_numScoreComponents != 1,
22
+ "NieceTerminal must only have 1 score");
23
+ vector<float> ret(1, 1);
24
+ return ret;
25
+ }
26
+
27
// No scoring here: only stash the rule's source side on the target phrase
// so EvaluateWithSourceContext() can inspect its terminals later.
// (SetRuleSource is callable on a const TargetPhrase — presumably it sets
// a mutable member; confirm in TargetPhrase.)
void NieceTerminal::EvaluateInIsolation(const Phrase &source
                                        , const TargetPhrase &targetPhrase
                                        , ScoreComponentCollection &scoreBreakdown
                                        , ScoreComponentCollection &estimatedScores) const
{
  targetPhrase.SetRuleSource(source);
}
34
+
35
+ void NieceTerminal::EvaluateWithSourceContext(const InputType &input
36
+ , const InputPath &inputPath
37
+ , const TargetPhrase &targetPhrase
38
+ , const StackVec *stackVec
39
+ , ScoreComponentCollection &scoreBreakdown
40
+ , ScoreComponentCollection *estimatedScores) const
41
+ {
42
+ assert(stackVec);
43
+
44
+ const Phrase *ruleSource = targetPhrase.GetRuleSource();
45
+ assert(ruleSource);
46
+
47
+ boost::unordered_set<Word> terms;
48
+ for (size_t i = 0; i < ruleSource->GetSize(); ++i) {
49
+ const Word &word = ruleSource->GetWord(i);
50
+ if (!word.IsNonTerminal()) {
51
+ terms.insert(word);
52
+ }
53
+ }
54
+
55
+ for (size_t i = 0; i < stackVec->size(); ++i) {
56
+ const ChartCellLabel &cell = *stackVec->at(i);
57
+ const Range &ntRange = cell.GetCoverage();
58
+ bool containTerm = ContainTerm(input, ntRange, terms);
59
+
60
+ if (containTerm) {
61
+ //cerr << "ruleSource=" << *ruleSource << " ";
62
+ //cerr << "ntRange=" << ntRange << endl;
63
+
64
+ // non-term contains 1 of the terms in the rule.
65
+ float score = m_hardConstraint ? - std::numeric_limits<float>::infinity() : 1;
66
+ scoreBreakdown.PlusEquals(this, score);
67
+ return;
68
+ }
69
+ }
70
+
71
+ }
72
+
73
// No per-hypothesis work (phrase-based): everything is scored in
// EvaluateWithSourceContext.
void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo,
                                        ScoreComponentCollection* accumulator) const
{}

// No per-hypothesis work (chart): everything is scored in
// EvaluateWithSourceContext.
void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo,
                                        ScoreComponentCollection* accumulator) const
{}
80
+
81
+ bool NieceTerminal::ContainTerm(const InputType &input,
82
+ const Range &ntRange,
83
+ const boost::unordered_set<Word> &terms) const
84
+ {
85
+ boost::unordered_set<Word>::const_iterator iter;
86
+
87
+ for (size_t pos = ntRange.GetStartPos(); pos <= ntRange.GetEndPos(); ++pos) {
88
+ const Word &word = input.GetWord(pos);
89
+ iter = terms.find(word);
90
+
91
+ if (iter != terms.end()) {
92
+ return true;
93
+ }
94
+ }
95
+ return false;
96
+ }
97
+
98
+ void NieceTerminal::SetParameter(const std::string& key, const std::string& value)
99
+ {
100
+ if (key == "hard-constraint") {
101
+ m_hardConstraint = Scan<bool>(value);
102
+ } else {
103
+ StatelessFeatureFunction::SetParameter(key, value);
104
+ }
105
+ }
106
+
107
+
108
+ }
109
+
110
+
mosesdecoder/moses/FF/NieceTerminal.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#pragma once

#include <boost/unordered_set.hpp>
#include <string>
#include "StatelessFeatureFunction.h"

namespace Moses
{
class Range;
class Word;

// 1 of the non-term covers the same word as 1 of the terminals
// Fires (+1) when a source non-terminal of a rule spans an input word that
// also occurs as one of the rule's own terminals; with hard-constraint=true
// the derivation gets -inf instead.
class NieceTerminal : public StatelessFeatureFunction
{
public:
  NieceTerminal(const std::string &line);

  // Works with any factor configuration.
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  // Records the rule's source side on the target phrase (no scoring).
  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedScores) const;
  // Scores the rule once non-terminal coverages are known.
  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedScores = NULL) const;

  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {
  }

  // Both per-hypothesis hooks are no-ops (see .cpp).
  void EvaluateWhenApplied(const Hypothesis& hypo,
                           ScoreComponentCollection* accumulator) const;
  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                           ScoreComponentCollection* accumulator) const;

  void SetParameter(const std::string& key, const std::string& value);
  std::vector<float> DefaultWeights() const;

protected:
  bool m_hardConstraint;  // if true, matching rules get -inf instead of +1
  // True iff any input word covered by ntRange is in `terms`.
  bool ContainTerm(const InputType &input,
                   const Range &ntRange,
                   const boost::unordered_set<Word> &terms) const;
};

}
53
+
54
+
mosesdecoder/moses/FF/OSM-Feature/OpSequenceModel.h ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#pragma once

#include <string>
#include <map>
#include <vector>
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/Manager.h"
#include "moses/FF/OSM-Feature/osmHyp.h"
#include "KenOSM.h"

namespace Moses
{

// Operation Sequence Model feature: scores hypotheses with a KenLM-backed
// language model over translation/reordering operation sequences.
class OpSequenceModel : public StatefulFeatureFunction
{
public:

  OSMLM* OSM;                   // the operation-sequence language model
  float unkOpProb;              // probability assigned to unknown operations
  int sFactor; // Source Factor ...
  int tFactor; // Target Factor ...
  int numFeatures; // Number of features used ...
  util::LoadMethod load_method; // method to load model

  OpSequenceModel(const std::string &line);
  ~OpSequenceModel();

  // Load the operation-sequence LM from the given path.
  void readLanguageModel(const char *);
  void Load(AllOptions::ptr const& opts);

  // Phrase-based decoding: extend prev_state with cur_hypo's operations.
  FFState* EvaluateWhenApplied(
    const Hypothesis& cur_hypo,
    const FFState* prev_state,
    ScoreComponentCollection* accumulator) const;

  // Chart decoding variant.
  virtual FFState* EvaluateWhenApplied(
    const ChartHypothesis& /* cur_hypo */,
    int /* featureID - used to index the state in the previous hypotheses */,
    ScoreComponentCollection* accumulator) const;

  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedScores) const;

  virtual const FFState* EmptyHypothesisState(const InputType &input) const;

  virtual std::string GetScoreProducerWeightShortName(unsigned idx=0) const;

  // Precomputed estimate used for future-cost scoring of a phrase pair.
  std::vector<float> GetFutureScores(const Phrase &source, const Phrase &target) const;
  void SetParameter(const std::string& key, const std::string& value);

  bool IsUseable(const FactorMask &mask) const;

protected:
  typedef std::pair<Phrase, Phrase> ParallelPhrase;
  typedef std::vector<float> Scores;
  // cache of future-cost scores per phrase pair
  std::map<ParallelPhrase, Scores> m_futureCost;

  // per-phrase cept alignments (source/target index sets)
  std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
  std::set <int> targetNullWords;  // unaligned target positions
  std::string m_lmPath;            // path of the operation-sequence LM


};


} // namespace
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "PhraseBoundaryFeature.h"
2
+
3
+ #include "moses/Hypothesis.h"
4
+ #include "moses/TranslationOption.h"
5
+ #include "moses/InputPath.h"
6
+ #include "util/string_stream.hh"
7
+
8
+ using namespace std;
9
+
10
+ namespace Moses
11
+ {
12
+
13
+ size_t PhraseBoundaryState::hash() const
14
+ {
15
+ size_t ret = hash_value(*m_targetWord);
16
+ boost::hash_combine(ret, hash_value(*m_sourceWord));
17
+
18
+ return ret;
19
+ }
20
+ bool PhraseBoundaryState::operator==(const FFState& other) const
21
+ {
22
+ const PhraseBoundaryState& rhs = static_cast<const PhraseBoundaryState&>(other);
23
+ bool ret = *m_targetWord == *rhs.m_targetWord && *m_sourceWord == *rhs.m_sourceWord;
24
+ return ret;
25
+ }
26
+
27
+ /////////////////////////////////////////////////////////////////////////////////////
28
+ PhraseBoundaryFeature::PhraseBoundaryFeature(const std::string &line)
29
+ : StatefulFeatureFunction(0, line)
30
+ {
31
+ std::cerr << "Initializing source word deletion feature.." << std::endl;
32
+ ReadParameters();
33
+ }
34
+
35
+ void PhraseBoundaryFeature::SetParameter(const std::string& key, const std::string& value)
36
+ {
37
+ if (key == "source") {
38
+ m_sourceFactors = Tokenize<FactorType>(value, ",");
39
+ } else if (key == "target") {
40
+ m_targetFactors = Tokenize<FactorType>(value, ",");
41
+ } else {
42
+ StatefulFeatureFunction::SetParameter(key, value);
43
+ }
44
+ }
45
+
46
// Initial decoder state: no boundary words produced yet; NULL stands in
// for the sentence-initial position.
const FFState* PhraseBoundaryFeature::EmptyHypothesisState(const InputType &) const
{
  return new PhraseBoundaryState(NULL,NULL);
}
50
+
51
+
52
+ void PhraseBoundaryFeature::AddFeatures(
53
+ const Word* leftWord, const Word* rightWord, const FactorList& factors, const string& side,
54
+ ScoreComponentCollection* scores) const
55
+ {
56
+ for (size_t i = 0; i < factors.size(); ++i) {
57
+ util::StringStream name;
58
+ name << side << ":";
59
+ name << factors[i];
60
+ name << ":";
61
+ if (leftWord) {
62
+ name << leftWord->GetFactor(factors[i])->GetString();
63
+ } else {
64
+ name << BOS_;
65
+ }
66
+ name << ":";
67
+ if (rightWord) {
68
+ name << rightWord->GetFactor(factors[i])->GetString();
69
+ } else {
70
+ name << EOS_;
71
+ }
72
+ scores->PlusEquals(this,name.str(),1);
73
+ }
74
+
75
+ }
76
+
77
+ FFState* PhraseBoundaryFeature::EvaluateWhenApplied
78
+ (const Hypothesis& cur_hypo, const FFState* prev_state,
79
+ ScoreComponentCollection* scores) const
80
+ {
81
+ const PhraseBoundaryState* pbState = static_cast<const PhraseBoundaryState*>(prev_state);
82
+ const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
83
+ if (targetPhrase.GetSize() == 0) {
84
+ return new PhraseBoundaryState(*pbState);
85
+ }
86
+ const Word* leftTargetWord = pbState->GetTargetWord();
87
+ const Word* rightTargetWord = &(targetPhrase.GetWord(0));
88
+ AddFeatures(leftTargetWord,rightTargetWord,m_targetFactors,"tgt",scores);
89
+
90
+ const Phrase& sourcePhrase = cur_hypo.GetTranslationOption().GetInputPath().GetPhrase();
91
+ const Word* leftSourceWord = pbState->GetSourceWord();
92
+ const Word* rightSourceWord = &(sourcePhrase.GetWord(0));
93
+ AddFeatures(leftSourceWord,rightSourceWord,m_sourceFactors,"src",scores);
94
+
95
+ const Word* endSourceWord = &(sourcePhrase.GetWord(sourcePhrase.GetSize()-1));
96
+ const Word* endTargetWord = &(targetPhrase.GetWord(targetPhrase.GetSize()-1));
97
+
98
+ //if end of sentence add EOS
99
+ if (cur_hypo.IsSourceCompleted()) {
100
+ AddFeatures(endSourceWord,NULL,m_sourceFactors,"src",scores);
101
+ AddFeatures(endTargetWord,NULL,m_targetFactors,"tgt",scores);
102
+ }
103
+
104
+ return new PhraseBoundaryState(endSourceWord,endTargetWord);
105
+ }
106
+
107
+ bool PhraseBoundaryFeature::IsUseable(const FactorMask &mask) const
108
+ {
109
+ for (size_t i = 0; i < m_targetFactors.size(); ++i) {
110
+ const FactorType &factor = m_targetFactors[i];
111
+ if (!mask[factor]) {
112
+ return false;
113
+ }
114
+ }
115
+ return true;
116
+ }
117
+
118
+ }
mosesdecoder/moses/FF/PhraseLengthFeature.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef moses_PhraseLengthFeature_h
2
+ #define moses_PhraseLengthFeature_h
3
+
4
+ #include <stdexcept>
5
+ #include <string>
6
+ #include <map>
7
+
8
+ #include "StatelessFeatureFunction.h"
9
+ #include "moses/Word.h"
10
+ #include "moses/FactorCollection.h"
11
+
12
+ namespace Moses
13
+ {
14
+
15
/** Sets the features for length of source phrase, target phrase, both.
 */
class PhraseLengthFeature : public StatelessFeatureFunction
{
public:
  PhraseLengthFeature(const std::string &line);

  // Phrase lengths depend on no factors, so any factor setup works.
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  // All scoring happens in EvaluateInIsolation (lengths are fully known
  // from the phrase pair alone), so the per-hypothesis hooks are no-ops.
  void EvaluateWhenApplied(const Hypothesis& hypo,
                           ScoreComponentCollection* accumulator) const {
  }

  void EvaluateWhenApplied(const ChartHypothesis& hypo,
                           ScoreComponentCollection*) const {
  }

  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedScores = NULL) const {
  }

  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {
  }

  // Emits the length features for the given phrase pair (defined in .cpp).
  virtual void EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedScores) const;

};
51
+
52
+ }
53
+
54
+ #endif // moses_PhraseLengthFeature_h
mosesdecoder/moses/FF/PhraseOrientationFeature.h ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // REFERENCE
3
+ // ---------
4
+ // When using this feature, please cite:
5
+ //
6
+ // Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
7
+ // A Phrase Orientation Model for Hierarchical Machine Translation.
8
+ // In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
9
+ //
10
+
11
+ #pragma once
12
+
13
+ #include <bitset>
14
+ #include <string>
15
+ #include <vector>
16
+ #include "StatefulFeatureFunction.h"
17
+ #include "FFState.h"
18
+ #include "moses/Factor.h"
19
+ #include "phrase-extract/PhraseOrientation.h"
20
+ #include "moses/PP/OrientationPhraseProperty.h"
21
+ #include <boost/unordered_set.hpp>
22
+
23
+
24
+ namespace Moses
25
+ {
26
+
27
/** Decoder state for PhraseOrientationFeature (hierarchical phrase
 *  orientation model).
 *
 *  For the left rule boundary (scored left-to-right) and the right rule
 *  boundary (scored right-to-left) it stores the delayed orientation
 *  scores of a boundary non-terminal whose final orientation is not yet
 *  known, plus the set of orientations still possible for it.
 *  Index convention throughout (see the getters): 0 = monotone,
 *  1 = swap, 2 = discontinuous.  States chain to their predecessors via
 *  m_leftBoundaryPrevState / m_rightBoundaryPrevState so that delayed
 *  scores can be revisited recursively once the true orientation is
 *  established.
 */
class PhraseOrientationFeatureState : public FFState
{
public:

  friend class PhraseOrientationFeature;

  PhraseOrientationFeatureState(bool distinguishStates, bool useSparseWord, bool useSparseNT)
    : m_leftBoundaryNonTerminalL2RScores(3,0)
    , m_rightBoundaryNonTerminalR2LScores(3,0)
    , m_leftBoundaryNonTerminalL2RPossibleFutureOrientations(0x7)  // all three orientations possible
    , m_rightBoundaryNonTerminalR2LPossibleFutureOrientations(0x7)
    , m_leftBoundaryRecursionGuard(false)
    , m_rightBoundaryRecursionGuard(false)
    , m_leftBoundaryIsSet(false)
    , m_rightBoundaryIsSet(false)
    , m_distinguishStates(distinguishStates)
    , m_useSparseWord(useSparseWord)
    , m_useSparseNT(useSparseNT)
  {}

  // Records the delayed L2R scores of the leftmost boundary non-terminal,
  // the heuristic score chosen so far, the orientations still possible,
  // and a link to the predecessor state.
  void SetLeftBoundaryL2R(const std::vector<float> &scores,
                          size_t heuristicScoreIndex,
                          std::bitset<3> &possibleFutureOrientations,
                          const Factor* leftBoundaryNonTerminalSymbol,
                          const PhraseOrientationFeatureState* prevState) {
    for (size_t i=0; i<3; ++i) {
      m_leftBoundaryNonTerminalL2RScores[i] = scores[i];
      m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i] = possibleFutureOrientations[i];
    }
    m_leftBoundaryNonTerminalL2RHeuristicScoreIndex = heuristicScoreIndex;
    m_leftBoundaryNonTerminalSymbol = leftBoundaryNonTerminalSymbol;
    m_leftBoundaryPrevState = prevState;
    m_leftBoundaryIsSet = true;
  }

  // Mirror image of SetLeftBoundaryL2R for the rightmost boundary
  // non-terminal, scored right-to-left.
  void SetRightBoundaryR2L(const std::vector<float> &scores,
                           size_t heuristicScoreIndex,
                           std::bitset<3> &possibleFutureOrientations,
                           const Factor* rightBoundaryNonTerminalSymbol,
                           const PhraseOrientationFeatureState* prevState) {
    for (size_t i=0; i<3; ++i) {
      m_rightBoundaryNonTerminalR2LScores[i] = scores[i];
      m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i] = possibleFutureOrientations[i];
    }
    m_rightBoundaryNonTerminalR2LHeuristicScoreIndex = heuristicScoreIndex;
    m_rightBoundaryNonTerminalSymbol = rightBoundaryNonTerminalSymbol;
    m_rightBoundaryPrevState = prevState;
    m_rightBoundaryIsSet = true;
  }

  // Delayed left-boundary L2R scores, per orientation class.
  float GetLeftBoundaryL2RScoreMono() const {
    return m_leftBoundaryNonTerminalL2RScores[0];
  }

  float GetLeftBoundaryL2RScoreSwap() const {
    return m_leftBoundaryNonTerminalL2RScores[1];
  }

  float GetLeftBoundaryL2RScoreDiscontinuous() const {
    return m_leftBoundaryNonTerminalL2RScores[2];
  }

  // Delayed right-boundary R2L scores, per orientation class.
  float GetRightBoundaryR2LScoreMono() const {
    return m_rightBoundaryNonTerminalR2LScores[0];
  }

  float GetRightBoundaryR2LScoreSwap() const {
    return m_rightBoundaryNonTerminalR2LScores[1];
  }

  float GetRightBoundaryR2LScoreDiscontinuous() const {
    return m_rightBoundaryNonTerminalR2LScores[2];
  }

  virtual size_t hash() const;
  virtual bool operator==(const FFState& other) const;

protected:

  // Three-way comparison (-1/0/+1) of the delayed left-boundary
  // information of two states, recursing through the chains of previous
  // states until a recursion guard or an unset boundary ends the walk.
  // NOTE(review): the recursion dereferences the prev-state pointers
  // without a NULL check — presumably a set boundary with an unset guard
  // guarantees a valid pointer; confirm in the .cpp.
  static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
    if (!state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
      return 0;
    }
    if (state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
      return 1;
    }
    if (!state.m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet) {
      return -1;
    }

    // With sparse non-terminal features the symbol identity matters too.
    if (useSparseNT) {
      if ( otherState.m_leftBoundaryNonTerminalSymbol < state.m_leftBoundaryNonTerminalSymbol ) {
        return 1;
      }
      if ( state.m_leftBoundaryNonTerminalSymbol < otherState.m_leftBoundaryNonTerminalSymbol ) {
        return -1;
      }
    }

    if ( otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
      return 1;
    }
    if ( state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
      return -1;
    }
    if ( Smaller(otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
      return 1;
    }
    if ( Smaller(state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
      return -1;
    }
    for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
      // compare only for possible future orientations
      // (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks)
      if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
        if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
          return 1;
        }
        if (state.m_leftBoundaryNonTerminalL2RScores[i] < otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
          return -1;
        }
      }
    }

    // Recursion guards terminate the walk along the predecessor chain.
    if (state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
      return 0;
    }
    if (state.m_leftBoundaryRecursionGuard && !otherState.m_leftBoundaryRecursionGuard) {
      return 1;
    }
    if (!state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
      return -1;
    }

    const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState;
    const PhraseOrientationFeatureState *otherPrevState = otherState.m_leftBoundaryPrevState;

    return CompareLeftBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
  };

  // Mirror image of CompareLeftBoundaryRecursive for the right boundary
  // (R2L direction).
  static int CompareRightBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
    if (!state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
      return 0;
    }
    if (state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
      return 1;
    }
    if (!state.m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet) {
      return -1;
    }

    if (useSparseNT) {
      if ( otherState.m_rightBoundaryNonTerminalSymbol < state.m_rightBoundaryNonTerminalSymbol ) {
        return 1;
      }
      if ( state.m_rightBoundaryNonTerminalSymbol < otherState.m_rightBoundaryNonTerminalSymbol ) {
        return -1;
      }
    }

    if ( otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
      return 1;
    }
    if ( state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
      return -1;
    }
    if ( Smaller(otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
      return 1;
    }
    if ( Smaller(state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
      return -1;
    }
    for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
      // compare only for possible future orientations
      // (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks)
      if ( state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
        if (state.m_rightBoundaryNonTerminalR2LScores[i] > otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
          return 1;
        }
        if (state.m_rightBoundaryNonTerminalR2LScores[i] < otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
          return -1;
        }
      }
    }

    if (state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
      return 0;
    }
    if (state.m_rightBoundaryRecursionGuard && !otherState.m_rightBoundaryRecursionGuard) {
      return 1;
    }
    if (!state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
      return -1;
    }

    const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState;
    const PhraseOrientationFeatureState *otherPrevState = otherState.m_rightBoundaryPrevState;

    return CompareRightBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
  };


  // Folds the delayed left-boundary scores (for still-possible
  // orientations only) into hash, walking the predecessor chain; must
  // stay consistent with CompareLeftBoundaryRecursive.
  static void HashCombineLeftBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) {
    if (useSparseNT) {
      boost::hash_combine(hash, state.m_leftBoundaryNonTerminalSymbol);
    }
    // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex);
    // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations);

    for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
      if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
        boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RScores[i]);
      } else {
        boost::hash_combine(hash, 0);
      }
    }

    if (!state.m_leftBoundaryRecursionGuard) {
      const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState;
      if (prevState->m_leftBoundaryIsSet) {
        HashCombineLeftBoundaryRecursive(hash, *prevState, useSparseNT);
      }
    }
  };

  // Mirror image of HashCombineLeftBoundaryRecursive for the right
  // boundary (R2L direction).
  static void HashCombineRightBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) {
    if (useSparseNT) {
      boost::hash_combine(hash, state.m_rightBoundaryNonTerminalSymbol);
    }
    // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex);
    // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations);

    for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
      if (state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
        boost::hash_combine(hash, state.m_rightBoundaryNonTerminalR2LScores[i]);
      } else {
        boost::hash_combine(hash, 0);
      }
    }

    if (!state.m_rightBoundaryRecursionGuard) {
      const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState;
      if (prevState->m_rightBoundaryIsSet) {
        HashCombineRightBoundaryRecursive(hash, *prevState, useSparseNT);
      }
    }
  };


  // Strict ordering on bitsets (std::bitset has no operator<):
  // decided by the lowest-index differing bit.
  template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y) {
    for (size_t i=0; i<N; ++i) {
      if (x[i] ^ y[i])
        return y[i];
    }
    return false;
  }

  // Delayed orientation scores, indexed 0=mono, 1=swap, 2=discontinuous.
  std::vector<float> m_leftBoundaryNonTerminalL2RScores;
  std::vector<float> m_rightBoundaryNonTerminalR2LScores;

  // Index of the heuristically chosen orientation within the vectors above.
  size_t m_leftBoundaryNonTerminalL2RHeuristicScoreIndex;
  size_t m_rightBoundaryNonTerminalR2LHeuristicScoreIndex;

  // Orientations still possible for the boundary non-terminal.
  std::bitset<3> m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
  std::bitset<3> m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;

  // Guards terminating the recursive walks along the predecessor chains.
  bool m_leftBoundaryRecursionGuard;
  bool m_rightBoundaryRecursionGuard;
  bool m_leftBoundaryIsSet;
  bool m_rightBoundaryIsSet;
  const PhraseOrientationFeatureState* m_leftBoundaryPrevState;
  const PhraseOrientationFeatureState* m_rightBoundaryPrevState;
  // Configuration mirrored from the owning feature (see its SetParameter).
  const bool m_distinguishStates;
  const bool m_useSparseWord;
  const bool m_useSparseNT;
  const Factor* m_leftBoundaryNonTerminalSymbol;
  const Factor* m_rightBoundaryNonTerminalSymbol;
};
306
+
307
+
308
+
309
/** Phrase orientation model for hierarchical machine translation
 *  (Huck et al., WMT 2013 — see the citation at the top of this file).
 *  Stateful feature for chart decoding; the phrase-based entry point is
 *  intentionally unimplemented and throws.
 */
class PhraseOrientationFeature : public StatefulFeatureFunction
{
public:

  /** Per-rule orientation classes of its non-terminals (L2R and R2L),
   *  plus alignment/boundary flags for the first and last non-terminal. */
  struct ReoClassData {
  public:
    std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
    std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
    bool firstNonTerminalIsBoundary;
    bool firstNonTerminalPreviousSourceSpanIsAligned;
    bool firstNonTerminalFollowingSourceSpanIsAligned;
    bool lastNonTerminalIsBoundary;
    bool lastNonTerminalPreviousSourceSpanIsAligned;
    bool lastNonTerminalFollowingSourceSpanIsAligned;
  };

  PhraseOrientationFeature(const std::string &line);

  ~PhraseOrientationFeature() {
  }

  // Works with any factor configuration.
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  // Fresh state with no pending boundary non-terminal scores.
  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
    return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
  }

  void SetParameter(const std::string& key, const std::string& value);

  void Load(AllOptions::ptr const& opts);

  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedScores) const;

  // Phrase-based decoding is not supported: always throws.
  FFState* EvaluateWhenApplied(
    const Hypothesis& cur_hypo,
    const FFState* prev_state,
    ScoreComponentCollection* accumulator) const {
    UTIL_THROW2(GetScoreProducerDescription()
                << ": EvaluateWhenApplied(const Hypothesis&, ...) not implemented");
    return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
  };

  // Chart decoding entry point (defined in .cpp).
  FFState* EvaluateWhenApplied(
    const ChartHypothesis& cur_hypo,
    int featureID, // used to index the state in the previous hypotheses
    ScoreComponentCollection* accumulator) const;

protected:

  // Reads a word list (one entry per line) from filename into list.
  void LoadWordList(const std::string& filename,
                    boost::unordered_set<const Factor*>& list);

  // Applies (or, with subtract=true, retracts) heuristic orientation
  // scores before the actual orientation of a rule is known.
  void LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
                      ScoreComponentCollection &scoreBreakdown,
                      const Factor* targetPhraseLHS,
                      bool subtract=false) const;

  // Index (0=mono, 1=swap, 2=discontinuous) of the heuristically preferred
  // orientation among those still possible.
  size_t GetHeuristicScoreIndex(const std::vector<float>& scores,
                                size_t weightsVectorOffset,
                                const std::bitset<3> possibleFutureOrientations = 0x7) const;

  // Revisit delayed left-boundary L2R scores along the chain of states
  // once the true orientation is established.
  void LeftBoundaryL2RScoreRecursive(int featureID,
                                     const PhraseOrientationFeatureState *state,
                                     const std::bitset<3> orientation,
                                     std::vector<float>& newScores,
                                     ScoreComponentCollection* scoreBreakdown) const;

  // Mirror image for the right boundary (R2L direction).
  void RightBoundaryR2LScoreRecursive(int featureID,
                                      const PhraseOrientationFeatureState *state,
                                      const std::bitset<3> orientation,
                                      std::vector<float>& newScores,
                                      ScoreComponentCollection* scoreBreakdown) const;

  // Sparse feature scoring over boundary words / non-terminals; o is one
  // of MORIENT/SORIENT/DORIENT.
  void SparseWordL2RScore(const ChartHypothesis* hypo,
                          ScoreComponentCollection* scoreBreakdown,
                          const std::string* o) const;

  void SparseWordR2LScore(const ChartHypothesis* hypo,
                          ScoreComponentCollection* scoreBreakdown,
                          const std::string* o) const;

  void SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol,
                                 ScoreComponentCollection* scoreBreakdown,
                                 const std::string* o) const;

  void SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol,
                                 ScoreComponentCollection* scoreBreakdown,
                                 const std::string* o) const;

  // Maps an orientation class to MORIENT/SORIENT/DORIENT.
  const std::string* ToString(const MosesTraining::PhraseOrientation::REO_CLASS o) const;

  // Feature-name components for monotone / swap / discontinuous.
  static const std::string MORIENT;
  static const std::string SORIENT;
  static const std::string DORIENT;

  // Configuration set via SetParameter / Load (exact semantics in .cpp).
  std::string m_glueLabelStr;
  const Factor* m_glueLabel;
  bool m_noScoreBoundary;
  bool m_monotoneScoreBoundary;
  bool m_distinguishStates;
  bool m_lookaheadScore;
  bool m_heuristicScoreUseWeights;
  bool m_useSparseWord;
  bool m_useSparseNT;
  size_t m_offsetR2LScores;
  // mutable: presumably filled lazily during scoring — confirm in .cpp.
  mutable std::vector<float> m_weightsVector;
  std::string m_filenameTargetWordList;
  boost::unordered_set<const Factor*> m_targetWordList;
  bool m_useTargetWordList;
  std::string m_filenameSourceWordList;
  boost::unordered_set<const Factor*> m_sourceWordList;
  bool m_useSourceWordList;

};
428
+
429
+
430
+ }
431
+
mosesdecoder/moses/FF/RulePairUnlexicalizedSource.cpp ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "RulePairUnlexicalizedSource.h"
2
+ #include "moses/StaticData.h"
3
+ #include "moses/InputFileStream.h"
4
+ #include "moses/ScoreComponentCollection.h"
5
+ #include "moses/FactorCollection.h"
6
+ #include <sstream>
7
+ #include "util/string_stream.hh"
8
+
9
+ using namespace std;
10
+
11
+ namespace Moses
12
+ {
13
+
14
+ RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
15
+ : StatelessFeatureFunction(1, line)
16
+ , m_glueRules(false)
17
+ , m_nonGlueRules(true)
18
+ , m_glueTargetLHSStr("Q")
19
+ {
20
+ VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
21
+ ReadParameters();
22
+ FactorCollection &factorCollection = FactorCollection::Instance();
23
+ m_glueTargetLHS = factorCollection.AddFactor(m_glueTargetLHSStr, true);
24
+ VERBOSE(1, " Done.");
25
+ }
26
+
27
+ void RulePairUnlexicalizedSource::SetParameter(const std::string& key, const std::string& value)
28
+ {
29
+ if (key == "glueRules") {
30
+ m_glueRules = Scan<bool>(value);
31
+ } else if (key == "nonGlueRules") {
32
+ m_nonGlueRules = Scan<bool>(value);
33
+ } else if (key == "glueTargetLHS") {
34
+ m_glueTargetLHSStr = value;
35
+ } else {
36
+ StatelessFeatureFunction::SetParameter(key, value);
37
+ }
38
+ }
39
+
40
+
41
+ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
42
+ , const TargetPhrase &targetPhrase
43
+ , ScoreComponentCollection &scoreBreakdown
44
+ , ScoreComponentCollection &estimatedScores) const
45
+ {
46
+ const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0];
47
+ if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) {
48
+ return;
49
+ }
50
+ if ( !m_nonGlueRules && (targetPhraseLHS != m_glueTargetLHS) ) {
51
+ return;
52
+ }
53
+
54
+ for (size_t posS=0; posS<source.GetSize(); ++posS) {
55
+ const Word &wordS = source.GetWord(posS);
56
+ if ( !wordS.IsNonTerminal() ) {
57
+ return;
58
+ }
59
+ }
60
+
61
+ util::StringStream namestr;
62
+
63
+ for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
64
+ const Word &wordT = targetPhrase.GetWord(posT);
65
+ const Factor* factorT = wordT[0];
66
+ if ( wordT.IsNonTerminal() ) {
67
+ namestr << "[";
68
+ }
69
+ namestr << factorT->GetString();
70
+ if ( wordT.IsNonTerminal() ) {
71
+ namestr << "]";
72
+ }
73
+ namestr << "|";
74
+ }
75
+
76
+ namestr << targetPhraseLHS->GetString() << "|";
77
+
78
+ for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
79
+ it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
80
+ namestr << "|" << it->first << "-" << it->second;
81
+ }
82
+
83
+ scoreBreakdown.PlusEquals(this, namestr.str(), 1);
84
+ if ( targetPhraseLHS != m_glueTargetLHS ) {
85
+ scoreBreakdown.PlusEquals(this, 1);
86
+ }
87
+ }
88
+
89
+ }
90
+
mosesdecoder/moses/FF/RulePairUnlexicalizedSource.h ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <limits>
5
+ #include <boost/unordered_map.hpp>
6
+ #include "StatelessFeatureFunction.h"
7
+ #include "moses/Factor.h"
8
+
9
+ namespace Moses
10
+ {
11
+
12
/** Stateless feature firing a sparse indicator for each applied rule whose
 *  source side is fully unlexicalized (non-terminals only); see the .cpp
 *  for the feature-name layout.  Glue vs. non-glue rules can be in- or
 *  excluded via the glueRules / nonGlueRules parameters.
 */
class RulePairUnlexicalizedSource : public StatelessFeatureFunction
{
public:

  RulePairUnlexicalizedSource(const std::string &line);

  // Works with any factor configuration.
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  void SetParameter(const std::string& key, const std::string& value);

  // All scoring happens here; the remaining hooks are no-ops.
  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedScores) const;

  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedScores = NULL) const
  {}

  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const
  {}

  void EvaluateWhenApplied(
    const Hypothesis& cur_hypo,
    ScoreComponentCollection* accumulator) const
  {}

  void EvaluateWhenApplied(
    const ChartHypothesis& cur_hypo,
    ScoreComponentCollection* accumulator) const
  {}

protected:

  bool m_glueRules;               // fire on glue rules?
  bool m_nonGlueRules;            // fire on non-glue rules?
  std::string m_glueTargetLHSStr; // label marking glue rules (default "Q")
  const Factor* m_glueTargetLHS;  // cached factor for the glue label
};


}
+
59
+
60
+ }
61
+
mosesdecoder/moses/FF/SetSourcePhrase.cpp ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "SetSourcePhrase.h"
2
+ #include "moses/TargetPhrase.h"
3
+
4
+ namespace Moses
5
+ {
6
SetSourcePhrase::SetSourcePhrase(const std::string &line)
  :StatelessFeatureFunction(0, line)
{
  // Produces no scores (0 components): the feature exists only for its
  // side effect of attaching the rule source to the target phrase
  // (see EvaluateInIsolation), so there is nothing to tune.
  m_tuneable = false;
  ReadParameters();
}
12
+
13
// Side effect only, no scores: stores the source side of the rule on the
// target phrase so later components can access it.
// NOTE(review): SetRuleSource is callable through a const TargetPhrase& —
// presumably it writes a mutable/cached member; confirm in TargetPhrase.
void SetSourcePhrase::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedScores) const
{
  targetPhrase.SetRuleSource(source);
}
20
+
21
+ }
mosesdecoder/moses/FF/SourceWordDeletionFeature.cpp ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <sstream>
2
+ #include "SourceWordDeletionFeature.h"
3
+ #include "moses/Phrase.h"
4
+ #include "moses/TargetPhrase.h"
5
+ #include "moses/Hypothesis.h"
6
+ #include "moses/ChartHypothesis.h"
7
+ #include "moses/ScoreComponentCollection.h"
8
+ #include "moses/TranslationOption.h"
9
+ #include "moses/Util.h"
10
+
11
+ #include "util/string_piece_hash.hh"
12
+ #include "util/exception.hh"
13
+
14
+ namespace Moses
15
+ {
16
+
17
+ using namespace std;
18
+
19
// Sparse feature over unaligned ("deleted") source words.  Starts
// unrestricted; Load() flips m_unrestricted to false when a vocabulary
// file is configured via the "path" parameter.
SourceWordDeletionFeature::SourceWordDeletionFeature(const std::string &line)
  :StatelessFeatureFunction(0, line),
   m_unrestricted(true)
{
  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
  ReadParameters();
  VERBOSE(1, " Done." << std::endl);
}
27
+
28
+ void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::string& value)
29
+ {
30
+ if (key == "factor") {
31
+ m_factorType = Scan<FactorType>(value);
32
+ } else if (key == "path") {
33
+ m_filename = value;
34
+ } else {
35
+ StatelessFeatureFunction::SetParameter(key, value);
36
+ }
37
+ }
38
+
39
+ void SourceWordDeletionFeature::Load(AllOptions::ptr const& opts)
40
+ {
41
+ m_options = opts;
42
+ if (m_filename.empty())
43
+ return;
44
+
45
+ FEATUREVERBOSE(1, "Loading source word deletion word list from " << m_filename << std::endl);
46
+ ifstream inFile(m_filename.c_str());
47
+ UTIL_THROW_IF2(!inFile, "Can't open file " << m_filename);
48
+
49
+ std::string line;
50
+ while (getline(inFile, line)) {
51
+ m_vocab.insert(line);
52
+ }
53
+
54
+ inFile.close();
55
+
56
+ m_unrestricted = false;
57
+ }
58
+
59
+ bool SourceWordDeletionFeature::IsUseable(const FactorMask &mask) const
60
+ {
61
+ bool ret = mask[m_factorType];
62
+ return ret;
63
+ }
64
+
65
// Deletion is determined from the rule's terminal word alignment; the
// actual feature computation is delegated to ComputeFeatures.
void SourceWordDeletionFeature::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedScores) const
{
  const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
  ComputeFeatures(source, targetPhrase, &scoreBreakdown, alignmentInfo);
}
73
+
74
+ void SourceWordDeletionFeature::ComputeFeatures(const Phrase &source,
75
+ const TargetPhrase& targetPhrase,
76
+ ScoreComponentCollection* accumulator,
77
+ const AlignmentInfo &alignmentInfo) const
78
+ {
79
+ // handle special case: unknown words (they have no word alignment)
80
+ size_t targetLength = targetPhrase.GetSize();
81
+ size_t sourceLength = source.GetSize();
82
+ if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return;
83
+
84
+ // flag aligned words
85
+ std::vector<bool> aligned(sourceLength, false);
86
+ for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++)
87
+ aligned[ alignmentPoint->first ] = true;
88
+
89
+ // process unaligned source words
90
+ for(size_t i=0; i<sourceLength; i++) {
91
+ if (!aligned[i]) {
92
+ const Word &w = source.GetWord(i);
93
+ if (!w.IsNonTerminal()) {
94
+ const StringPiece word = w.GetFactor(m_factorType)->GetString();
95
+ if (word != "<s>" && word != "</s>") {
96
+ if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) {
97
+ accumulator->PlusEquals(this, StringPiece("OTHER"),1);
98
+ } else {
99
+ accumulator->PlusEquals(this,word,1);
100
+ }
101
+ }
102
+ }
103
+ }
104
+ }
105
+ }
106
+
107
+ }
mosesdecoder/moses/FF/StatefulFeatureFunction.h ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
2
+ #pragma once
3
+
4
+ #include "FeatureFunction.h"
5
+
6
+
7
+ namespace Moses
8
+ {
9
+ class FFState;
10
+
11
+ namespace Syntax
12
+ {
13
+ struct SHyperedge;
14
+ }
15
+
16
+ /** base class for all stateful feature functions.
17
+ * eg. LM, distortion penalty
18
+ */
19
+ class StatefulFeatureFunction: public FeatureFunction
20
+ {
21
+ //All statefull FFs
22
+ static std::vector<const StatefulFeatureFunction*> m_statefulFFs;
23
+
24
+ public:
25
+ static const std::vector<const StatefulFeatureFunction*>&
26
+ GetStatefulFeatureFunctions() {
27
+ return m_statefulFFs;
28
+ }
29
+
30
+ StatefulFeatureFunction(const std::string &line, bool registerNow);
31
+ StatefulFeatureFunction(size_t numScoreComponents, const std::string &line);
32
+
33
+ /**
34
+ * \brief This interface should be implemented.
35
+ * Notes: When evaluating the value of this feature function, you should avoid
36
+ * calling hypo.GetPrevHypo(). If you need something from the "previous"
37
+ * hypothesis, you should store it in an FFState object which will be passed
38
+ * in as prev_state. If you don't do this, you will get in trouble.
39
+ */
40
+ virtual FFState* EvaluateWhenApplied(
41
+ const Hypothesis& cur_hypo,
42
+ const FFState* prev_state,
43
+ ScoreComponentCollection* accumulator) const = 0;
44
+
45
+ // virtual FFState* EvaluateWhenAppliedWithContext(
46
+ // ttasksptr const& ttasks,
47
+ // const Hypothesis& cur_hypo,
48
+ // const FFState* prev_state,
49
+ // ScoreComponentCollection* accumulator) const {
50
+ // return EvaluateWhenApplied(cur_hypo, prev_state, accumulator);
51
+ // }
52
+
53
+ virtual FFState* EvaluateWhenApplied(
54
+ const ChartHypothesis& /* cur_hypo */,
55
+ int /* featureID - used to index the state in the previous hypotheses */,
56
+ ScoreComponentCollection* accumulator) const = 0;
57
+
58
+ virtual FFState* EvaluateWhenApplied(
59
+ const Syntax::SHyperedge& /* cur_hypo */,
60
+ int /* featureID - used to index the state in the previous hypotheses */,
61
+ ScoreComponentCollection* accumulator) const {
62
+ assert(false);
63
+ return 0; /* FIXME */
64
+ }
65
+
66
+ //! return the state associated with the empty hypothesis for a given sentence
67
+ virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;
68
+
69
+ bool IsStateless() const {
70
+ return false;
71
+ }
72
+
73
+
74
+ virtual void
75
+ EvaluateInIsolation
76
+ (Phrase const& source, TargetPhrase const& targetPhrase,
77
+ ScoreComponentCollection &scoreBreakdown,
78
+ ScoreComponentCollection &estimatedScores) const {}
79
+
80
+ virtual void
81
+ EvaluateWithSourceContext
82
+ (InputType const&input, InputPath const& inputPath, TargetPhrase const& targetPhrase,
83
+ StackVec const* stackVec, ScoreComponentCollection &scoreBreakdown,
84
+ ScoreComponentCollection *estimatedFutureScore = NULL) const {}
85
+
86
+ virtual void
87
+ EvaluateTranslationOptionListWithSourceContext
88
+ (const InputType &input, const TranslationOptionList &translationOptionList) const {}
89
+
90
+ };
91
+
92
+
93
+ }
94
+
95
+
96
+
mosesdecoder/moses/FF/TargetNgramFeature.h ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef moses_TargetNgramFeature_h
2
+ #define moses_TargetNgramFeature_h
3
+
4
+ #include <string>
5
+ #include <map>
6
+ #include <boost/unordered_set.hpp>
7
+
8
+ #include "StatefulFeatureFunction.h"
9
+ #include "moses/FF/FFState.h"
10
+ #include "moses/Word.h"
11
+ #include "moses/FactorCollection.h"
12
+ #include "moses/LM/SingleFactor.h"
13
+ #include "moses/ChartHypothesis.h"
14
+ #include "moses/ChartManager.h"
15
+ #include "util/string_stream.hh"
16
+
17
+ namespace Moses
18
+ {
19
+
20
+ class TargetNgramState : public FFState
21
+ {
22
+ public:
23
+ TargetNgramState() {}
24
+
25
+ TargetNgramState(const std::vector<Word> &words): m_words(words) {}
26
+ const std::vector<Word> GetWords() const {
27
+ return m_words;
28
+ }
29
+
30
+ size_t hash() const;
31
+ virtual bool operator==(const FFState& other) const;
32
+
33
+ private:
34
+ std::vector<Word> m_words;
35
+ };
36
+
37
+ class TargetNgramChartState : public FFState
38
+ {
39
+ private:
40
+ Phrase m_contextPrefix, m_contextSuffix;
41
+
42
+ size_t m_numTargetTerminals; // This isn't really correct except for the surviving hypothesis
43
+
44
+ size_t m_startPos, m_endPos, m_inputSize;
45
+
46
+ /** Construct the prefix string of up to specified size
47
+ * \param ret prefix string
48
+ * \param size maximum size (typically max lm context window)
49
+ */
50
+ size_t CalcPrefix(const ChartHypothesis &hypo, const int featureId, Phrase &ret, size_t size) const {
51
+ const TargetPhrase &target = hypo.GetCurrTargetPhrase();
52
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
53
+ target.GetAlignNonTerm().GetNonTermIndexMap();
54
+
55
+ // loop over the rule that is being applied
56
+ for (size_t pos = 0; pos < target.GetSize(); ++pos) {
57
+ const Word &word = target.GetWord(pos);
58
+
59
+ // for non-terminals, retrieve it from underlying hypothesis
60
+ if (word.IsNonTerminal()) {
61
+ size_t nonTermInd = nonTermIndexMap[pos];
62
+ const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
63
+ size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcPrefix(*prevHypo, featureId, ret, size);
64
+ // Phrase phrase = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->GetPrefix();
65
+ // size = phrase.GetSize();
66
+ }
67
+ // for words, add word
68
+ else {
69
+ ret.AddWord(word);
70
+ size--;
71
+ }
72
+
73
+ // finish when maximum length reached
74
+ if (size==0)
75
+ break;
76
+ }
77
+
78
+ return size;
79
+ }
80
+
81
+ /** Construct the suffix phrase of up to specified size
82
+ * will always be called after the construction of prefix phrase
83
+ * \param ret suffix phrase
84
+ * \param size maximum size of suffix
85
+ */
86
+ size_t CalcSuffix(const ChartHypothesis &hypo, int featureId, Phrase &ret, size_t size) const {
87
+ size_t prefixSize = m_contextPrefix.GetSize();
88
+ assert(prefixSize <= m_numTargetTerminals);
89
+
90
+ // special handling for small hypotheses
91
+ // does the prefix match the entire hypothesis string? -> just copy prefix
92
+ if (prefixSize == m_numTargetTerminals) {
93
+ size_t maxCount = std::min(prefixSize, size);
94
+ size_t pos= prefixSize - 1;
95
+
96
+ for (size_t ind = 0; ind < maxCount; ++ind) {
97
+ const Word &word = m_contextPrefix.GetWord(pos);
98
+ ret.PrependWord(word);
99
+ --pos;
100
+ }
101
+
102
+ size -= maxCount;
103
+ return size;
104
+ }
105
+ // construct suffix analogous to prefix
106
+ else {
107
+ const TargetPhrase targetPhrase = hypo.GetCurrTargetPhrase();
108
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
109
+ targetPhrase.GetAlignTerm().GetNonTermIndexMap();
110
+ for (int pos = (int) targetPhrase.GetSize() - 1; pos >= 0 ; --pos) {
111
+ const Word &word = targetPhrase.GetWord(pos);
112
+
113
+ if (word.IsNonTerminal()) {
114
+ size_t nonTermInd = nonTermIndexMap[pos];
115
+ const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
116
+ size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcSuffix(*prevHypo, featureId, ret, size);
117
+ } else {
118
+ ret.PrependWord(word);
119
+ size--;
120
+ }
121
+
122
+ if (size==0)
123
+ break;
124
+ }
125
+
126
+ return size;
127
+ }
128
+ }
129
+
130
+ public:
131
+ TargetNgramChartState(const ChartHypothesis &hypo, int featureId, size_t order)
132
+ :m_contextPrefix(order - 1),
133
+ m_contextSuffix(order - 1) {
134
+ m_numTargetTerminals = hypo.GetCurrTargetPhrase().GetNumTerminals();
135
+ const Range range = hypo.GetCurrSourceRange();
136
+ m_startPos = range.GetStartPos();
137
+ m_endPos = range.GetEndPos();
138
+ m_inputSize = hypo.GetManager().GetSource().GetSize();
139
+
140
+ const std::vector<const ChartHypothesis*> prevHypos = hypo.GetPrevHypos();
141
+ for (std::vector<const ChartHypothesis*>::const_iterator i = prevHypos.begin(); i != prevHypos.end(); ++i) {
142
+ // keep count of words (= length of generated string)
143
+ m_numTargetTerminals += static_cast<const TargetNgramChartState*>((*i)->GetFFState(featureId))->GetNumTargetTerminals();
144
+ }
145
+
146
+ CalcPrefix(hypo, featureId, m_contextPrefix, order - 1);
147
+ CalcSuffix(hypo, featureId, m_contextSuffix, order - 1);
148
+ }
149
+
150
+ size_t GetNumTargetTerminals() const {
151
+ return m_numTargetTerminals;
152
+ }
153
+
154
+ const Phrase &GetPrefix() const {
155
+ return m_contextPrefix;
156
+ }
157
+ const Phrase &GetSuffix() const {
158
+ return m_contextSuffix;
159
+ }
160
+
161
+ size_t hash() const {
162
+ // not sure if this is correct
163
+ size_t ret;
164
+
165
+ ret = m_startPos;
166
+ boost::hash_combine(ret, m_endPos);
167
+ boost::hash_combine(ret, m_inputSize);
168
+
169
+ // prefix
170
+ if (m_startPos > 0) { // not for "<s> ..."
171
+ boost::hash_combine(ret, hash_value(GetPrefix()));
172
+ }
173
+
174
+ if (m_endPos < m_inputSize - 1) { // not for "... </s>"
175
+ boost::hash_combine(ret, hash_value(GetSuffix()));
176
+ }
177
+
178
+ return ret;
179
+ }
180
+ virtual bool operator==(const FFState& o) const {
181
+ const TargetNgramChartState &other =
182
+ static_cast<const TargetNgramChartState &>( o );
183
+
184
+ // prefix
185
+ if (m_startPos > 0) { // not for "<s> ..."
186
+ if (GetPrefix() != other.GetPrefix())
187
+ return false;
188
+ }
189
+
190
+ if (m_endPos < m_inputSize - 1) { // not for "... </s>"
191
+ if (GetSuffix() != other.GetSuffix())
192
+ return false;
193
+ }
194
+ return true;
195
+ }
196
+
197
+ };
198
+
199
+ /** Sets the features of observed ngrams.
200
+ */
201
+ class TargetNgramFeature : public StatefulFeatureFunction
202
+ {
203
+ public:
204
+ TargetNgramFeature(const std::string &line);
205
+
206
+ void Load(AllOptions::ptr const& opts);
207
+
208
+ bool IsUseable(const FactorMask &mask) const;
209
+
210
+ virtual const FFState* EmptyHypothesisState(const InputType &input) const;
211
+
212
+ virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
213
+ ScoreComponentCollection* accumulator) const;
214
+
215
+ virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
216
+ ScoreComponentCollection* accumulator) const;
217
+
218
+ void SetParameter(const std::string& key, const std::string& value);
219
+
220
+ private:
221
+ FactorType m_factorType;
222
+ Word m_bos;
223
+ boost::unordered_set<std::string> m_vocab;
224
+ size_t m_n;
225
+ bool m_lower_ngrams;
226
+ std::string m_file;
227
+
228
+ std::string m_baseName;
229
+
230
+ void appendNgram(const Word& word, bool& skip, util::StringStream& ngram) const;
231
+ void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
232
+ size_t numberOfStartPos = 1, size_t offset = 0) const;
233
+ void MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
234
+ size_t numberOfEndPos = 1, size_t offset = 0) const;
235
+ };
236
+
237
+ }
238
+
239
+ #endif // moses_TargetNgramFeature_h
mosesdecoder/moses/FF/UnknownWordPenaltyProducer.h ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ // $Id$
4
+
5
+ #include "StatelessFeatureFunction.h"
6
+
7
+ namespace Moses
8
+ {
9
+
10
+ class Range;
11
+
12
+
13
+ /** unknown word penalty */
14
+ class UnknownWordPenaltyProducer : public StatelessFeatureFunction
15
+ {
16
+ protected:
17
+ static UnknownWordPenaltyProducer *s_instance;
18
+
19
+ public:
20
+ static const UnknownWordPenaltyProducer& Instance() {
21
+ return *s_instance;
22
+ }
23
+ static UnknownWordPenaltyProducer& InstanceNonConst() {
24
+ return *s_instance;
25
+ }
26
+
27
+ UnknownWordPenaltyProducer(const std::string &line);
28
+
29
+ bool IsUseable(const FactorMask &mask) const {
30
+ return true;
31
+ }
32
+ std::vector<float> DefaultWeights() const;
33
+
34
+ void EvaluateWhenApplied(const Hypothesis& hypo,
35
+ ScoreComponentCollection* accumulator) const {
36
+ }
37
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
38
+ ScoreComponentCollection* accumulator) const {
39
+ }
40
+ void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
41
+ ScoreComponentCollection* accumulator) const {
42
+ }
43
+ void EvaluateWithSourceContext(const InputType &input
44
+ , const InputPath &inputPath
45
+ , const TargetPhrase &targetPhrase
46
+ , const StackVec *stackVec
47
+ , ScoreComponentCollection &scoreBreakdown
48
+ , ScoreComponentCollection *estimatedScores = NULL) const {
49
+ }
50
+
51
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
52
+ , const TranslationOptionList &translationOptionList) const {
53
+ }
54
+
55
+ void EvaluateInIsolation(const Phrase &source
56
+ , const TargetPhrase &targetPhrase
57
+ , ScoreComponentCollection &scoreBreakdown
58
+ , ScoreComponentCollection &estimatedScores) const {
59
+ }
60
+
61
+ };
62
+
63
+ }
64
+
mosesdecoder/moses/FF/VW/AlignmentConstraint.h ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ namespace Moses
4
+ {
5
+
6
+ /**
7
+ * Helper class for storing alignment constraints.
8
+ */
9
+ class AlignmentConstraint
10
+ {
11
+ public:
12
+ AlignmentConstraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
13
+
14
+ AlignmentConstraint(int min, int max) : m_min(min), m_max(max) {}
15
+
16
+ /**
17
+ * We are aligned to point => our min cannot be larger, our max cannot be smaller.
18
+ */
19
+ void Update(int point) {
20
+ if (m_min > point) m_min = point;
21
+ if (m_max < point) m_max = point;
22
+ }
23
+
24
+ bool IsSet() const {
25
+ return m_max != -1;
26
+ }
27
+
28
+ int GetMin() const {
29
+ return m_min;
30
+ }
31
+
32
+ int GetMax() const {
33
+ return m_max;
34
+ }
35
+
36
+ private:
37
+ int m_min, m_max;
38
+ };
39
+
40
+ }
mosesdecoder/moses/FF/VW/ThreadLocalByFeatureStorage.h ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <cstdlib>
5
+ #include <vector>
6
+ #include <map>
7
+
8
+ #include <boost/thread/tss.hpp>
9
+ #include <boost/shared_ptr.hpp>
10
+
11
+ #include "moses/FF/FeatureFunction.h"
12
+
13
+ namespace Moses
14
+ {
15
+
16
+ template <class Value>
17
+ struct DefaultFactory {
18
+ typedef boost::shared_ptr<Value> ValuePtr;
19
+
20
+ ValuePtr operator()() {
21
+ return ValuePtr(new Value());
22
+ }
23
+ };
24
+
25
+ template<class Value, class Factory = DefaultFactory<Value> >
26
+ class ThreadLocalByFeatureStorage
27
+ {
28
+ public:
29
+ typedef boost::shared_ptr<Value> ValuePtr;
30
+ typedef std::map<std::string, ValuePtr> NameValueMap;
31
+ typedef boost::thread_specific_ptr<NameValueMap> TSNameValueMap;
32
+
33
+ ThreadLocalByFeatureStorage(FeatureFunction* ff,
34
+ Factory factory = Factory())
35
+ : m_ff(ff), m_factory(factory) {}
36
+
37
+ virtual ~ThreadLocalByFeatureStorage() {} // provide empty virtual dtor
38
+
39
+ virtual ValuePtr GetStored() {
40
+ if(!m_nameMap.get())
41
+ m_nameMap.reset(new NameValueMap());
42
+
43
+ typename NameValueMap::iterator it
44
+ = m_nameMap->find(m_ff->GetScoreProducerDescription());
45
+
46
+ if(it == m_nameMap->end()) {
47
+ std::pair<typename NameValueMap::iterator, bool> ret;
48
+ ret = m_nameMap->insert(
49
+ std::make_pair(m_ff->GetScoreProducerDescription(), m_factory()));
50
+
51
+ return ret.first->second;
52
+ } else {
53
+ return it->second;
54
+ }
55
+ }
56
+
57
+ virtual const ValuePtr GetStored() const {
58
+ UTIL_THROW_IF2(!m_nameMap.get(),
59
+ "No thread local storage has been created for: "
60
+ << m_ff->GetScoreProducerDescription());
61
+
62
+ typename NameValueMap::const_iterator it
63
+ = m_nameMap->find(m_ff->GetScoreProducerDescription());
64
+
65
+ UTIL_THROW_IF2(it == m_nameMap->end(),
66
+ "No features stored for: "
67
+ << m_ff->GetScoreProducerDescription());
68
+
69
+ return it->second;
70
+ }
71
+
72
+ private:
73
+ FeatureFunction* m_ff;
74
+ Factory m_factory;
75
+ static TSNameValueMap m_nameMap;
76
+ };
77
+
78
+ template <class Value, class Factory>
79
+ typename ThreadLocalByFeatureStorage<Value, Factory>::TSNameValueMap
80
+ ThreadLocalByFeatureStorage<Value, Factory>::m_nameMap;
81
+
82
+ }
mosesdecoder/moses/FF/VW/VWFeatureBase.h ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <boost/thread/tss.hpp>
5
+
6
+ #include "vw/Classifier.h"
7
+ #include "moses/TypeDef.h"
8
+ #include "moses/TranslationTask.h"
9
+ #include "moses/Util.h"
10
+ #include "moses/FF/StatelessFeatureFunction.h"
11
+
12
+ namespace Moses
13
+ {
14
+
15
+ enum VWFeatureType {
16
+ vwft_source,
17
+ vwft_target,
18
+ vwft_targetContext
19
+ };
20
+
21
+ class VWFeatureBase : public StatelessFeatureFunction
22
+ {
23
+ public:
24
+ VWFeatureBase(const std::string &line, VWFeatureType featureType = vwft_source)
25
+ : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_featureType(featureType) {
26
+ // defaults
27
+ m_sourceFactors.push_back(0);
28
+ m_targetFactors.push_back(0);
29
+ }
30
+
31
+ bool IsUseable(const FactorMask &mask) const {
32
+ return true;
33
+ }
34
+
35
+ // Official hooks should do nothing. This is a hack to be able to define
36
+ // classifier features in the moses.ini configuration file.
37
+ void EvaluateInIsolation(const Phrase &source
38
+ , const TargetPhrase &targetPhrase
39
+ , ScoreComponentCollection &scoreBreakdown
40
+ , ScoreComponentCollection &estimatedFutureScore) const {}
41
+ void EvaluateWithSourceContext(const InputType &input
42
+ , const InputPath &inputPath
43
+ , const TargetPhrase &targetPhrase
44
+ , const StackVec *stackVec
45
+ , ScoreComponentCollection &scoreBreakdown
46
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const {}
47
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
48
+ , const TranslationOptionList &translationOptionList) const {}
49
+ void EvaluateWhenApplied(const Hypothesis& hypo,
50
+ ScoreComponentCollection* accumulator) const {}
51
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
52
+ ScoreComponentCollection* accumulator) const {}
53
+
54
+
55
+ // Common parameters for classifier features, both source and target features
56
+ virtual void SetParameter(const std::string& key, const std::string& value) {
57
+ if (key == "used-by") {
58
+ ParseUsedBy(value);
59
+ } else if (key == "source-factors") {
60
+ Tokenize<FactorType>(m_sourceFactors, value, ",");
61
+ } else if (key == "target-factors") {
62
+ Tokenize<FactorType>(m_targetFactors, value, ",");
63
+ } else {
64
+ StatelessFeatureFunction::SetParameter(key, value);
65
+ }
66
+ }
67
+
68
+ // Return all classifier features, regardless of type
69
+ static const std::vector<VWFeatureBase*>& GetFeatures(std::string name = "VW0") {
70
+ UTIL_THROW_IF2(s_features.count(name) == 0, "No features registered for parent classifier: " + name);
71
+ return s_features[name];
72
+ }
73
+
74
+ // Return only source-dependent classifier features
75
+ static const std::vector<VWFeatureBase*>& GetSourceFeatures(std::string name = "VW0") {
76
+ UTIL_THROW_IF2(s_sourceFeatures.count(name) == 0, "No source features registered for parent classifier: " + name);
77
+ return s_sourceFeatures[name];
78
+ }
79
+
80
+ // Return only target-context classifier features
81
+ static const std::vector<VWFeatureBase*>& GetTargetContextFeatures(std::string name = "VW0") {
82
+ // don't throw an exception when there are no target-context features, this feature type is not mandatory
83
+ return s_targetContextFeatures[name];
84
+ }
85
+
86
+ // Return only target-dependent classifier features
87
+ static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
88
+ UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
89
+ return s_targetFeatures[name];
90
+ }
91
+
92
+ // Required length context (maximum context size of defined target-context features)
93
+ static size_t GetMaximumContextSize(std::string name = "VW0") {
94
+ return s_targetContextLength[name]; // 0 by default
95
+ }
96
+
97
+ // Overload to process source-dependent data, create features once for every
98
+ // source sentence word range.
99
+ virtual void operator()(const InputType &input
100
+ , const Range &sourceRange
101
+ , Discriminative::Classifier &classifier
102
+ , Discriminative::FeatureVector &outFeatures) const = 0;
103
+
104
+ // Overload to process target-dependent features, create features once for
105
+ // every target phrase. One source word range will have at least one target
106
+ // phrase, but may have more.
107
+ virtual void operator()(const InputType &input
108
+ , const TargetPhrase &targetPhrase
109
+ , Discriminative::Classifier &classifier
110
+ , Discriminative::FeatureVector &outFeatures) const = 0;
111
+
112
+ // Overload to process target-context dependent features, these features are
113
+ // evaluated during decoding. For efficiency, features are not fed directly into
114
+ // the classifier object but instead output in the vector "features" and managed
115
+ // separately in VW.h.
116
+ virtual void operator()(const InputType &input
117
+ , const Phrase &contextPhrase
118
+ , const AlignmentInfo &alignmentInfo
119
+ , Discriminative::Classifier &classifier
120
+ , Discriminative::FeatureVector &outFeatures) const = 0;
121
+
122
+ protected:
123
+ std::vector<FactorType> m_sourceFactors, m_targetFactors;
124
+
125
+ void UpdateRegister() {
126
+ for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
127
+ it != m_usedBy.end(); it++) {
128
+ s_features[*it].push_back(this);
129
+
130
+ if(m_featureType == vwft_source) {
131
+ s_sourceFeatures[*it].push_back(this);
132
+ } else if (m_featureType == vwft_targetContext) {
133
+ s_targetContextFeatures[*it].push_back(this);
134
+ UpdateContextSize(*it);
135
+ } else {
136
+ s_targetFeatures[*it].push_back(this);
137
+ }
138
+ }
139
+ }
140
+
141
+ private:
142
+ void ParseUsedBy(const std::string &usedBy) {
143
+ m_usedBy.clear();
144
+ Tokenize(m_usedBy, usedBy, ",");
145
+ }
146
+
147
+ void UpdateContextSize(const std::string &usedBy);
148
+
149
+ std::vector<std::string> m_usedBy;
150
+ VWFeatureType m_featureType;
151
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
152
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
153
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_targetContextFeatures;
154
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
155
+
156
+ static std::map<std::string, size_t> s_targetContextLength;
157
+ };
158
+
159
+ }
160
+
mosesdecoder/moses/FF/VW/VWFeatureContext.h ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <boost/foreach.hpp>
5
+ #include "VWFeatureBase.h"
6
+ #include "moses/InputType.h"
7
+ #include "moses/TypeDef.h"
8
+ #include "moses/Word.h"
9
+
10
+ namespace Moses
11
+ {
12
+
13
+ // Inherit from this for source-dependent classifier features. They will
14
+ // automatically register with the classifier class named VW0 or one or more
15
+ // names specified by the used-by=name1,name2,... parameter.
16
+ //
17
+ // The classifier gets a full list by calling
18
+ // VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription())
19
+
20
+
21
+ class VWFeatureContext : public VWFeatureBase
22
+ {
23
+ public:
24
+ VWFeatureContext(const std::string &line, size_t contextSize)
25
+ : VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) {
26
+ }
27
+
28
+ // Gets its pure virtual functions from VWFeatureBase
29
+
30
+ virtual void operator()(const InputType &input
31
+ , const TargetPhrase &targetPhrase
32
+ , Discriminative::Classifier &classifier
33
+ , Discriminative::FeatureVector &outFeatures) const {
34
+ }
35
+
36
+ virtual void operator()(const InputType &input
37
+ , const Range &sourceRange
38
+ , Discriminative::Classifier &classifier
39
+ , Discriminative::FeatureVector &outFeatures) const {
40
+ }
41
+
42
+ virtual void SetParameter(const std::string& key, const std::string& value) {
43
+ if (key == "size") {
44
+ m_contextSize = Scan<size_t>(value);
45
+ } else if (key == "factor-positions") {
46
+ // factor positions: assuming a factor such as positional morphological tag, use this
47
+ // option to select only certain positions; this assumes that only a single
48
+ // target-side factor is defined
49
+ Tokenize<size_t>(m_factorPositions, value, ",");
50
+ } else {
51
+ VWFeatureBase::SetParameter(key, value);
52
+ }
53
+ }
54
+
55
+ size_t GetContextSize() {
56
+ return m_contextSize;
57
+ }
58
+
59
+ protected:
60
+ // Get word with the correct subset of factors as string. Because we're target
61
+ // context features, we look at a limited number of words to the left of the
62
+ // current translation. posFromEnd is interpreted like this:
63
+ // 0 = last word of the hypothesis
64
+ // 1 = next to last word
65
+ // ...etc.
66
+ inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const {
67
+ const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1);
68
+ if (m_factorPositions.empty()) {
69
+ return word.GetString(m_targetFactors, false);
70
+ } else {
71
+ if (m_targetFactors.size() != 1)
72
+ UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined.");
73
+ const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string();
74
+
75
+ // corner cases: at sentence beginning/end, we don't have the correct factors set up
76
+ // similarly for UNK
77
+ if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR)
78
+ return fullFactor;
79
+
80
+ std::string subFactor(m_factorPositions.size(), 'x'); // initialize string with correct size and placeholder chars
81
+ for (size_t i = 0; i < m_factorPositions.size(); i++)
82
+ subFactor[i] = fullFactor[m_factorPositions[i]];
83
+
84
+ return subFactor;
85
+ }
86
+ }
87
+
88
+ // some target-context feature functions also look at the source
89
+ inline std::string GetSourceWord(const InputType &input, size_t pos) const {
90
+ return input.GetWord(pos).GetString(m_sourceFactors, false);
91
+ }
92
+
93
+ // get source words aligned to a particular context word
94
+ std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
95
+ , const InputType &input
96
+ , const AlignmentInfo &alignInfo
97
+ , size_t posFromEnd) const {
98
+ size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
99
+ std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
100
+ std::vector<std::string> out;
101
+ out.reserve(alignedToTarget.size());
102
+ BOOST_FOREACH(size_t srcIdx, alignedToTarget) {
103
+ out.push_back(GetSourceWord(input, srcIdx));
104
+ }
105
+ return out;
106
+ }
107
+
108
+ // required context size
109
+ size_t m_contextSize;
110
+
111
+ // factor positions: assuming a factor such as positional morphological tag, use this
112
+ // option to select only certain positions
113
+ std::vector<size_t> m_factorPositions;
114
+ };
115
+
116
+ }
mosesdecoder/moses/FF/VW/VWFeatureContextBilingual.h ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <boost/foreach.hpp>
5
+ #include <algorithm>
6
+ #include "VWFeatureContext.h"
7
+ #include "moses/Util.h"
8
+
9
+ namespace Moses
10
+ {
11
+
12
+ class VWFeatureContextBilingual : public VWFeatureContext
13
+ {
14
+ public:
15
+ VWFeatureContextBilingual(const std::string &line)
16
+ : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
17
+ ReadParameters();
18
+
19
+ // Call this last
20
+ VWFeatureBase::UpdateRegister();
21
+ }
22
+
23
+ virtual void operator()(const InputType &input
24
+ , const Phrase &contextPhrase
25
+ , const AlignmentInfo &alignmentInfo
26
+ , Discriminative::Classifier &classifier
27
+ , Discriminative::FeatureVector &outFeatures) const {
28
+ for (size_t i = 0; i < m_contextSize; i++) {
29
+ std::string tgtWord = GetWord(contextPhrase, i);
30
+ std::vector<std::string> alignedTo = GetAlignedSourceWords(contextPhrase, input, alignmentInfo, i);
31
+ BOOST_FOREACH(const std::string &srcWord, alignedTo) {
32
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("tcblng^-" + SPrint(i + 1) + "^" + tgtWord + "^" + srcWord));
33
+ }
34
+ }
35
+ }
36
+
37
+ virtual void SetParameter(const std::string& key, const std::string& value) {
38
+ VWFeatureContext::SetParameter(key, value);
39
+ }
40
+
41
+ private:
42
+ static const int DEFAULT_WINDOW_SIZE = 1;
43
+ };
44
+
45
+ }
mosesdecoder/moses/FF/VW/VWFeatureSourceBagOfWords.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "VWFeatureSource.h"
5
+
6
+ namespace Moses
7
+ {
8
+
9
+ class VWFeatureSourceBagOfWords : public VWFeatureSource
10
+ {
11
+ public:
12
+ VWFeatureSourceBagOfWords(const std::string &line)
13
+ : VWFeatureSource(line) {
14
+ ReadParameters();
15
+
16
+ // Call this last
17
+ VWFeatureBase::UpdateRegister();
18
+ }
19
+
20
+ void operator()(const InputType &input
21
+ , const Range &sourceRange
22
+ , Discriminative::Classifier &classifier
23
+ , Discriminative::FeatureVector &outFeatures) const {
24
+ for (size_t i = 0; i < input.GetSize(); i++) {
25
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i)));
26
+ }
27
+ }
28
+
29
+ virtual void SetParameter(const std::string& key, const std::string& value) {
30
+ VWFeatureSource::SetParameter(key, value);
31
+ }
32
+ };
33
+
34
+ }
mosesdecoder/moses/FF/VW/VWFeatureSourceBigrams.h ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "VWFeatureSource.h"
5
+
6
+ namespace Moses
7
+ {
8
+
9
+ class VWFeatureSourceBigrams : public VWFeatureSource
10
+ {
11
+ public:
12
+ VWFeatureSourceBigrams(const std::string &line)
13
+ : VWFeatureSource(line) {
14
+ ReadParameters();
15
+
16
+ // Call this last
17
+ VWFeatureBase::UpdateRegister();
18
+ }
19
+
20
+ void operator()(const InputType &input
21
+ , const Range &sourceRange
22
+ , Discriminative::Classifier &classifier
23
+ , Discriminative::FeatureVector &outFeatures) const {
24
+ for (size_t i = 1; i < input.GetSize(); i++) {
25
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i)));
26
+ }
27
+ }
28
+
29
+ virtual void SetParameter(const std::string& key, const std::string& value) {
30
+ VWFeatureSource::SetParameter(key, value);
31
+ }
32
+ };
33
+
34
+ }
mosesdecoder/moses/FF/VW/VWFeatureSourceExternalFeatures.h ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <cstdlib>
5
+
6
+ #include "ThreadLocalByFeatureStorage.h"
7
+ #include "VWFeatureSource.h"
8
+ #include "TabbedSentence.h"
9
+
10
+ namespace Moses
11
+ {
12
+
13
+ // Assuming a given column of TabbedSentence contains space separated source features
14
+ class VWFeatureSourceExternalFeatures : public VWFeatureSource
15
+ {
16
+ public:
17
+ VWFeatureSourceExternalFeatures(const std::string &line)
18
+ : VWFeatureSource(line), m_tls(this), m_column(0) {
19
+ ReadParameters();
20
+
21
+ // Call this last
22
+ VWFeatureBase::UpdateRegister();
23
+ }
24
+
25
+ void operator()(const InputType &input
26
+ , const Range &sourceRange
27
+ , Discriminative::Classifier &classifier
28
+ , Discriminative::FeatureVector &outFeatures) const {
29
+ const Features& features = *m_tls.GetStored();
30
+ for (size_t i = 0; i < features.size(); i++) {
31
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("srcext^" + features[i]));
32
+ }
33
+ }
34
+
35
+ virtual void SetParameter(const std::string& key, const std::string& value) {
36
+ if(key == "column")
37
+ m_column = Scan<size_t>(value);
38
+ else
39
+ VWFeatureSource::SetParameter(key, value);
40
+ }
41
+
42
+ virtual void InitializeForInput(ttasksptr const& ttask) {
43
+ InputType const& source = *(ttask->GetSource().get());
44
+ UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
45
+ "This feature function requires the TabbedSentence input type");
46
+
47
+ const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
48
+ const std::string &column = tabbedSentence.GetColumn(m_column);
49
+
50
+ Features& features = *m_tls.GetStored();
51
+ features.clear();
52
+
53
+ Tokenize(features, column, " ");
54
+ }
55
+
56
+ private:
57
+ typedef std::vector<std::string> Features;
58
+ typedef ThreadLocalByFeatureStorage<Features> TLSFeatures;
59
+
60
+ TLSFeatures m_tls;
61
+ size_t m_column;
62
+ };
63
+
64
+ }
mosesdecoder/moses/FF/VW/VWFeatureSourceIndicator.h ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <algorithm>
5
+ #include "VWFeatureSource.h"
6
+ #include "moses/Util.h"
7
+
8
+ namespace Moses
9
+ {
10
+
11
+ class VWFeatureSourceIndicator : public VWFeatureSource
12
+ {
13
+ public:
14
+ VWFeatureSourceIndicator(const std::string &line)
15
+ : VWFeatureSource(line) {
16
+ ReadParameters();
17
+
18
+ // Call this last
19
+ VWFeatureBase::UpdateRegister();
20
+ }
21
+
22
+ void operator()(const InputType &input
23
+ , const Range &sourceRange
24
+ , Discriminative::Classifier &classifier
25
+ , Discriminative::FeatureVector &outFeatures) const {
26
+ size_t begin = sourceRange.GetStartPos();
27
+ size_t end = sourceRange.GetEndPos() + 1;
28
+
29
+ std::vector<std::string> words(end - begin);
30
+
31
+ for (size_t i = 0; i < end - begin; i++)
32
+ words[i] = GetWord(input, begin + i);
33
+
34
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("sind^" + Join(" ", words)));
35
+ }
36
+
37
+ virtual void SetParameter(const std::string& key, const std::string& value) {
38
+ VWFeatureSource::SetParameter(key, value);
39
+ }
40
+ };
41
+
42
+ }
mosesdecoder/moses/FF/VW/VWFeatureSourcePhraseInternal.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <algorithm>
5
+ #include "VWFeatureSource.h"
6
+ #include "moses/Util.h"
7
+
8
+ namespace Moses
9
+ {
10
+
11
+ class VWFeatureSourcePhraseInternal : public VWFeatureSource
12
+ {
13
+ public:
14
+ VWFeatureSourcePhraseInternal(const std::string &line)
15
+ : VWFeatureSource(line) {
16
+ ReadParameters();
17
+
18
+ // Call this last
19
+ VWFeatureBase::UpdateRegister();
20
+ }
21
+
22
+ void operator()(const InputType &input
23
+ , const Range &sourceRange
24
+ , Discriminative::Classifier &classifier
25
+ , Discriminative::FeatureVector &outFeatures) const {
26
+ size_t begin = sourceRange.GetStartPos();
27
+ size_t end = sourceRange.GetEndPos() + 1;
28
+
29
+ while (begin < end) {
30
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++)));
31
+ }
32
+ }
33
+
34
+ virtual void SetParameter(const std::string& key, const std::string& value) {
35
+ VWFeatureSource::SetParameter(key, value);
36
+ }
37
+ };
38
+
39
+ }
mosesdecoder/moses/FF/VW/VWFeatureSourceSenseWindow.h ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <algorithm>
5
+ #include <boost/foreach.hpp>
6
+ #include "ThreadLocalByFeatureStorage.h"
7
+ #include "VWFeatureSource.h"
8
+ #include "moses/Util.h"
9
+
10
+ /*
11
+ * Produces features from factors in the following format:
12
+ * wordsense1:0.25^wordsense1:0.7^wordsense3:0.05
13
+ *
14
+ * This is useful e.g. for including different possible word senses as features weighted
15
+ * by their probability.
16
+ *
17
+ * By default, features are extracted from a small context window around the current
18
+ * phrase and from within the phrase.
19
+ */
20
+
21
+ namespace Moses
22
+ {
23
+
24
+ class VWFeatureSourceSenseWindow : public VWFeatureSource
25
+ {
26
+ public:
27
+ VWFeatureSourceSenseWindow(const std::string &line)
28
+ : VWFeatureSource(line), m_tlsSenses(this), m_tlsForms(this), m_lexicalized(true), m_size(DEFAULT_WINDOW_SIZE) {
29
+ ReadParameters();
30
+
31
+ // Call this last
32
+ VWFeatureBase::UpdateRegister();
33
+ }
34
+
35
+ // precompute feature strings for each input sentence
36
+ virtual void InitializeForInput(ttasksptr const& ttask) {
37
+ InputType const& input = *(ttask->GetSource().get());
38
+
39
+ std::vector<WordSenses>& senses = *m_tlsSenses.GetStored();
40
+ std::vector<std::string>& forms = *m_tlsForms.GetStored();
41
+ senses.clear();
42
+ forms.clear();
43
+
44
+ senses.resize(input.GetSize());
45
+ forms.resize(input.GetSize());
46
+
47
+ for (size_t i = 0; i < input.GetSize(); i++) {
48
+ senses[i] = GetSenses(input, i);
49
+ forms[i] = m_lexicalized ? GetWordForm(input, i) + "^" : "";
50
+ }
51
+ }
52
+
53
+ void operator()(const InputType &input
54
+ , const Range &sourceRange
55
+ , Discriminative::Classifier &classifier
56
+ , Discriminative::FeatureVector &outFeatures) const {
57
+ int begin = sourceRange.GetStartPos();
58
+ int end = sourceRange.GetEndPos() + 1;
59
+ int inputLen = input.GetSize();
60
+
61
+ const std::vector<WordSenses>& senses = *m_tlsSenses.GetStored();
62
+ const std::vector<std::string>& forms = *m_tlsForms.GetStored();
63
+
64
+ // before current phrase
65
+ for (int i = std::max(0, begin - m_size); i < begin; i++) {
66
+ BOOST_FOREACH(const Sense &sense, senses[i]) {
67
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
68
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob));
69
+ }
70
+ }
71
+
72
+ // within current phrase
73
+ for (int i = begin; i < end; i++) {
74
+ BOOST_FOREACH(const Sense &sense, senses[i]) {
75
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
76
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob));
77
+ }
78
+ }
79
+
80
+ // after current phrase
81
+ for (int i = end; i < std::min(end + m_size, inputLen); i++) {
82
+ BOOST_FOREACH(const Sense &sense, senses[i]) {
83
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
84
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob));
85
+ }
86
+ }
87
+ }
88
+
89
+ virtual void SetParameter(const std::string& key, const std::string& value) {
90
+ if (key == "size") {
91
+ m_size = Scan<size_t>(value);
92
+ } else if (key == "lexicalized") {
93
+ m_lexicalized = Scan<bool>(value);
94
+ } else {
95
+ VWFeatureSource::SetParameter(key, value);
96
+ }
97
+ }
98
+
99
+ private:
100
+ static const int DEFAULT_WINDOW_SIZE = 3;
101
+
102
+ struct Sense {
103
+ std::string m_label;
104
+ float m_prob;
105
+ };
106
+
107
+ typedef std::vector<Sense> WordSenses;
108
+ typedef ThreadLocalByFeatureStorage<std::vector<WordSenses> > TLSSenses;
109
+ typedef ThreadLocalByFeatureStorage<std::vector<std::string> > TLSWordForms;
110
+
111
+ TLSSenses m_tlsSenses; // for each input sentence, contains extracted senses and probs for each word
112
+ TLSWordForms m_tlsForms; // word forms for each input sentence
113
+
114
+
115
+ std::vector<Sense> GetSenses(const InputType &input, size_t pos) const {
116
+ std::string w = GetWord(input, pos);
117
+ std::vector<std::string> senseTokens = Tokenize(w, "^");
118
+
119
+ std::vector<Sense> out(senseTokens.size());
120
+ for (size_t i = 0; i < senseTokens.size(); i++) {
121
+ std::vector<std::string> senseColumns = Tokenize(senseTokens[i], ":");
122
+ if (senseColumns.size() != 2) {
123
+ UTIL_THROW2("VW :: bad format of sense distribution: " << senseTokens[i]);
124
+ }
125
+ out[i].m_label = senseColumns[0];
126
+ out[i].m_prob = Scan<float>(senseColumns[1]);
127
+ }
128
+
129
+ return out;
130
+ }
131
+
132
+ // assuming that word surface form is always factor 0, output the word form
133
+ inline std::string GetWordForm(const InputType &input, size_t pos) const {
134
+ return input.GetWord(pos).GetString(0).as_string();
135
+ }
136
+
137
+ bool m_lexicalized;
138
+ int m_size;
139
+ };
140
+
141
+ }
mosesdecoder/moses/FF/VW/VWFeatureTargetBigrams.h ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "VWFeatureTarget.h"
5
+
6
+ namespace Moses
7
+ {
8
+
9
+ class VWFeatureTargetBigrams : public VWFeatureTarget
10
+ {
11
+ public:
12
+ VWFeatureTargetBigrams(const std::string &line)
13
+ : VWFeatureTarget(line) {
14
+ ReadParameters();
15
+
16
+ VWFeatureBase::UpdateRegister();
17
+ }
18
+
19
+ void operator()(const InputType &input
20
+ , const TargetPhrase &targetPhrase
21
+ , Discriminative::Classifier &classifier
22
+ , Discriminative::FeatureVector &outFeatures) const {
23
+ for (size_t i = 1; i < targetPhrase.GetSize(); i++) {
24
+ outFeatures.push_back(classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i)));
25
+ }
26
+ }
27
+
28
+ virtual void SetParameter(const std::string& key, const std::string& value) {
29
+ VWFeatureTarget::SetParameter(key, value);
30
+ }
31
+ };
32
+
33
+ }
mosesdecoder/moses/FF/VW/VWFeatureTargetIndicator.h ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "VWFeatureTarget.h"
5
+
6
+ namespace Moses
7
+ {
8
+
9
+ class VWFeatureTargetIndicator : public VWFeatureTarget
10
+ {
11
+ public:
12
+ VWFeatureTargetIndicator(const std::string &line)
13
+ : VWFeatureTarget(line) {
14
+ ReadParameters();
15
+
16
+ VWFeatureBase::UpdateRegister();
17
+ }
18
+
19
+ void operator()(const InputType &input
20
+ , const TargetPhrase &targetPhrase
21
+ , Discriminative::Classifier &classifier
22
+ , Discriminative::FeatureVector &outFeatures) const {
23
+ outFeatures.push_back(classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors)));
24
+ }
25
+
26
+ virtual void SetParameter(const std::string& key, const std::string& value) {
27
+ VWFeatureTarget::SetParameter(key, value);
28
+ }
29
+ };
30
+
31
+ }