|
|
#pragma once |
|
|
|
|
|
#include <string> |
|
|
#include <boost/foreach.hpp> |
|
|
#include "VWFeatureBase.h" |
|
|
#include "moses/InputType.h" |
|
|
#include "moses/TypeDef.h" |
|
|
#include "moses/Word.h" |
|
|
|
|
|
namespace Moses |
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class VWFeatureContext : public VWFeatureBase |
|
|
{ |
|
|
public: |
|
|
VWFeatureContext(const std::string &line, size_t contextSize) |
|
|
: VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) { |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
virtual void operator()(const InputType &input |
|
|
, const TargetPhrase &targetPhrase |
|
|
, Discriminative::Classifier &classifier |
|
|
, Discriminative::FeatureVector &outFeatures) const { |
|
|
} |
|
|
|
|
|
virtual void operator()(const InputType &input |
|
|
, const Range &sourceRange |
|
|
, Discriminative::Classifier &classifier |
|
|
, Discriminative::FeatureVector &outFeatures) const { |
|
|
} |
|
|
|
|
|
virtual void SetParameter(const std::string& key, const std::string& value) { |
|
|
if (key == "size") { |
|
|
m_contextSize = Scan<size_t>(value); |
|
|
} else if (key == "factor-positions") { |
|
|
|
|
|
|
|
|
|
|
|
Tokenize<size_t>(m_factorPositions, value, ","); |
|
|
} else { |
|
|
VWFeatureBase::SetParameter(key, value); |
|
|
} |
|
|
} |
|
|
|
|
|
size_t GetContextSize() { |
|
|
return m_contextSize; |
|
|
} |
|
|
|
|
|
protected: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const { |
|
|
const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1); |
|
|
if (m_factorPositions.empty()) { |
|
|
return word.GetString(m_targetFactors, false); |
|
|
} else { |
|
|
if (m_targetFactors.size() != 1) |
|
|
UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined."); |
|
|
const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string(); |
|
|
|
|
|
|
|
|
|
|
|
if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR) |
|
|
return fullFactor; |
|
|
|
|
|
std::string subFactor(m_factorPositions.size(), 'x'); |
|
|
for (size_t i = 0; i < m_factorPositions.size(); i++) |
|
|
subFactor[i] = fullFactor[m_factorPositions[i]]; |
|
|
|
|
|
return subFactor; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
inline std::string GetSourceWord(const InputType &input, size_t pos) const { |
|
|
return input.GetWord(pos).GetString(m_sourceFactors, false); |
|
|
} |
|
|
|
|
|
|
|
|
std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase |
|
|
, const InputType &input |
|
|
, const AlignmentInfo &alignInfo |
|
|
, size_t posFromEnd) const { |
|
|
size_t idx = contextPhrase.GetSize() - posFromEnd - 1; |
|
|
std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx); |
|
|
std::vector<std::string> out; |
|
|
out.reserve(alignedToTarget.size()); |
|
|
BOOST_FOREACH(size_t srcIdx, alignedToTarget) { |
|
|
out.push_back(GetSourceWord(input, srcIdx)); |
|
|
} |
|
|
return out; |
|
|
} |
|
|
|
|
|
|
|
|
size_t m_contextSize; |
|
|
|
|
|
|
|
|
|
|
|
std::vector<size_t> m_factorPositions; |
|
|
}; |
|
|
|
|
|
} |
|
|
|