| #pragma once |
|
|
| #include <string> |
| #include <boost/foreach.hpp> |
| #include "VWFeatureBase.h" |
| #include "moses/InputType.h" |
| #include "moses/TypeDef.h" |
| #include "moses/Word.h" |
|
|
| namespace Moses |
| { |
|
|
| |
| |
| |
| |
| |
| |
|
|
|
|
| class VWFeatureContext : public VWFeatureBase |
| { |
| public: |
| VWFeatureContext(const std::string &line, size_t contextSize) |
| : VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) { |
| } |
|
|
| |
|
|
| virtual void operator()(const InputType &input |
| , const TargetPhrase &targetPhrase |
| , Discriminative::Classifier &classifier |
| , Discriminative::FeatureVector &outFeatures) const { |
| } |
|
|
| virtual void operator()(const InputType &input |
| , const Range &sourceRange |
| , Discriminative::Classifier &classifier |
| , Discriminative::FeatureVector &outFeatures) const { |
| } |
|
|
| virtual void SetParameter(const std::string& key, const std::string& value) { |
| if (key == "size") { |
| m_contextSize = Scan<size_t>(value); |
| } else if (key == "factor-positions") { |
| |
| |
| |
| Tokenize<size_t>(m_factorPositions, value, ","); |
| } else { |
| VWFeatureBase::SetParameter(key, value); |
| } |
| } |
|
|
| size_t GetContextSize() { |
| return m_contextSize; |
| } |
|
|
| protected: |
| |
| |
| |
| |
| |
| |
| inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const { |
| const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1); |
| if (m_factorPositions.empty()) { |
| return word.GetString(m_targetFactors, false); |
| } else { |
| if (m_targetFactors.size() != 1) |
| UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined."); |
| const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string(); |
|
|
| |
| |
| if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR) |
| return fullFactor; |
|
|
| std::string subFactor(m_factorPositions.size(), 'x'); |
| for (size_t i = 0; i < m_factorPositions.size(); i++) |
| subFactor[i] = fullFactor[m_factorPositions[i]]; |
|
|
| return subFactor; |
| } |
| } |
|
|
| |
| inline std::string GetSourceWord(const InputType &input, size_t pos) const { |
| return input.GetWord(pos).GetString(m_sourceFactors, false); |
| } |
|
|
| |
| std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase |
| , const InputType &input |
| , const AlignmentInfo &alignInfo |
| , size_t posFromEnd) const { |
| size_t idx = contextPhrase.GetSize() - posFromEnd - 1; |
| std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx); |
| std::vector<std::string> out; |
| out.reserve(alignedToTarget.size()); |
| BOOST_FOREACH(size_t srcIdx, alignedToTarget) { |
| out.push_back(GetSourceWord(input, srcIdx)); |
| } |
| return out; |
| } |
|
|
| |
| size_t m_contextSize; |
|
|
| |
| |
| std::vector<size_t> m_factorPositions; |
| }; |
|
|
| } |
|
|