File size: 4,123 Bytes
fd49381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#include <fstream>
#include "DsgModel.h"
#include "dsgHyp.h"
#include "moses/Util.h"
#include "util/exception.hh"

using namespace std;
using namespace lm::ngram;

namespace Moses
{

/**
 * Builds the desegmentation feature from its configuration line.
 *
 * Defaults are assigned first and ReadParameters() runs last, so values
 * supplied on the line (presumably routed through SetParameter()) override
 * the defaults set here.
 *
 * @param line feature definition line forwarded to StatefulFeatureFunction.
 */
DesegModel::DesegModel(const std::string &line)
  :StatefulFeatureFunction(5, line )
{
  // The model and the desegmentation table are only created by
  // readLanguageModel(); keep the pointers null until then so the destructor
  // is well defined even when loading never happened.
  DSGM = NULL;
  desegT = NULL;
  tFactor = 0;
  order=5;
  numFeatures = 5;
  optimistic = 1;
  // Same value SetParameter() picks for any "deseg-scheme" other than "s";
  // without it the flag is indeterminate when the key is omitted.
  m_simple = 0;
  ReadParameters();
}

/**
 * Releases the language model and the desegmentation table allocated in
 * readLanguageModel(). Deleting a null pointer is a no-op.
 */
DesegModel::~DesegModel()
{
  delete DSGM;
  delete desegT;
}

/**
 * Creates the desegmentation language model and the desegmentation table.
 *
 * @param lmFile path handed to ConstructDsgLM(). The only caller in this
 *               file passes m_lmPath.c_str(), so honouring the argument
 *               keeps that call's behaviour unchanged.
 *
 * The table is built from m_desegPath and m_simple, which are filled in by
 * SetParameter().
 */
void DesegModel :: readLanguageModel(const char *lmFile)
{
  DSGM = ConstructDsgLM(lmFile);
  desegT = new Desegmenter(m_desegPath, m_simple); // Desegmentation Table
}


/**
 * Stores the decoder options and loads the resources of this feature.
 *
 * @param opts options object kept in m_options.
 */
void DesegModel::Load(AllOptions::ptr const& opts)
{
  m_options = opts;

  // The path was recorded by SetParameter("path", ...).
  const char *lmPath = m_lmPath.c_str();
  readLanguageModel(lmPath);
}



/**
 * Scores a target phrase on its own, starting from the model's null-context
 * state, and adds the result to the estimated scores.
 *
 * @param source          source phrase (not used here).
 * @param targetPhrase    phrase whose tFactor strings are scored.
 * @param scoreBreakdown  not modified by this feature.
 * @param estimatedScores receives the numFeatures-based score vector.
 */
void DesegModel:: EvaluateInIsolation(const Phrase &source
                                      , const TargetPhrase &targetPhrase
                                      , ScoreComponentCollection &scoreBreakdown
                                      , ScoreComponentCollection &estimatedScores) const
{
  dsgHypothesis obj;
  vector<float> scores;
  const AlignmentInfo &align = targetPhrase.GetAlignTerm();

  // Collect the segmented tokens of the target phrase.
  const size_t numWords = targetPhrase.GetSize();
  vector<string> targ_phrase;
  targ_phrase.reserve(numWords);
  for (size_t i = 0; i < numWords; ++i) {
    targ_phrase.push_back(targetPhrase.GetWord(i).GetFactor(tFactor)->GetString().as_string());
  }

  obj.setState(DSGM->NullContextState());
  obj.setPhrases(targ_phrase);
  obj.calculateDsgProbinIsol(*DSGM,*desegT,align);
  obj.populateScores(scores,numFeatures);
  estimatedScores.PlusEquals(this, scores);
}


/**
 * Scores the target phrase of a hypothesis given the previous feature state.
 *
 * @param cur_hypo    hypothesis being extended.
 * @param prev_state  state produced for the preceding hypothesis.
 * @param accumulator receives the numFeatures-based score vector.
 * @return the state returned by dsgHypothesis::saveState() after scoring.
 */
FFState* DesegModel::EvaluateWhenApplied(
  const Hypothesis& cur_hypo,
  const FFState* prev_state,
  ScoreComponentCollection* accumulator) const
{
  const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
  const Range &src_rng = cur_hypo.GetCurrSourceWordsRange();
  const AlignmentInfo &align = target.GetAlignTerm();
  const size_t sourceOffset = src_rng.GetStartPos();
  const bool isCompleted = cur_hypo.IsSourceCompleted();

  // Collect the segmented tokens of the current target phrase.
  const size_t numWords = cur_hypo.GetCurrTargetLength();
  vector<string> targ_phrase;
  targ_phrase.reserve(numWords);
  for (size_t i = 0; i < numWords; ++i) {
    targ_phrase.push_back(target.GetWord(i).GetFactor(tFactor)->GetString().as_string());
  }

  dsgHypothesis obj;
  vector<float> scores;
  obj.setState(prev_state);
  obj.setPhrases( targ_phrase );
  obj.calculateDsgProb(*DSGM,*desegT,isCompleted,align, sourceOffset, optimistic);
  obj.populateScores(scores,numFeatures);
  accumulator->PlusEquals(this, scores);
  return obj.saveState();
}

/**
 * Chart-decoding entry point. This feature has no chart implementation, so
 * the call always raises through UTIL_THROW2 and never returns a state.
 */
FFState* DesegModel::EvaluateWhenApplied(
  const ChartHypothesis& /* cur_hypo */,
  int /* featureID - used to index the state in the previous hypotheses */,
  ScoreComponentCollection* /* accumulator */) const
{
  UTIL_THROW2("Chart decoding not supported by DesegModel");
}

/**
 * Produces the feature state attached to the empty hypothesis.
 *
 * @param input current input (not used here).
 * @return a heap-allocated dsgState wrapping the model's begin-of-sentence
 *         state; the caller receives the raw pointer.
 */
const FFState* DesegModel::EmptyHypothesisState(const InputType &input) const
{
  VERBOSE(3,"DesegModel::EmptyHypothesisState()" << endl);

  State bosState = DSGM->BeginSentenceState();
  dsgState initial(bosState);
  return new dsgState(initial);
}

/**
 * Short weight name of this feature.
 *
 * @return "dsg" for every index; the argument is ignored.
 */
std::string DesegModel::GetScoreProducerWeightShortName(unsigned /* idx */) const
{
  return std::string("dsg");
}


/**
 * Applies one key/value pair from the feature's configuration line.
 *
 * Keys handled here:
 *  - "path"                language model path (m_lmPath)
 *  - "contiguity-features" "no" selects 1 feature, anything else 5
 *  - "output-factor"       target factor index (tFactor)
 *  - "optimistic"          "n" disables it, anything else enables it
 *  - "deseg-path"          desegmentation table path (m_desegPath)
 *  - "deseg-scheme"        "s" sets m_simple to 1, anything else to 0
 *  - "order"               integer stored in order
 * Any other key is forwarded to StatefulFeatureFunction::SetParameter().
 *
 * NOTE(review): the constructor registers 5 score components regardless of
 * "contiguity-features" - confirm populateScores() copes with numFeatures == 1.
 */
void DesegModel::SetParameter(const std::string& key, const std::string& value)
{

  if (key == "path") {
    m_lmPath = value;
  } else if (key == "contiguity-features") {
    if(value == "no")
      numFeatures = 1;
    else
      numFeatures = 5;
  } else if (key == "output-factor") {
    tFactor = Scan<int>(value);
  } else if (key == "optimistic") {
    if (value == "n")
      optimistic = 0;
    else
      optimistic = 1;
  } else if (key == "deseg-path") {
    // A path is text: parsing it as an int and assigning the number to the
    // string member would store a single character instead of the file name.
    m_desegPath = value;
  } else if (key == "deseg-scheme") {
    if(value == "s")
      m_simple = 1;
    else
      m_simple = 0;
  } else if (key == "order") {
    order = Scan<int>(value);
  } else {
    StatefulFeatureFunction::SetParameter(key, value);
  }
}

/**
 * Tells whether the feature can be used with the given factor mask.
 *
 * @param mask factor mask to inspect.
 * @return the value of the mask at index 0.
 */
bool DesegModel::IsUseable(const FactorMask &mask) const
{
  return mask[0];
}

} // namespace