File size: 3,634 Bytes
fd49381 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
/*
* Word.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include <boost/functional/hash.hpp>
#include "Word.h"
#include "Hypothesis.h"
#include "ActiveChart.h"
#include "TargetPhraseImpl.h"
#include "Sentence.h"
#include "../legacy/Util2.h"
#include "../System.h"
#include "../AlignmentInfo.h"
#include "../ManagerBase.h"
using namespace std;
namespace Moses2
{
namespace SCFG
{
/*
 * Copy constructor.
 *
 * Forwards `copy` to the Moses2::Word base-class constructor and duplicates
 * its isNonTerminal flag.
 */
Word::Word(const SCFG::Word &copy)
  :Moses2::Word(copy)
  ,isNonTerminal(copy.isNonTerminal)
{
}
/*
 * Fill this word from its textual representation.
 *
 * A string that starts with '[' and ends with ']' is flagged as a
 * non-terminal. If another '[' appears after the first character
 * (e.g. "[X][Y]"), only the text between that later '[' and the final ']'
 * is used; otherwise the text between the outer brackets is used. Any other
 * string is a terminal and is used as-is.
 *
 * The selected text is split on '|' and each piece is registered with
 * `vocab`; the returned factors are stored in m_factors in token order.
 */
void Word::CreateFromString(FactorCollection &vocab,
                            const System &system,
                            const std::string &str)
{
  // The first test short-circuits for an empty string, so str.size() - 1 is
  // never evaluated when there are no characters.
  const bool bracketed = str[0] == '[' && str[str.size() - 1] == ']';
  isNonTerminal = bracketed;

  vector<string> toks;
  if (!bracketed) {
    toks = Tokenize(str, "|");
  } else {
    // Skip past the second '[' when present, otherwise past the leading one.
    const size_t secondOpen = str.find('[', 1);
    const size_t labelStart = (secondOpen == string::npos) ? 1 : secondOpen + 1;
    // The label stops just before the closing ']'.
    const size_t labelLen = str.size() - 1 - labelStart;
    toks = Tokenize(str.substr(labelStart, labelLen), "|");
  }

  // NOTE(review): idx is not checked against the capacity of m_factors;
  // presumably the token count never exceeds it - confirm against callers.
  for (size_t idx = 0; idx != toks.size(); ++idx) {
    const Factor *factor = vocab.AddFactor(toks[idx], system, isNonTerminal);
    m_factors[idx] = factor;
  }
}
/*
 * Hash of the word: the base-class hash with the isNonTerminal flag
 * folded in.
 */
size_t Word::hash() const
{
  size_t seed = Moses2::Word::hash();

  // mix the non-terminal flag into the base-class hash value
  boost::hash_combine(seed, isNonTerminal);

  return seed;
}
size_t Word::hash(const std::vector<FactorType> &factors) const
{
size_t seed = isNonTerminal;
for (size_t i = 0; i < factors.size(); ++i) {
FactorType factorType = factors[i];
const Factor *factor = m_factors[factorType];
boost::hash_combine(seed, factor);
}
return seed;
}
/*
 * Write the word to `out` through the base-class OutputToStream.
 * Non-terminals are additionally wrapped in square brackets.
 */
void Word::OutputToStream(const System &system, std::ostream &out) const
{
  if (!isNonTerminal) {
    // terminal: no brackets added
    Moses2::Word::OutputToStream(system, out);
    return;
  }

  out << "[";
  Moses2::Word::OutputToStream(system, out);
  out << "]";
}
/*
 * Write the word at target position `targetPos` of the hypothesis' target
 * phrase to `out`.
 *
 * When a placeholder factor is configured (placeholder_factor != NOT_FOUND)
 * and the target position has exactly one terminal alignment, the aligned
 * source word is looked up in the input sentence. If that source word has
 * a non-null placeholder factor, the factor's string is written in place of
 * this word. In every other case the word is written through
 * OutputToStream(system, out).
 */
void Word::OutputToStream(
  const ManagerBase &mgr,
  size_t targetPos,
  const SCFG::Hypothesis &hypo,
  std::ostream &out) const
{
  const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
  const SCFG::SymbolBind &symbolBind = hypo.GetSymbolBind();

  if (mgr.system.options.input.placeholder_factor != NOT_FOUND) {
    const AlignmentInfo &termAlign = tp.GetAlignTerm();
    const std::set<size_t> alignedSources = termAlign.GetAlignmentsForTarget(targetPos);

    // Substitution is attempted only for an unambiguous, single alignment.
    if (alignedSources.size() == 1) {
      const size_t sourcePos = *alignedSources.begin();
      assert(sourcePos < symbolBind.GetSize());

      const Range &inputRange = symbolBind.coll[sourcePos].GetRange();
      assert(inputRange.GetNumWordsCovered() == 1);

      const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence &>(mgr.GetInput());
      const SCFG::Word &sourceWord = sentence[inputRange.GetStartPos()];
      const Factor *placeholder = sourceWord[mgr.system.options.input.placeholder_factor];
      if (placeholder) {
        // the source word carries a placeholder value: emit it and stop
        out << placeholder->GetString();
        return;
      }
    }
  }

  // No substitution took place: print the word itself.
  OutputToStream(mgr.system, out);
}
/*
 * Debug string for the word: the base-class Debug() text, wrapped in
 * square brackets when the word is a non-terminal.
 */
std::string Word::Debug(const System &system) const
{
  const std::string inner = Moses2::Word::Debug(system);
  return isNonTerminal ? "[" + inner + "]" : inner;
}
}
}
|