|
|
#include "ForestWriter.h" |
|
|
|
|
|
#include <cassert> |
|
|
#include <vector> |
|
|
|
|
|
#include "TopologicalSorter.h" |
|
|
|
|
|
namespace MosesTraining |
|
|
{ |
|
|
namespace Syntax |
|
|
{ |
|
|
namespace PostprocessEgretForests |
|
|
{ |
|
|
|
|
|
void ForestWriter::Write(const std::string &sentence, const Forest &forest, |
|
|
std::size_t sentNum) |
|
|
{ |
|
|
m_out << "sentence " << sentNum << " :" << std::endl; |
|
|
m_out << PossiblyEscape(sentence) << std::endl; |
|
|
|
|
|
|
|
|
if (forest.vertices.empty()) { |
|
|
m_out << std::endl << std::endl; |
|
|
return; |
|
|
} |
|
|
|
|
|
|
|
|
std::vector<const Forest::Vertex *> vertices; |
|
|
TopologicalSorter sorter; |
|
|
sorter.Sort(forest, vertices); |
|
|
for (std::vector<const Forest::Vertex *>::const_iterator p = vertices.begin(); |
|
|
p != vertices.end(); ++p) { |
|
|
const Forest::Vertex &v = **p; |
|
|
for (std::vector<boost::shared_ptr<Forest::Hyperedge> >::const_iterator |
|
|
q = v.incoming.begin(); q != v.incoming.end(); ++q) { |
|
|
WriteHyperedgeLine(**q); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
m_out << std::endl; |
|
|
} |
|
|
|
|
|
// Writes one hyperedge as a single line on m_out:
//   <head> => <tail_1> ... <tail_n> ||| <weight>
// The head and every tail vertex are formatted by WriteVertex(); each tail
// vertex is preceded by a single space.
void ForestWriter::WriteHyperedgeLine(const Forest::Hyperedge &e)
{
  typedef std::vector<Forest::Vertex *>::const_iterator TailIter;

  WriteVertex(*e.head);
  m_out << " =>";

  const TailIter tailEnd = e.tail.end();
  for (TailIter child = e.tail.begin(); child != tailEnd; ++child) {
    m_out << " ";
    WriteVertex(**child);
  }

  m_out << " ||| " << e.weight << std::endl;
}
|
|
|
|
|
// Writes a vertex to m_out: its symbol value (passed through PossiblyEscape())
// followed by "[start,end]". The bracketed span is only written when the
// vertex has at least one incoming hyperedge.
void ForestWriter::WriteVertex(const Forest::Vertex &v)
{
  m_out << PossiblyEscape(v.symbol.value);

  // Vertices without incoming hyperedges are written without a span.
  if (v.incoming.empty()) {
    return;
  }

  m_out << "[" << v.start << "," << v.end << "]";
}
|
|
|
|
|
std::string ForestWriter::PossiblyEscape(const std::string &s) const |
|
|
{ |
|
|
if (m_options.escape) { |
|
|
return Escape(s); |
|
|
} else { |
|
|
return s; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
std::string ForestWriter::Escape(const std::string &s) const |
|
|
{ |
|
|
std::string t; |
|
|
std::size_t len = s.size(); |
|
|
t.reserve(len); |
|
|
for (std::size_t i = 0; i < len; ++i) { |
|
|
if (s[i] == '<') { |
|
|
t += "<"; |
|
|
} else if (s[i] == '>') { |
|
|
t += ">"; |
|
|
} else if (s[i] == '[') { |
|
|
t += "["; |
|
|
} else if (s[i] == ']') { |
|
|
t += "]"; |
|
|
} else if (s[i] == '|') { |
|
|
t += "|"; |
|
|
} else if (s[i] == '&') { |
|
|
t += "&"; |
|
|
} else if (s[i] == '\'') { |
|
|
t += "'"; |
|
|
} else if (s[i] == '"') { |
|
|
t += """; |
|
|
} else { |
|
|
t += s[i]; |
|
|
} |
|
|
} |
|
|
return t; |
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|