File size: 2,501 Bytes
fd49381 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
#include "ForestWriter.h"
#include <cassert>
#include <vector>
#include "TopologicalSorter.h"
namespace MosesTraining
{
namespace Syntax
{
namespace PostprocessEgretForests
{
void ForestWriter::Write(const std::string &sentence, const Forest &forest,
std::size_t sentNum)
{
m_out << "sentence " << sentNum << " :" << std::endl;
m_out << PossiblyEscape(sentence) << std::endl;
// Check for parse failure.
if (forest.vertices.empty()) {
m_out << std::endl << std::endl;
return;
}
// Sort the vertices topologically then output the hyperedges from each.
std::vector<const Forest::Vertex *> vertices;
TopologicalSorter sorter;
sorter.Sort(forest, vertices);
for (std::vector<const Forest::Vertex *>::const_iterator p = vertices.begin();
p != vertices.end(); ++p) {
const Forest::Vertex &v = **p;
for (std::vector<boost::shared_ptr<Forest::Hyperedge> >::const_iterator
q = v.incoming.begin(); q != v.incoming.end(); ++q) {
WriteHyperedgeLine(**q);
}
}
// Write a terminating blank line.
m_out << std::endl;
}
// Writes a single hyperedge as one line of the form
//   <head> => <tail_1> ... <tail_n> ||| <weight>
// where each vertex is formatted by WriteVertex.
void ForestWriter::WriteHyperedgeLine(const Forest::Hyperedge &e)
{
  WriteVertex(*e.head);
  m_out << " =>";

  // Each tail vertex is preceded by a single space separator.
  const std::size_t tailSize = e.tail.size();
  for (std::size_t i = 0; i < tailSize; ++i) {
    m_out << " ";
    WriteVertex(*e.tail[i]);
  }

  m_out << " ||| " << e.weight << std::endl;
}
// Writes a vertex's symbol (escaped if the writer's options request it).
// A vertex that has at least one incoming hyperedge is additionally suffixed
// with its span as "[start,end]"; a vertex with none gets the symbol only.
void ForestWriter::WriteVertex(const Forest::Vertex &v)
{
  m_out << PossiblyEscape(v.symbol.value);

  // No incoming hyperedges: the symbol alone is the full representation.
  if (v.incoming.empty()) {
    return;
  }

  m_out << "[" << v.start << "," << v.end << "]";
}
// Returns the escaped form of s when m_options.escape is set; otherwise
// returns a copy of s unchanged.
std::string ForestWriter::PossiblyEscape(const std::string &s) const
{
  return m_options.escape ? Escape(s) : s;
}
// Escapes XML special characters, plus the characters that are significant in
// the forest output format ('[', ']' and '|').
//
// Each input character is examined exactly once, so the '&' introduced by a
// replacement is never itself re-escaped.  The mapping is:
//   '<'  -> "&lt;"      '>'  -> "&gt;"
//   '['  -> "&#91;"     ']'  -> "&#93;"
//   '|'  -> "&#124;"    '&'  -> "&amp;"
//   '\'' -> "&apos;"    '"'  -> "&quot;"
// All other characters are copied through unchanged.
//
// Returns the escaped copy of s; s itself is not modified.
std::string ForestWriter::Escape(const std::string &s) const
{
  std::string t;
  const std::size_t len = s.size();
  // Lower bound on the result size; escaped characters grow the string.
  t.reserve(len);
  for (std::size_t i = 0; i < len; ++i) {
    const char c = s[i];
    switch (c) {
    case '<':
      t += "&lt;";
      break;
    case '>':
      t += "&gt;";
      break;
    case '[':
      t += "&#91;";
      break;
    case ']':
      t += "&#93;";
      break;
    case '|':
      t += "&#124;";
      break;
    case '&':
      t += "&amp;";
      break;
    case '\'':
      t += "&apos;";
      break;
    case '"':
      t += "&quot;";
      break;
    default:
      t += c;
      break;
    }
  }
  return t;
}
} // namespace PostprocessEgretForests
} // namespace Syntax
} // namespace MosesTraining
|