File size: 5,223 Bytes
687064a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
#include "MeteorScorer.h"
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iterator>
#include <sstream>
#include <stdexcept>
#include <cstdio>
#include <string>
#include <vector>
#include <boost/thread/mutex.hpp>
#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
#include "Fdstream.h"
#endif
#include "ScoreStats.h"
#include "Util.h"
using namespace std;
namespace MosesTuning
{
// Meteor supported
#if (defined(__GLIBCXX__) || defined(__GLIBCPP__)) && !defined(_WIN32)
// Readable aliases for the four pipe ends used by the MeteorScorer
// constructor. They expand to elements of the local arrays pipefds_input and
// pipefds_output, so they are only usable where those arrays are in scope.
// pipe() stores the read end at index 0 and the write end at index 1.
//   pipefds_input  : parent writes -> child's stdin
//   pipefds_output : child's stdout -> parent reads
#define CHILD_STDIN_READ pipefds_input[0]
#define CHILD_STDIN_WRITE pipefds_input[1]
#define CHILD_STDOUT_READ pipefds_output[0]
#define CHILD_STDOUT_WRITE pipefds_output[1]
// Construct a METEOR scorer that talks to an external Meteor process.
//
// Options read from the scorer config string:
//   jar  - path to the Meteor jar (required)
//   lang - value passed to Meteor's -l option (default "en")
//   task - value passed to Meteor's -t option (default "tune")
//   m, p, w - optional values passed to -m, -p and -w when non-empty
//
// Two pipes are created and the process is forked. The child redirects its
// stdin/stdout to the pipes and executes the Meteor command line through
// /bin/bash; the parent keeps the opposite ends wrapped in m_to_meteor and
// m_from_meteor.
//
// Throws runtime_error if the jar option is missing, if a pipe cannot be
// created, or if fork() fails.
MeteorScorer::MeteorScorer(const string& config)
  : StatisticsBasedScorer("METEOR",config)
{
  meteor_jar = getConfig("jar", "");
  meteor_lang = getConfig("lang", "en");
  meteor_task = getConfig("task", "tune");
  meteor_m = getConfig("m", "");
  meteor_p = getConfig("p", "");
  meteor_w = getConfig("w", "");
  if (meteor_jar == "") {
    throw runtime_error("Meteor jar required, see MeteorScorer.h for full list of options: --scconfig jar:/path/to/meteor-1.4.jar");
  }

  int pipefds_input[2];
  int pipefds_output[2];

  // Create pipes for process communication
  if (pipe(pipefds_input) == -1) {
    throw runtime_error("Error creating pipe");
  }
  if (pipe(pipefds_output) == -1) {
    // Do not leak the first pipe when the second one cannot be created
    close(CHILD_STDIN_READ);
    close(CHILD_STDIN_WRITE);
    throw runtime_error("Error creating pipe");
  }

  // Fork
  const pid_t pid = fork();
  if (pid == pid_t(-1)) {
    // No child exists: release every descriptor and report the failure
    // instead of continuing to talk to a process that was never started.
    close(CHILD_STDIN_READ);
    close(CHILD_STDIN_WRITE);
    close(CHILD_STDOUT_READ);
    close(CHILD_STDOUT_WRITE);
    throw runtime_error("Error forking Meteor process");
  }

  if (pid == pid_t(0)) {
    // Child's IO: stdin/stdout become the pipe ends facing the parent
    if (dup2(CHILD_STDIN_READ, 0) == -1 || dup2(CHILD_STDOUT_WRITE, 1) == -1) {
      TRACE_ERR("Error redirecting Meteor stdin/stdout\n");
      _exit(127);
    }
    close(CHILD_STDIN_WRITE);
    close(CHILD_STDOUT_READ);

    // Call Meteor
    stringstream meteor_cmd;
    meteor_cmd << "java -Xmx1G -jar " << meteor_jar << " - - -stdio -lower -t " << meteor_task << " -l " << meteor_lang;
    if (meteor_m != "") {
      meteor_cmd << " -m '" << meteor_m << "'";
    }
    if (meteor_p != "") {
      meteor_cmd << " -p '" << meteor_p << "'";
    }
    if (meteor_w != "") {
      meteor_cmd << " -w '" << meteor_w << "'";
    }
    TRACE_ERR("Executing: " + meteor_cmd.str() + "\n");
    execl("/bin/bash", "bash", "-c", meteor_cmd.str().c_str(), static_cast<char*>(NULL));

    // execl only returns on failure. This is still the forked copy of the
    // caller, so it must not throw: unwinding here would run the parent's
    // handlers a second time with stdout pointing at the pipe. Terminate the
    // child immediately instead.
    TRACE_ERR("Continued after execl\n");
    _exit(127);
  }

  // Parent's IO: drop the child's ends and wrap the remaining descriptors
  close(CHILD_STDIN_READ);
  close(CHILD_STDOUT_WRITE);
  m_to_meteor = new ofdstream(CHILD_STDIN_WRITE);
  m_from_meteor = new ifdstream(CHILD_STDOUT_READ);
}
// Destroys the two stream objects that the constructor allocated with new.
// NOTE(review): presumably destroying the streams also closes the underlying
// pipe descriptors, which would signal end-of-input to the Meteor process;
// confirm against Fdstream.h. The child process is not waited for here.
MeteorScorer::~MeteorScorer()
{
// Cleanup IO
delete m_to_meteor;
delete m_from_meteor;
}
void MeteorScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
// Just store strings since we're sending lines to an external process
for (int incRefs = 0; incRefs < (int)referenceFiles.size(); incRefs++) {
m_references.clear();
ifstream in(referenceFiles.at(incRefs).c_str());
if (!in) {
throw runtime_error("Unable to open " + referenceFiles.at(incRefs));
}
string line;
while (getline(in, line)) {
line = this->preprocessSentence(line);
m_references.push_back(line);
}
m_multi_references.push_back(m_references);
}
m_references=m_multi_references.at(0);
}
void MeteorScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
string sentence = this->preprocessSentence(text);
string stats_str;
stringstream input;
// SCORE ||| ref1 ||| ref2 ||| ... ||| text
input << "SCORE";
for (int incRefs = 0; incRefs < (int)m_multi_references.size(); incRefs++) {
if (sid >= m_multi_references.at(incRefs).size()) {
stringstream msg;
msg << "Sentence id (" << sid << ") not found in reference set";
throw runtime_error(msg.str());
}
string ref = m_multi_references.at(incRefs).at(sid);
input << " ||| " << ref;
}
input << " ||| " << text << "\n";
// Threadsafe IO
#ifdef WITH_THREADS
mtx.lock();
#endif
//TRACE_ERR ( "in: " + input.str() );
*m_to_meteor << input.str();
m_from_meteor->getline(stats_str);
//TRACE_ERR ( "out: " + stats_str + "\n" );
#ifdef WITH_THREADS
mtx.unlock();
#endif
entry.set(stats_str);
}
float MeteorScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
string score;
stringstream input;
// EVAL ||| stats
input << "EVAL |||";
copy(comps.begin(), comps.end(), ostream_iterator<int>(input, " "));
input << "\n";
// Threadsafe IO
#ifdef WITH_THREADS
mtx.lock();
#endif
//TRACE_ERR ( "in: " + input.str() );
*m_to_meteor << input.str();
m_from_meteor->getline(score);
//TRACE_ERR ( "out: " + score + "\n" );
#ifdef WITH_THREADS
mtx.unlock();
#endif
return atof(score.c_str());
}
#else
// Meteor unsupported, throw error if used
// Fallback constructor for builds where the Meteor scorer is compiled out:
// it always throws runtime_error, so no instance can be created.
MeteorScorer::MeteorScorer(const string& config)
  : StatisticsBasedScorer("METEOR",config)
{
  const string reason("Meteor unsupported, requires GLIBCXX");
  throw runtime_error(reason);
}
// Fallback destructor: the fallback constructor allocates nothing, so there
// is nothing to release.
MeteorScorer::~MeteorScorer()
{
}
void MeteorScorer::setReferenceFiles(const vector<string>& referenceFiles) {}
// Fallback stub: leaves entry untouched.
void MeteorScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
  (void)sid;
  (void)text;
  (void)entry;
}
float MeteorScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
// Should never be reached
return 0.0;
}
#endif
}
|