#include #include #include #include #include #include #include "Desegmenter.h" #include using namespace std; namespace Moses { void Desegmenter::Load(const string filename) { std::ifstream myFile(filename.c_str() ); if (myFile.is_open()) { cerr << "Desegmentation File open successful." << endl; string line; while (getline(myFile, line)) { stringstream ss(line); string token; vector myline; while (getline(ss, token, '\t')) { myline.push_back(token); } mmDesegTable.insert(pair(myline[2], myline[1] )); } myFile.close(); } else cerr << "open() failed: check if Desegmentation file is in right folder" << endl; } vector Desegmenter::Search(string myKey) { multimap::const_iterator mmiPairFound = mmDesegTable.find(myKey); vector result; if (mmiPairFound != mmDesegTable.end()) { size_t nNumPairsInMap = mmDesegTable.count(myKey); for (size_t nValuesCounter = 0; nValuesCounter < nNumPairsInMap; ++nValuesCounter) { if (mmiPairFound != mmDesegTable.end()) { result.push_back(mmiPairFound->second); } ++mmiPairFound; } return result; } else { string rule_deseg ; rule_deseg = ApplyRules(myKey); result.push_back(rule_deseg); return result; } } string Desegmenter::ApplyRules(string & segToken) { string desegToken=segToken; if (!simple) { boost::replace_all(desegToken, "l+ All", "ll"); boost::replace_all(desegToken, "l+ Al", "ll"); boost::replace_all(desegToken, "y+ y ", "y"); boost::replace_all(desegToken, "p+ ", "t"); boost::replace_all(desegToken, "' +", "}"); boost::replace_all(desegToken, "y +", "A"); boost::replace_all(desegToken, "n +n", "n"); boost::replace_all(desegToken, "mn +m", "mm"); boost::replace_all(desegToken, "En +m", "Em"); boost::replace_all(desegToken, "An +lA", "Em"); boost::replace_all(desegToken, "-LRB-", "("); boost::replace_all(desegToken, "-RRB-", ")"); } boost::replace_all(desegToken, "+ +", ""); boost::replace_all(desegToken, "+ ", ""); boost::replace_all(desegToken, " +", ""); return desegToken; } Desegmenter::~Desegmenter() {} }