| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include <string.h> |
| | #include <fstream> |
| | #include <vector> |
| | #include <string> |
| | #include <iostream> |
| | #include <cstdlib> |
| | #include "InputFileStream.h" |
| | #include "OutputFileStream.h" |
| | #include "util/tokenize.hh" |
| |
|
| | using namespace std; |
| |
|
// Splits a record into its fields on the five-character separator " ||| ".
//
// line: null-terminated record text.
// Returns the fields in order, without the separators. The result always
// holds at least one element; an input without any separator comes back as a
// single field, and adjoining separators produce empty fields.
vector< string > splitLine(const char *line)
{
  static const char kSeparator[] = " ||| ";
  const int kSeparatorLen = 5;

  vector< string > fields;
  int fieldBegin = 0;
  int pos = 0;
  while (line[pos] != '\0') {
    // strncmp stops at the terminating null, so this never reads past the end.
    if (strncmp(line + pos, kSeparator, kSeparatorLen) != 0) {
      ++pos;
      continue;
    }

    // The trailing blank of one separator may double as the leading blank of
    // the next one (" ||| ||| "); clamp so the field in between is empty
    // rather than negative-length.
    if (fieldBegin > pos) {
      fieldBegin = pos;
    }
    fields.push_back(string(line + fieldBegin, pos - fieldBegin));
    fieldBegin = pos + kSeparatorLen;

    // Resume scanning at the separator's trailing blank.
    pos += kSeparatorLen - 1;
  }

  // Whatever follows the last separator (possibly nothing) is the final field.
  fields.push_back(string(line + fieldBegin, pos - fieldBegin));
  return fields;
}
| |
|
| | bool getLine( istream &fileP, vector< string > &item ) |
| | { |
| | if (fileP.eof()) |
| | return false; |
| |
|
| | string line; |
| | if (getline(fileP, line)) { |
| | item = splitLine(line.c_str()); |
| | return true; |
| | } else { |
| | return false; |
| | } |
| | } |
| |
|
| |
|
| | int main(int argc, char* argv[]) |
| | { |
| | cerr << "Starting..." << endl; |
| |
|
| | char* &fileNameDirect = argv[1]; |
| | Moses::InputFileStream fileDirect(fileNameDirect); |
| |
|
| |
|
| | |
| | if (fileDirect.fail()) { |
| | cerr << "ERROR: could not open extract file " << fileNameDirect << endl; |
| | exit(1); |
| | } |
| | istream &fileDirectP = fileDirect; |
| |
|
| | char* &fileNameConsolidated = argv[2]; |
| | ostream *fileConsolidated; |
| |
|
| | if (strcmp(fileNameConsolidated, "-") == 0) { |
| | fileConsolidated = &cout; |
| | } else { |
| | Moses::OutputFileStream *outputFile = new Moses::OutputFileStream(); |
| | bool success = outputFile->Open(fileNameConsolidated); |
| | if (!success) { |
| | cerr << "ERROR: could not open file phrase table file " |
| | << fileNameConsolidated << endl; |
| | exit(1); |
| | } |
| | fileConsolidated = outputFile; |
| | } |
| |
|
| | int i=0; |
| | while(true) { |
| | i++; |
| | if (i%1000 == 0) cerr << "." << flush; |
| | if (i%10000 == 0) cerr << ":" << flush; |
| | if (i%100000 == 0) cerr << "!" << flush; |
| |
|
| | vector< string > itemDirect; |
| | if (! getLine(fileDirectP, itemDirect )) |
| | break; |
| |
|
| | const vector< string > count = util::tokenize( itemDirect[4] ); |
| | float countEF = atof(count[0].c_str()); |
| | float countF = atof(count[1].c_str()); |
| | float prob = countF/countEF; |
| |
|
| | (*fileConsolidated) << itemDirect[0] << " ||| " |
| | << itemDirect[1] << " ||| " |
| | << prob << " ||| " |
| | << itemDirect[2] << "||| " |
| | << itemDirect[4] << " " << countEF |
| | << " ||| " << endl; |
| | } |
| |
|
| | fileConsolidated->flush(); |
| | if (fileConsolidated != &cout) { |
| | delete fileConsolidated; |
| | } |
| |
|
| | cerr << "Finished" << endl; |
| | } |
| |
|
| |
|