|
|
#include "SuffixArray.h" |
|
|
#include "../util/tokenize.hh" |
|
|
#include <getopt.h> |
|
|
|
|
|
using namespace std; |
|
|
|
|
|
size_t lookup( string ); |
|
|
vector<string> tokenize( const char input[] ); |
|
|
SuffixArray suffixArray; |
|
|
|
|
|
int main(int argc, char* argv[]) |
|
|
{ |
|
|
|
|
|
string query; |
|
|
string fileNameSuffix; |
|
|
string fileNameSource; |
|
|
bool loadFlag = false; |
|
|
bool saveFlag = false; |
|
|
bool createFlag = false; |
|
|
bool queryFlag = false; |
|
|
bool querySentenceFlag = false; |
|
|
|
|
|
int stdioFlag = false; |
|
|
string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create corpus]\n\t[--query string]\n\t[--stdio]\n"; |
|
|
while(1) { |
|
|
static struct option long_options[] = { |
|
|
{"load", required_argument, 0, 'l'}, |
|
|
{"save", required_argument, 0, 's'}, |
|
|
{"create", required_argument, 0, 'c'}, |
|
|
{"query", required_argument, 0, 'q'}, |
|
|
{"query-sentence", required_argument, 0, 'Q'}, |
|
|
{"document", required_argument, 0, 'd'}, |
|
|
{"stdio", no_argument, 0, 'i'}, |
|
|
{"stdio-sentence", no_argument, 0, 'I'}, |
|
|
{0, 0, 0, 0} |
|
|
}; |
|
|
int option_index = 0; |
|
|
int c = getopt_long (argc, argv, "l:s:c:q:Q:iId", long_options, &option_index); |
|
|
if (c == -1) break; |
|
|
switch (c) { |
|
|
case 'l': |
|
|
fileNameSuffix = string(optarg); |
|
|
loadFlag = true; |
|
|
break; |
|
|
case 's': |
|
|
fileNameSuffix = string(optarg); |
|
|
saveFlag = true; |
|
|
break; |
|
|
case 'c': |
|
|
fileNameSource = string(optarg); |
|
|
createFlag = true; |
|
|
break; |
|
|
case 'q': |
|
|
query = string(optarg); |
|
|
queryFlag = true; |
|
|
break; |
|
|
case 'Q': |
|
|
query = string(optarg); |
|
|
querySentenceFlag = true; |
|
|
break; |
|
|
case 'i': |
|
|
stdioFlag = true; |
|
|
break; |
|
|
case 'I': |
|
|
stdioFlag = true; |
|
|
querySentenceFlag = true; |
|
|
break; |
|
|
case 'd': |
|
|
suffixArray.UseDocument(); |
|
|
break; |
|
|
default: |
|
|
cerr << info; |
|
|
exit(1); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if (saveFlag && !createFlag) { |
|
|
cerr << "error: cannot save without creating\n" << info; |
|
|
exit(1); |
|
|
} |
|
|
if (saveFlag && loadFlag) { |
|
|
cerr << "error: cannot load and save at the same time\n" << info; |
|
|
exit(1); |
|
|
} |
|
|
if (!loadFlag && !createFlag) { |
|
|
cerr << "error: neither load or create - i have no info!\n" << info; |
|
|
exit(1); |
|
|
} |
|
|
|
|
|
|
|
|
if (createFlag) { |
|
|
cerr << "will create\n"; |
|
|
cerr << "corpus is in " << fileNameSource << endl; |
|
|
suffixArray.Create( fileNameSource ); |
|
|
if (saveFlag) { |
|
|
suffixArray.Save( fileNameSuffix ); |
|
|
cerr << "will save in " << fileNameSuffix << endl; |
|
|
} |
|
|
} |
|
|
if (loadFlag) { |
|
|
cerr << "will load from " << fileNameSuffix << endl; |
|
|
suffixArray.Load( fileNameSuffix ); |
|
|
} |
|
|
|
|
|
|
|
|
if (stdioFlag) { |
|
|
while(true) { |
|
|
string query; |
|
|
if (getline(cin, query, '\n').eof()) { |
|
|
return 0; |
|
|
} |
|
|
if (querySentenceFlag) { |
|
|
vector< string > queryString = util::tokenize( query.c_str() ); |
|
|
suffixArray.PrintSentenceMatches( queryString ); |
|
|
} else { |
|
|
cout << lookup( query ) << endl; |
|
|
} |
|
|
} |
|
|
} else if (queryFlag) { |
|
|
cout << lookup( query ) << endl; |
|
|
} else if (querySentenceFlag) { |
|
|
vector< string > queryString = util::tokenize( query.c_str() ); |
|
|
suffixArray.PrintSentenceMatches( queryString ); |
|
|
} |
|
|
return 0; |
|
|
} |
|
|
|
|
|
size_t lookup( string query ) |
|
|
{ |
|
|
cerr << "query is " << query << endl; |
|
|
vector< string > queryString = util::tokenize( query.c_str() ); |
|
|
return suffixArray.Count( queryString ); |
|
|
} |
|
|
|