// build a phrase table for the given input #include "ug_mm_ttrack.h" #include "ug_mm_tsa.h" #include "tpt_tokenindex.h" #include "ug_corpus_token.h" #include #include #include #include #include #include #include "ug_typedefs.h" #include "tpt_pickler.h" #include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h" #include "moses/TranslationModel/UG/generic/sampling/Sampling.h" #include "moses/TranslationModel/UG/generic/file_io/ug_stream.h" #include #include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h" using namespace std; using namespace ugdiss; using namespace Moses; typedef sapt::L2R_Token Token; typedef sapt::mmTSA::tree_iterator iter; typedef boost::unordered_map,size_t> phrase_counter_t; #define CACHING_THRESHOLD 1000 sapt::mmTtrack T; // token tracks sapt::TokenIndex V; // vocabs sapt::mmTSA I; // suffix arrays void interpret_args(int ac, char* av[]); string bname; bool echo; int main(int argc, char* argv[]) { interpret_args(argc,argv); T.open(bname+".mct"); V.open(bname+".tdx"); V.iniReverseIndex(); I.open(bname+".sfa",&T); string line; while (getline(cin,line)) { vector phr; V.fillIdSeq(line,phr); TSA::tree_iterator m(&I); size_t i = 0; while (i < phr.size() && m.extend(phr[i])) ++i; if (echo) cout << line << ": "; if (i < phr.size()) cout << 0 << endl; else cout << m.rawCnt() << endl; } exit(0); } void interpret_args(int ac, char* av[]) { namespace po=boost::program_options; po::variables_map vm; po::options_description o("Options"); po::options_description h("Hidden Options"); po::positional_options_description a; o.add_options() ("help,h", "print this message") ("echo,e", po::bool_switch(&echo), "repeat lookup phrases") ; h.add_options() ("bname", po::value(&bname), "base name") ; a.add("bname",1); get_options(ac,av,h.add(o),a,vm); }