| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #ifndef PHRASE_EXTRACT_H |
| #define PHRASE_EXTRACT_H |
|
|
| #include <fstream> |
| #include <map> |
| #include <set> |
| #include <string> |
| #include <vector> |
|
|
| #include "../phrase-extract/SentenceAlignment.h" |
|
|
| #include "typedefs.h" |
|
|
|
|
| |
|
|
| |
// HPhraseVertex: a pair of two ints.
// NOTE(review): presumably a single point (a pair of word positions) in the
// sentence alignment grid -- which int is which is not visible in this header;
// confirm against the extraction code.
| typedef std::pair<int, int> HPhraseVertex; |
|
|
| |
| |
// HPhrase: a pair of two HPhraseVertex values.
// NOTE(review): presumably the two opposite corners that delimit one phrase
// pair; the order of the corners is not shown here -- confirm with the code
// that builds these values.
| typedef std::pair<HPhraseVertex, HPhraseVertex> HPhrase; |
|
|
| |
// HPhraseVector: a std::vector of HPhrase entries.
| typedef std::vector<HPhrase> HPhraseVector; |
|
|
| |
| |
// HSentenceVertices: ordered map from an int key to a std::set<int> of values
// stored under that key.
// NOTE(review): presumably filled through insertVertex() /
// insertPhraseVertices(), which take this type by non-const reference; the
// meaning of key and set members is not visible here -- confirm.
| typedef std::map<int, std::set<int> > HSentenceVertices; |
|
|
| |
// params_pair_t: an (error, support) pair built from the error_t and support_t
// types nested in PhrasePairsLossyCounter. The member order (error first,
// support second) matches the argument order of the LossyCounterInstance
// constructor.
| typedef std::pair<PhrasePairsLossyCounter::error_t, PhrasePairsLossyCounter::support_t> params_pair_t; |
| |
// PhrasePairsLossyCountersVector: a std::vector of raw pointers to
// PhrasePairsLossyCounter objects.
// NOTE(review): the type does not express who owns (and deletes) the
// pointees -- confirm at the use sites.
| typedef std::vector<PhrasePairsLossyCounter *> PhrasePairsLossyCountersVector; |
|
|
| |
// Kind of reordering model. Values of this type are held by the wordType,
// phraseType and hierType globals and are passed to the getOrient*Model()
// functions and to getOrientString().
// Enumerators keep their implicit values 0, 1, 2.
enum REO_MODEL_TYPE {
  REO_MSD,   // NOTE(review): presumably monotone / swap / discontinuous -- confirm
  REO_MSLR,  // NOTE(review): presumably monotone / swap / disc.-left / disc.-right -- confirm
  REO_MONO   // NOTE(review): presumably monotone vs. non-monotone -- confirm
};
// Orientation value returned by the getOrient*Model() functions and consumed
// by getOrientString().
// Enumerators keep their implicit values 0..4.
enum REO_POS {
  LEFT,
  RIGHT,
  DLEFT,   // NOTE(review): presumably "discontinuous left" -- confirm
  DRIGHT,  // NOTE(review): presumably "discontinuous right" -- confirm
  UNKNOWN
};
|
|
| struct LossyCounterInstance { |
| |
| |
| size_t outputMass; |
| size_t outputSize; |
| |
| PhrasePairsLossyCounter lossyCounter; |
|
|
| LossyCounterInstance(PhrasePairsLossyCounter::error_t error, PhrasePairsLossyCounter::support_t support): outputMass(0), outputSize(0), lossyCounter(error, support) {} |
| }; |
|
|
| |
// LossyCountersVector: a std::vector of raw pointers to LossyCounterInstance
// objects (this is the type of the global `lossyCounters`).
// NOTE(review): ownership of the pointees is not expressed by the type --
// confirm who allocates and deletes them.
| typedef std::vector<LossyCounterInstance *> LossyCountersVector; |
|
|
| struct OutputProcessor { |
| virtual void operator() (const std::string& srcPhrase, const std::string& tgtPhrase, const std::string& orientationInfo, const alignment_t& alignment, const size_t frequency, int mode) = 0; |
| }; |
|
|
|
|
| |
|
|
| |
// getOrientWordModel: returns a REO_POS for a sentence alignment and a
// reordering model type. Only the prototype is given here.
// Parameters are unnamed in this declaration: a SentenceAlignment (non-const
// reference), the model type, two bools, seven ints and two
// bool(int, int) predicates (ge/le/lt declared below have that signature).
// NOTE(review): presumably the word-based orientation; take the meaning of
// the bool/int arguments from the definition, not from this header.
| REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool, |
| int, int, int, int, int, int, int, |
| bool (*)(int, int), bool (*)(int, int)); |
|
|
// getOrientPhraseModel: same leading parameter list as getOrientWordModel
// (sentence alignment, model type, two bools, seven ints, two
// bool(int, int) predicates) plus two read-only HSentenceVertices maps.
// Returns a REO_POS. Only the prototype is given here.
// NOTE(review): presumably the phrase-based orientation; which vertex map is
// which is not visible in this header -- confirm against the definition.
| REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool, |
| int, int, int, int, int, int, int, |
| bool (*)(int, int), bool (*)(int, int), |
| const HSentenceVertices &, const HSentenceVertices &); |
|
|
// getOrientHierModel: same leading parameter list as getOrientWordModel
// (sentence alignment, model type, two bools, seven ints, two
// bool(int, int) predicates) plus four read-only HSentenceVertices maps and
// a trailing REO_POS argument. Returns a REO_POS. Only the prototype is
// given here.
// NOTE(review): presumably the hierarchical orientation, with the trailing
// REO_POS being a previously computed orientation -- confirm against the
// definition.
| REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool, |
| int, int, int, int, int, int, int, |
| bool (*)(int, int), bool (*)(int, int), |
| const HSentenceVertices &, const HSentenceVertices &, |
| const HSentenceVertices &, const HSentenceVertices &, |
| REO_POS); |
|
|
// insertVertex: takes one HSentenceVertices map by mutable reference and two
// ints.
// NOTE(review): presumably records the second int in the set stored under
// the first -- confirm against the definition.
// insertPhraseVertices: takes four HSentenceVertices maps by mutable
// reference and four ints.
// NOTE(review): presumably registers the corner vertices of one phrase in
// the four maps -- the role of each map and int is not visible here.
| void insertVertex(HSentenceVertices &, int, int); |
| void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, int, int, int, int); |
|
|
// getOrientString: maps an orientation value plus a model type to a
// std::string, returned by value.
// NOTE(review): the exact strings produced are defined with the function
// body, not in this header.
| std::string getOrientString(REO_POS, REO_MODEL_TYPE); |
|
|
// Integer comparison predicates; their signature matches the
// bool (*)(int, int) parameters of the getOrient*Model() functions.
// NOTE(review): presumably ge is >=, le is <= and lt is < -- confirm
// against the definitions.
| bool ge(int, int); |
| bool le(int, int); |
| bool lt(int, int); |
// isAligned: boolean query on a sentence alignment and two ints.
// NOTE(review): the meaning of the two ints is not visible here.
| bool isAligned (SentenceAlignment &, int, int); |
// extract: processes one sentence alignment (taken by mutable reference);
// returns nothing.
| void extract(SentenceAlignment &); |
|
|
| |
// addPhrase: takes a sentence alignment, four ints and a std::string by
// mutable reference; returns nothing.
// NOTE(review): presumably the four ints are the phrase boundaries and the
// string carries orientation info -- confirm against the definition.
| void addPhrase(SentenceAlignment &, int, int, int, int, std::string &); |
|
|
| |
// readInput: consumes three input streams named eFile, fFile and aFile.
// NOTE(review): presumably the two sides of the parallel corpus and the
// word alignment file -- confirm against the definition.
| void readInput(std::istream& eFile, std::istream& fFile, std::istream& aFile); |
// processOutput: takes an OutputProcessor by mutable reference.
// NOTE(review): presumably invokes it for each collected phrase pair.
| void processOutput(OutputProcessor& processor); |
// printStats: takes no arguments and returns nothing.
| void printStats(void); |
|
|
|
|
| |
|
|
// Declaration only; the variable is defined in another translation unit.
// NOTE(review): presumably set when output for all reordering models was
// requested -- confirm where it is set.
| extern bool allModelsOutputFlag; |
|
|
| |
// One bool / REO_MODEL_TYPE pair per reordering model family (word, phrase,
// hier). Declarations only; the variables are defined elsewhere.
// NOTE(review): presumably the bool enables the model and the REO_MODEL_TYPE
// selects its variant -- confirm where these are set.
| extern bool wordModel; |
| extern REO_MODEL_TYPE wordType; |
| extern bool phraseModel; |
| extern REO_MODEL_TYPE phraseType; |
| extern bool hierModel; |
| extern REO_MODEL_TYPE hierType; |
|
|
// Global options; declarations only, the variables are defined elsewhere.
// NOTE(review): units and defaults are not visible in this header
// (maxPhraseLength is presumably counted in words) -- confirm.
| extern int maxPhraseLength; |
| extern bool translationFlag; |
| extern bool orientationFlag; |
| extern bool sortedOutput; |
|
|
// Global vector of LossyCounterInstance pointers; declaration only, defined
// elsewhere.
// NOTE(review): how the vector is indexed and who owns the instances is not
// visible in this header -- confirm.
| extern LossyCountersVector lossyCounters; |
|
|
// Only declared when the build defines GET_COUNTS_ONLY: a global vector of
// size_t counters, defined elsewhere.
// NOTE(review): what each element counts is not visible in this header.
| #ifdef GET_COUNTS_ONLY |
| extern std::vector<size_t> phrasePairsCounters; |
| #endif |
|
|
| #endif |
|
|