| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #pragma once |
|
|
| #include <map> |
| #include <ostream> |
| #include <set> |
| #include <string> |
| #include <vector> |
|
|
| #include "OutputFileStream.h" |
| #include "SyntaxTree.h" |
|
|
| #include "syntax-common/tool.h" |
|
|
| namespace MosesTraining |
| { |
| namespace Syntax |
| { |
| namespace GHKM |
| { |
|
|
| struct Options; |
|
|
| class ExtractGHKM : public Tool |
| { |
| public: |
| ExtractGHKM() : Tool("extract-ghkm") {} |
|
|
| virtual int Main(int argc, char *argv[]); |
|
|
| private: |
| void RecordTreeLabels(const SyntaxTree &, std::set<std::string> &); |
| void CollectWordLabelCounts(SyntaxTree &, |
| const Options &, |
| std::map<std::string, int> &, |
| std::map<std::string, std::string> &); |
| void WriteUnknownWordLabel(const std::map<std::string, int> &, |
| const std::map<std::string, std::string> &, |
| const Options &, |
| std::ostream &, |
| bool writeCounts=false) const; |
| void WriteUnknownWordSoftMatches(const std::set<std::string> &, |
| std::ostream &) const; |
| void WriteGlueGrammar(const std::set<std::string> &, |
| const std::map<std::string, int> &, |
| const std::map<std::string,size_t> &, |
| const Options &, |
| std::ostream &) const; |
| void WriteSourceLabelSet(const std::map<std::string,size_t> &, |
| std::ostream &) const; |
| void StripBitParLabels(const std::set<std::string> &labelSet, |
| const std::map<std::string, int> &topLabelSet, |
| std::set<std::string> &outLabelSet, |
| std::map<std::string, int> &outTopLabelSet) const; |
|
|
| std::vector<std::string> ReadTokens(const std::string &) const; |
| std::vector<std::string> ReadTokens(const SyntaxTree &root) const; |
|
|
| void ProcessOptions(int, char *[], Options &) const; |
| }; |
|
|
| } |
| } |
| } |
|
|