| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #pragma once |
| #ifndef EXTRACT_GHKM_NODE_H_ |
| #define EXTRACT_GHKM_NODE_H_ |
|
|
| #include <cassert> |
| #include <iterator> |
| #include <string> |
| #include <vector> |
|
|
| #include "Span.h" |
|
|
| namespace MosesTraining |
| { |
| namespace Syntax |
| { |
| namespace GHKM |
| { |
|
|
| class Subgraph; |
|
|
| enum NodeType { SOURCE, TARGET, TREE }; |
|
|
| class Node |
| { |
| public: |
| Node(const std::string &label, NodeType type) |
| : m_label(label) |
| , m_type(type) |
| , m_pcfgScore(0.0f) {} |
|
|
| ~Node(); |
|
|
| const std::string &GetLabel() const { |
| return m_label; |
| } |
| NodeType GetType() const { |
| return m_type; |
| } |
| const std::vector<Node*> &GetChildren() const { |
| return m_children; |
| } |
| const std::vector<Node*> &GetParents() const { |
| return m_parents; |
| } |
| float GetPcfgScore() const { |
| return m_pcfgScore; |
| } |
| const Span &GetSpan() const { |
| return m_span; |
| } |
| const Span &GetComplementSpan() const { |
| return m_complementSpan; |
| } |
| const std::vector<const Subgraph*> &GetRules() const { |
| return m_rules; |
| } |
|
|
| void SetChildren(const std::vector<Node*> &c) { |
| m_children = c; |
| } |
| void SetParents(const std::vector<Node*> &p) { |
| m_parents = p; |
| } |
| void SetPcfgScore(float s) { |
| m_pcfgScore = s; |
| } |
| void SetSpan(const Span &s) { |
| m_span = s; |
| } |
| void SetComplementSpan(const Span &cs) { |
| m_complementSpan = cs; |
| } |
|
|
| void AddChild(Node *c) { |
| m_children.push_back(c); |
| } |
| void AddParent(Node *p) { |
| m_parents.push_back(p); |
| } |
| void AddRule(const Subgraph *s) { |
| m_rules.push_back(s); |
| } |
|
|
| bool IsSink() const { |
| return m_children.empty(); |
| } |
| bool IsPreterminal() const; |
|
|
| void PropagateIndex(int); |
|
|
| std::vector<std::string> GetTargetWords() const; |
|
|
| |
| |
| |
| template<typename OutputIterator> |
| void GetTreeAncestors(OutputIterator result, bool includeSelf=false); |
|
|
| |
| |
| template<typename InputIterator> |
| static Node *LowestCommonAncestor(InputIterator first, InputIterator last); |
|
|
| private: |
| |
| Node(const Node &); |
| Node &operator=(const Node &); |
|
|
| void GetTargetWords(std::vector<std::string> &) const; |
|
|
| std::string m_label; |
| NodeType m_type; |
| std::vector<Node*> m_children; |
| std::vector<Node*> m_parents; |
| float m_pcfgScore; |
| Span m_span; |
| Span m_complementSpan; |
| std::vector<const Subgraph*> m_rules; |
| }; |
|
|
| template<typename OutputIterator> |
| void Node::GetTreeAncestors(OutputIterator result, bool includeSelf) |
| { |
| |
| assert(m_type == TARGET || m_type == TREE); |
|
|
| if (includeSelf) { |
| *result++ = this; |
| } |
|
|
| Node *ancestor = !(m_parents.empty()) ? m_parents[0] : 0; |
| while (ancestor != 0) { |
| *result++ = ancestor; |
| ancestor = !(ancestor->m_parents.empty()) ? ancestor->m_parents[0] : 0; |
| } |
| } |
|
|
| template<typename InputIterator> |
| Node *Node::LowestCommonAncestor(InputIterator first, InputIterator last) |
| { |
| |
| if (first == last) { |
| return 0; |
| } |
|
|
| |
| |
| InputIterator p = first; |
| Node *lca = *p++; |
| for (; p != last; ++p) { |
| Node *node = *p; |
| assert(node->m_type != SOURCE); |
| if (node != lca) { |
| lca = 0; |
| } |
| } |
| if (lca) { |
| return lca; |
| } |
|
|
| |
| size_t minPathLength = 0; |
| std::vector<std::vector<Node *> > paths; |
| for (p = first; p != last; ++p) { |
| paths.resize(paths.size()+1); |
| (*p)->GetTreeAncestors(std::back_inserter(paths.back()), true); |
| size_t pathLength = paths.back().size(); |
| assert(pathLength > 0); |
| if (paths.size() == 1 || pathLength < minPathLength) { |
| minPathLength = pathLength; |
| } |
| } |
|
|
| |
| |
| for (size_t i = 0; i < minPathLength; ++i) { |
| bool match = true; |
| for (size_t j = 0; j < paths.size(); ++j) { |
| size_t index = paths[j].size() - minPathLength + i; |
| assert(index >= 0); |
| assert(index < paths[j].size()); |
| if (j == 0) { |
| lca = paths[j][index]; |
| assert(lca); |
| } else if (lca != paths[j][index]) { |
| match = false; |
| break; |
| } |
| } |
| if (match) { |
| return lca; |
| } |
| } |
|
|
| |
| assert(false); |
| return 0; |
| } |
|
|
| } |
| } |
| } |
|
|
| #endif |
|
|