| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #pragma once |
| #include "tables-core.h" |
|
|
| #include <vector> |
| #include <set> |
| #include <map> |
| #include <boost/unordered_map.hpp> |
|
|
| namespace MosesTraining |
| { |
|
|
|
|
/**
 * Word alignment stored as one set of indices per position.
 * NOTE(review): judging by the "targetToSource" naming used by the class
 * below, the outer index is presumably a target position and each set holds
 * the aligned source positions -- confirm against the code filling it.
 */
typedef std::vector<std::set<size_t> > ALIGNMENT;
|
|
|
|
| class ExtractionPhrasePair |
| { |
|
|
| protected: |
|
|
| typedef std::map<std::string,float> PROPERTY_VALUES; |
| typedef std::map<std::string,float>::iterator LAST_PROPERTY_VALUE; |
|
|
|
|
| bool m_isValid; |
|
|
| const PHRASE *m_phraseSource; |
| const PHRASE *m_phraseTarget; |
|
|
| float m_count; |
| float m_pcfgSum; |
|
|
| std::map<ALIGNMENT*,float> m_targetToSourceAlignments; |
| std::map<std::string, |
| std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties; |
|
|
| float m_lastCount; |
| float m_lastPcfgSum; |
| std::map<ALIGNMENT*,float>::iterator m_lastTargetToSourceAlignment; |
|
|
| public: |
|
|
| ExtractionPhrasePair( const PHRASE *phraseSource, |
| const PHRASE *phraseTarget, |
| ALIGNMENT *targetToSourceAlignment, |
| float count, float pcfgSum ); |
|
|
| ~ExtractionPhrasePair(); |
|
|
| bool Add( ALIGNMENT *targetToSourceAlignment, |
| float count, float pcfgSum ); |
|
|
| void IncrementPrevious( float count, float pcfgSum ); |
|
|
| bool Matches( const PHRASE *otherPhraseSource, |
| const PHRASE *otherPhraseTarget, |
| ALIGNMENT *otherTargetToSourceAlignment ) const; |
|
|
| bool Matches( const PHRASE *otherPhraseSource, |
| const PHRASE *otherPhraseTarget, |
| ALIGNMENT *otherTargetToSourceAlignment, |
| bool &sourceMatch, |
| bool &targetMatch, |
| bool &alignmentMatch ) const; |
|
|
| bool MatchesAlignment( ALIGNMENT *otherTargetToSourceAlignment ) const; |
|
|
| void Clear(); |
|
|
| bool IsValid() const { |
| return m_isValid; |
| } |
|
|
|
|
| const PHRASE *GetSource() const { |
| return m_phraseSource; |
| } |
|
|
| const PHRASE *GetTarget() const { |
| return m_phraseTarget; |
| } |
|
|
| float GetCount() const { |
| return m_count; |
| } |
|
|
| float GetPcfgScore() const { |
| return m_pcfgSum; |
| } |
|
|
| const size_t GetNumberOfProperties() const { |
| return m_properties.size(); |
| } |
|
|
| const std::map<std::string,float> *GetProperty( const std::string &key ) const { |
| std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::const_iterator iter; |
| iter = m_properties.find(key); |
| if (iter == m_properties.end()) { |
| return NULL; |
| } else { |
| return iter->second.first; |
| } |
| } |
|
|
| const ALIGNMENT *FindBestAlignmentTargetToSource() const; |
|
|
| const std::string *FindBestPropertyValue(const std::string &key) const; |
|
|
| std::string CollectAllPropertyValues(const std::string &key) const; |
|
|
| std::string CollectAllLabelsSeparateLHSAndRHS(const std::string& propertyKey, |
| std::set<std::string>& sourceLabelSet, |
| boost::unordered_map<std::string,float>& sourceLHSCounts, |
| boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& sourceRHSAndLHSJointCounts, |
| Vocabulary &vcbT) const; |
|
|
| void CollectAllPhraseOrientations(const std::string &key, |
| const std::vector<float> &orientationClassPriorsL2R, |
| const std::vector<float> &orientationClassPriorsR2L, |
| double smoothingFactor, |
| std::ostream &out) const; |
|
|
| void UpdateVocabularyFromValueTokens(const std::string& propertyKey, |
| std::set<std::string>& vocabulary) const; |
|
|
| void AddProperties(const std::string &str, float count); |
|
|
| void AddProperty(const std::string &key, const std::string &value, float count) { |
| std::map<std::string, |
| std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key); |
| if ( iter == m_properties.end() ) { |
| |
| PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES(); |
| std::pair<LAST_PROPERTY_VALUE,bool> insertedProperty = propertyValues->insert( std::pair<std::string,float>(value,count) ); |
| LAST_PROPERTY_VALUE *lastPropertyValue = new LAST_PROPERTY_VALUE(insertedProperty.first); |
| m_properties[key] = std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* >(propertyValues, lastPropertyValue); |
| } else { |
| LAST_PROPERTY_VALUE *lastPropertyValue = (iter->second).second; |
| if ( (*lastPropertyValue)->first == value ) { |
| |
| (*lastPropertyValue)->second += count; |
| } else { |
| |
| |
| PROPERTY_VALUES *propertyValues = (iter->second).first; |
| std::pair<LAST_PROPERTY_VALUE,bool> insertedProperty = propertyValues->insert( std::pair<std::string,float>(value,count) ); |
| if ( !insertedProperty.second ) { |
| insertedProperty.first->second += count; |
| } |
| LAST_PROPERTY_VALUE *lastPropertyValue = new LAST_PROPERTY_VALUE(insertedProperty.first); |
| delete (iter->second).second; |
| (iter->second).second = lastPropertyValue; |
| } |
| } |
| } |
|
|
| }; |
|
|
| } |
|
|
|
|