hindi-sindhi-docker
/
mosesdecoder
/moses
/TranslationModel
/UG
/generic
/stringdist
/ug_stringdist.h
| //-*- c++ -*- | |
| // string distance measures | |
| // Code by Ulrich Germann | |
| namespace stringdist | |
| { | |
| float | |
| levenshtein(UChar const* a, size_t const lenA, | |
| UChar const* b, size_t const lenB); | |
| UErrorCode strip_accents(UnicodeString & trg); | |
| float | |
| fillAlignmentMatrix(UChar const* a, size_t const lenA, | |
| UChar const* b, size_t const lenB, | |
| std::vector<std::vector<float> > & M); | |
| class StringDiff | |
| { | |
| public: | |
| enum MATCHTYPE | |
| { | |
| same, // a and b are identical | |
| cap, // a and b differ only in capitalization | |
| flip, // two-letter flip | |
| permutation, // a and b have same letters but in different order | |
| accent, // a and b are the same basic letters, ignoring accents | |
| duplication, // a is empty | |
| insertion, // a is empty | |
| deletion, // b is empty | |
| mismatch, // none of the above | |
| noinit // not initialized | |
| }; | |
| struct Segment | |
| { | |
| static char const* elabel[]; | |
| int start_a, end_a; | |
| int start_b, end_b; | |
| MATCHTYPE match; | |
| float dist; | |
| Segment(); | |
| Segment(size_t const as, size_t const ae, | |
| size_t const bs, size_t const be, | |
| UnicodeString const& a, | |
| UnicodeString const& b); | |
| char const* label() const; | |
| }; | |
| private: | |
| UnicodeString a,b; | |
| std::vector<Segment> difflist; | |
| std::vector<int> diffcnt; | |
| public: | |
| UnicodeString const& set_a(std::string const& a); | |
| UnicodeString const& set_b(std::string const& b); | |
| UnicodeString const& get_a() const; | |
| UnicodeString const& get_b() const; | |
| StringDiff(std::string const& a, std::string const& b); | |
| StringDiff(); | |
| size_t size(); | |
| size_t align(bool force=false); // returns the levenshtein distance | |
| void showDiff(std::ostream& out); | |
| float levenshtein(); | |
| Segment const& operator[](uint32_t i) const; | |
| void fillAlignmentMatrix(std::vector<std::vector<float> > & M) const; | |
| vector<int> const& getFeatures() const; | |
| }; | |
| } | |