| | #ifndef moses_Diffs_h |
| | #define moses_Diffs_h |
| |
|
| | #include <cmath> |
| |
|
| | namespace Moses |
| | { |
| |
|
| | typedef char Diff; |
| | typedef std::vector<Diff> Diffs; |
| |
|
| | template <class Sequence, class Pred> |
| | void CreateDiffRec(size_t** c, |
| | const Sequence &s1, |
| | const Sequence &s2, |
| | size_t start, |
| | size_t i, |
| | size_t j, |
| | Diffs& diffs, |
| | Pred pred) |
| | { |
| | if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) { |
| | CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred); |
| | diffs.push_back(Diff('m')); |
| | } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) { |
| | CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred); |
| | diffs.push_back(Diff('i')); |
| | } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) { |
| | CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred); |
| | diffs.push_back(Diff('d')); |
| | } |
| | } |
| |
|
| | template <class Sequence, class Pred> |
| | Diffs CreateDiff(const Sequence& s1, |
| | const Sequence& s2, |
| | Pred pred) |
| | { |
| |
|
| | Diffs diffs; |
| |
|
| | size_t n = s2.size(); |
| |
|
| | int start = 0; |
| | int m_end = s1.size() - 1; |
| | int n_end = s2.size() - 1; |
| |
|
| | while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) { |
| | diffs.push_back(Diff('m')); |
| | start++; |
| | } |
| | while(start <= m_end && start <= n_end && pred(s1[m_end], s2[n_end])) { |
| | m_end--; |
| | n_end--; |
| | } |
| |
|
| | size_t m_new = m_end - start + 1; |
| | size_t n_new = n_end - start + 1; |
| |
|
| | size_t** c = new size_t*[m_new + 1]; |
| | for(size_t i = 0; i <= m_new; ++i) { |
| | c[i] = new size_t[n_new + 1]; |
| | c[i][0] = 0; |
| | } |
| | for(size_t j = 0; j <= n_new; ++j) |
| | c[0][j] = 0; |
| | for(size_t i = 1; i <= m_new; ++i) |
| | for(size_t j = 1; j <= n_new; ++j) |
| | if(pred(s1[i - 1 + start], s2[j - 1 + start])) |
| | c[i][j] = c[i-1][j-1] + 1; |
| | else |
| | c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j]; |
| |
|
| | CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred); |
| |
|
| | for(size_t i = 0; i <= m_new; ++i) |
| | delete[] c[i]; |
| | delete[] c; |
| |
|
| | for (size_t i = n_end + 1; i < n; ++i) |
| | diffs.push_back(Diff('m')); |
| |
|
| | return diffs; |
| | } |
| |
|
| | template <class Sequence> |
| | Diffs CreateDiff(const Sequence& s1, const Sequence& s2) |
| | { |
| | return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>()); |
| | } |
| |
|
| | template <class Sequence, class Sig, class Stats> |
| | void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats) |
| | { |
| | if(sig.size() != stats.size()) |
| | throw "Signature size differs from score array size."; |
| |
|
| | size_t m = 0, d = 0, i = 0, s = 0; |
| | Diffs diff = CreateDiff(s1, s2); |
| |
|
| | for(int j = 0; j < (int)diff.size(); ++j) { |
| | if(diff[j] == 'm') |
| | m++; |
| | else if(diff[j] == 'd') { |
| | d++; |
| | int k = 0; |
| | while(j - k >= 0 && j + 1 + k < (int)diff.size() && |
| | diff[j - k] == 'd' && diff[j + 1 + k] == 'i') { |
| | d--; |
| | s++; |
| | k++; |
| | } |
| | j += k; |
| | } else if(diff[j] == 'i') |
| | i++; |
| | } |
| |
|
| | for(size_t j = 0; j < sig.size(); ++j) { |
| | switch (sig[j]) { |
| | case 'l': |
| | stats[j] += d + i + s; |
| | break; |
| | case 'm': |
| | stats[j] += m; |
| | break; |
| | case 'd': |
| | stats[j] += d; |
| | break; |
| | case 'i': |
| | stats[j] += i; |
| | break; |
| | case 's': |
| | stats[j] += s; |
| | break; |
| | case 'r': |
| | float macc = 1; |
| | if (d + i + s + m) |
| | macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m); |
| | if(macc > 0) |
| | stats[j] += log(macc); |
| | else |
| | stats[j] += log(1.0/(float)(d + i + s + m + 1)); |
| | break; |
| | } |
| | } |
| | } |
| |
|
| | } |
| |
|
| | #endif |
| |
|