File size: 5,715 Bytes
fd49381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#include "SymbolRangeCalculator.h"

namespace Moses
{
namespace Syntax
{
namespace S2T
{

void SymbolRangeCalculator::Calc(const PatternApplicationKey &key,
                                 int spanStart, int spanEnd,
                                 std::vector<SymbolRange> &ranges)
{
  FillInTerminalRanges(key, ranges);
  FillInAuxSymbolInfo(ranges);
  FillInGapRanges(key, spanStart, spanEnd, ranges);
}

// Fill in ranges for terminals and set ranges to -1 for non-terminals.
void SymbolRangeCalculator::FillInTerminalRanges(
  const PatternApplicationKey &key, std::vector<SymbolRange> &ranges)
{
  ranges.resize(key.size());
  for (std::size_t i = 0; i < key.size(); ++i) {
    const PatternApplicationTrie *patNode = key[i];
    if (patNode->IsTerminalNode()) {
      ranges[i].minStart = ranges[i].maxStart = patNode->m_start;
      ranges[i].minEnd = ranges[i].maxEnd = patNode->m_end;
    } else {
      ranges[i].minStart = ranges[i].maxStart = -1;
      ranges[i].minEnd = ranges[i].maxEnd = -1;
    }
  }
}

void SymbolRangeCalculator::FillInAuxSymbolInfo(
  const std::vector<SymbolRange> &ranges)
{
  m_auxSymbolInfo.resize(ranges.size());

  // Forward pass: set distanceToPrevTerminal.
  int distanceToPrevTerminal = -1;
  for (std::size_t i = 0; i < ranges.size(); ++i) {
    const SymbolRange &range = ranges[i];
    AuxSymbolInfo &auxInfo = m_auxSymbolInfo[i];
    if (range.minStart != -1) {
      // Symbol i is a terminal.
      assert(range.maxStart == range.minStart);
      distanceToPrevTerminal = 1;
      // Distances are not used for terminals so set auxInfo value to -1.
      auxInfo.distanceToPrevTerminal = -1;
    } else if (distanceToPrevTerminal == -1) {
      // Symbol i is a non-terminal and there are no preceding terminals.
      auxInfo.distanceToPrevTerminal = -1;
    } else {
      // Symbol i is a non-terminal and there is a preceding terminal.
      auxInfo.distanceToPrevTerminal = distanceToPrevTerminal++;
    }
  }

  // Backward pass: set distanceToNextTerminal
  int distanceToNextTerminal = -1;
  for (std::size_t j = ranges.size(); j > 0; --j) {
    std::size_t i = j-1;
    const SymbolRange &range = ranges[i];
    AuxSymbolInfo &auxInfo = m_auxSymbolInfo[i];
    if (range.minStart != -1) {
      // Symbol i is a terminal.
      assert(range.maxStart == range.minStart);
      distanceToNextTerminal = 1;
      // Distances are not used for terminals so set auxInfo value to -1.
      auxInfo.distanceToNextTerminal = -1;
    } else if (distanceToNextTerminal == -1) {
      // Symbol i is a non-terminal and there are no succeeding terminals.
      auxInfo.distanceToNextTerminal = -1;
    } else {
      // Symbol i is a non-terminal and there is a succeeding terminal.
      auxInfo.distanceToNextTerminal = distanceToNextTerminal++;
    }
  }
}

void SymbolRangeCalculator::FillInGapRanges(const PatternApplicationKey &key,
    int spanStart, int spanEnd,
    std::vector<SymbolRange> &ranges)
{
  for (std::size_t i = 0; i < key.size(); ++i) {
    const PatternApplicationTrie *patNode = key[i];

    if (patNode->IsTerminalNode()) {
      continue;
    }

    SymbolRange &range = ranges[i];
    AuxSymbolInfo &auxInfo = m_auxSymbolInfo[i];

    // Determine minimum start position.
    if (auxInfo.distanceToPrevTerminal == -1) {
      // There are no preceding terminals in pattern.
      range.minStart = spanStart + i;
    } else {
      // There is at least one preceeding terminal in the pattern.
      int j = i - auxInfo.distanceToPrevTerminal;
      assert(ranges[j].minEnd == ranges[j].maxEnd);
      range.minStart = ranges[j].maxEnd + auxInfo.distanceToPrevTerminal;
    }

    // Determine maximum start position.
    if (i == 0) {
      // Gap is leftmost symbol in pattern.
      range.maxStart = spanStart;
    } else if (auxInfo.distanceToPrevTerminal == 1) {
      // Gap follows terminal so start position is fixed.
      range.maxStart = ranges[i-1].maxEnd + 1;
    } else if (auxInfo.distanceToNextTerminal == -1) {
      // There are no succeeding terminals in the pattern.
      int numFollowingGaps = (ranges.size()-1) - i;
      range.maxStart = spanEnd - numFollowingGaps;
    } else {
      // There is at least one succeeding terminal in the pattern.
      int j = i + auxInfo.distanceToNextTerminal;
      range.maxStart = ranges[j].minStart - auxInfo.distanceToNextTerminal;
    }

    // Determine minimum end position.
    if (i+1 == key.size()) {
      // Gap is rightmost symbol in pattern.
      range.minEnd = spanEnd;
    } else if (auxInfo.distanceToNextTerminal == 1) {
      // Gap immediately precedes terminal.
      range.minEnd = ranges[i+1].minStart - 1;
    } else if (auxInfo.distanceToPrevTerminal == -1) {
      // There are no preceding terminals in pattern.
      range.minEnd = spanStart + i;
    } else {
      // There is at least one preceeding terminal in the pattern.
      int j = i - auxInfo.distanceToPrevTerminal;
      assert(ranges[j].minEnd == ranges[j].maxEnd);
      range.minEnd = ranges[j].maxEnd + auxInfo.distanceToPrevTerminal;
    }

    // Determine maximum end position.
    if (i+1 == key.size()) {
      // Gap is rightmost symbol in pattern.
      range.maxEnd = spanEnd;
    } else if (auxInfo.distanceToNextTerminal == -1) {
      // There are no succeeding terminals in the pattern.
      int numFollowingGaps = (ranges.size()-1) - i;
      range.maxEnd = spanEnd - numFollowingGaps;
    } else {
      // There is at least one succeeding terminal in the pattern.
      int j = i + auxInfo.distanceToNextTerminal;
      range.maxEnd = ranges[j].minStart - auxInfo.distanceToNextTerminal;
    }
  }
}

}  // namespace S2T
}  // namespace Syntax
}  // namespace Moses