File size: 7,247 Bytes
b01dc2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""
korean_rules.py
Pure-Python, deterministic Korean grammar rule engine.
No ML. Uses Unicode Hangul decomposition for batchim detection.
"""

# Precomposed Hangul syllables are laid out arithmetically from HANGUL_BASE:
#   code point = HANGUL_BASE + initial * INITIAL_COUNT + medial * MEDIAL_COUNT + final
HANGUL_BASE = 0xAC00
# Code points spanned by one medial vowel: 28 final slots (index 0 = no final).
MEDIAL_COUNT = 28
# Code points spanned by one initial consonant: 21 medials x 28 finals = 588.
INITIAL_COUNT = 21 * MEDIAL_COUNT

# Final consonants (batchim) indexed by `(code point - HANGUL_BASE) % 28`.
# Index 0 is None, meaning the syllable ends in a vowel.
FINALS = [None, *'ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ']
RIEUL = 'ㄹ'


def _last_hangul(word):
    for ch in reversed(word):
        if '\uAC00' <= ch <= '\uD7A3':
            return ch
    return ''

def _batchim(syl):
    if not syl or not ('\uAC00' <= syl <= '\uD7A3'):
        return None
    return FINALS[(ord(syl) - HANGUL_BASE) % 28]

def has_batchim(word):
    """Return True when the last Hangul syllable of *word* ends in a consonant.

    Words containing no Hangul syllable yield False.
    """
    final = _batchim(_last_hangul(word))
    return final is not None

def has_batchim_no_rieul(word):
    """Return True when *word* ends in a final consonant other than ㄹ.

    False is returned for vowel-final words, ㄹ-final words, and words with
    no Hangul syllable at all.
    """
    final = _batchim(_last_hangul(word))
    if final is None:
        return False
    return final != RIEUL


class KoreanRuleEngine:
    """Deterministic rule engine for Korean particles and quotation endings.

    All decisions are made from the final consonant (batchim) of the last
    Hangul syllable of the input, obtained by Unicode arithmetic.

    Two different batchim tests are used on purpose:

    * Noun particles (은/는, 이/가, 을/를, 이에요/예요) depend only on whether
      a batchim exists; ㄹ is an ordinary consonant here (물은, 물이, 물을).
    * Verb endings that insert 으 treat ㄹ-final stems like vowel-final
      stems (살 거라고, 살라고), and endings starting with ㄴ drop the ㄹ
      (산다고, 사냐고).

    NOTE(review): 아/어 contraction and irregular stems (ㅂ, ㄷ, ㅅ, 르, 하 ...)
    are not handled. Past-tense and request forms are built by plain
    concatenation, e.g. '가' -> '가았다고', '가아 달라고'.
    """

    # ── Particles ─────────────────────────────────────────────────────────────

    def get_topic_marker(self, noun):
        """Return '은' after a final consonant (ㄹ included), otherwise '는'."""
        return '은' if has_batchim(noun) else '는'

    def get_subject_marker(self, noun):
        """Return '이' after a final consonant (ㄹ included), otherwise '가'."""
        return '이' if has_batchim(noun) else '가'

    def get_object_marker(self, noun):
        """Return '을' after a final consonant, otherwise '를'."""
        return '을' if has_batchim(noun) else '를'

    def get_copula(self, noun):
        """Return '이에요' after a final consonant, otherwise '예요'."""
        return '이에요' if has_batchim(noun) else '예요'

    def get_negative_marker(self, noun):
        """Full negative copula: '이 아니에요' or '가 아니에요'."""
        # The negative copula is the subject marker followed by 아니에요.
        return f'{self.get_subject_marker(noun)} 아니에요'

    def attach_topic_marker(self, noun):
        """Return *noun* with its topic marker appended."""
        return noun + self.get_topic_marker(noun)

    def attach_subject_marker(self, noun):
        """Return *noun* with its subject marker appended."""
        return noun + self.get_subject_marker(noun)

    def attach_object_marker(self, noun):
        """Return *noun* with its object marker appended."""
        return noun + self.get_object_marker(noun)

    def attach_copula(self, noun):
        """Return *noun* with the polite copula appended."""
        return noun + self.get_copula(noun)

    def attach_negative_copula(self, noun):
        """Return *noun* with the negative copula appended (no space added)."""
        return noun + self.get_negative_marker(noun)

    # ── Indirect quotation ────────────────────────────────────────────────────

    def conjugate_indirect_quote(self, verb_stem, form, tense='present',
                                  is_adjective=False):
        """
        Build the indirect-quotation form of *verb_stem*.

        form: statement | command | neg_command |
              request_me | request_other | question | suggestion
              (any other value falls back to stem + '다고')
        tense: past | present | future  (only consulted for 'statement';
               anything other than past/future is treated as present)
        is_adjective: True for adjectives/있다/없다 — uses plain +다고 in present

        ㄹ-final stems follow the ㄹ-irregular pattern: no 으 is inserted
        (살 거라고, 살라고) and ㄹ drops before ㄴ (산다고, 사냐고).
        """
        s = verb_stem
        if form == 'statement':
            if tense == 'past':
                # Plain concatenation; no 아/어 contraction is applied.
                suffix = '았' if self._needs_a(s) else '었'
                return s + suffix + '다고'
            if tense == 'future':
                return self._prospective(s) + ' 거라고'
            # present
            if is_adjective:
                return s + '다고'   # adjective/있다/없다: stem+다고
            if not has_batchim(s):
                return self._attach_batchim(s, 'ㄴ') + '다고'
            if has_batchim_no_rieul(s):
                return s + '는다고'
            # ㄹ-final stem: drop ㄹ, then take ㄴ like a vowel stem (살 → 산다고).
            return self._attach_batchim(self._drop_rieul(s), 'ㄴ') + '다고'
        if form == 'command':
            # 으 is inserted only after a consonant other than ㄹ.
            return s + ('으라고' if has_batchim_no_rieul(s) else '라고')
        if form == 'neg_command':
            return s + '지 말라고'
        if form == 'request_me':
            return s + self._vowel(s) + ' 달라고'
        if form == 'request_other':
            return s + self._vowel(s) + ' 주라고'
        if form == 'question':
            return self._drop_rieul(s) + '냐고'
        if form == 'suggestion':
            return s + '자고'
        return s + '다고'

    def conjugate_regret(self, verb_stem, negative=False):
        """Return the regret form: stem + (으)ㄹ 걸 그랬다 / 지 말 걸 그랬다.

        With ``negative=True`` the result is stem + '지 말 걸 그랬다'.
        ㄹ-final stems take no extra 을 (살 → '살 걸 그랬다').
        """
        if negative:
            return verb_stem + '지 말 걸 그랬다'
        return self._prospective(verb_stem) + ' 걸 그랬다'

    # ── Validation ────────────────────────────────────────────────────────────

    def validate_token_order(self, submitted, correct):
        """Return True when both iterables hold equal items in equal order."""
        return list(submitted) == list(correct)

    def validate_particle(self, word, chosen, ptype):
        """Return True when *chosen* is the correct particle of kind *ptype*.

        ptype: topic | subject | object | copula | negative.
        An unknown *ptype* always yields False.
        """
        fn = self._particle_getter(ptype)
        return fn is not None and chosen == fn(word)

    def get_hint(self, word, ptype):
        """Return a one-line English hint explaining which particle to use.

        For an unknown *ptype* the suggested answer is '?'.
        """
        syl  = _last_hangul(word)
        b    = _batchim(syl) if syl else None
        desc = f"ends with consonant '{b}'" if b else "ends with a vowel sound"
        # Use the same getters as validate_particle so hint and check agree.
        fn   = self._particle_getter(ptype)
        ans  = fn(word) if fn is not None else '?'
        return f"'{word}' {desc} → use {ans}"

    # ── Internal ──────────────────────────────────────────────────────────────

    def _particle_getter(self, ptype):
        """Map a particle-type name to its getter method, or None if unknown."""
        return {
            'topic':    self.get_topic_marker,
            'subject':  self.get_subject_marker,
            'object':   self.get_object_marker,
            'copula':   self.get_copula,
            'negative': self.get_negative_marker,
        }.get(ptype)

    def _needs_a(self, stem):
        """Return True when the stem's last vowel is ㅏ or ㅗ (takes 아/았)."""
        syl = _last_hangul(stem)
        if not syl:
            return False
        medial = ((ord(syl) - HANGUL_BASE) // 28) % 21
        return medial in (0, 8)   # medial index 0 = ㅏ, 8 = ㅗ

    def _vowel(self, stem):
        """Return the connective vowel '아' or '어' for *stem*."""
        return '아' if self._needs_a(stem) else '어'

    def _prospective(self, stem):
        """Return stem + (으)ㄹ: 가 → 갈, 먹 → 먹을, 살 → 살."""
        if not has_batchim(stem):
            return self._attach_batchim(stem, 'ㄹ')
        if has_batchim_no_rieul(stem):
            return stem + '을'
        return stem   # ㄹ-final stem already ends in ㄹ

    def _swap_last_hangul(self, stem, old_syl, new_syl):
        """Replace the last occurrence of *old_syl* in *stem* with *new_syl*.

        *old_syl* must be the last Hangul syllable of *stem*; any trailing
        non-Hangul characters are kept in place after the replacement.
        """
        pos = stem.rfind(old_syl)
        return stem[:pos] + new_syl + stem[pos + 1:]

    def _drop_rieul(self, stem):
        """Remove a final ㄹ from the last syllable (살 → 사); else no change."""
        syl = _last_hangul(stem)
        if not syl or _batchim(syl) != RIEUL:
            return stem
        code = ord(syl) - HANGUL_BASE
        # Clearing the final-consonant slot leaves initial + medial only.
        bare = chr(HANGUL_BASE + code - code % 28)
        return self._swap_last_hangul(stem, syl, bare)

    def _attach_batchim(self, stem, jamo):
        """
        Attach a jamo final consonant to the last syllable of stem.
        e.g. '가' + 'ㄴ' → '간',  '가' + 'ㄹ' → '갈'
        Only works when last syllable has no existing batchim; otherwise
        (or when *jamo* is not a valid final, or stem has no Hangul) the
        jamo is simply appended to the end of stem.
        """
        idx = FINALS.index(jamo) if jamo in FINALS else 0
        syl = _last_hangul(stem)
        if idx == 0 or not syl or _batchim(syl) is not None:
            return stem + jamo   # fallback: just concatenate
        # A batchim-less syllable sits at final slot 0, so adding the final's
        # index yields the same syllable with that final attached.
        return self._swap_last_hangul(stem, syl, chr(ord(syl) + idx))


# Module-level KoreanRuleEngine instance, created once at import time.
rule_engine = KoreanRuleEngine()