""" korean_rules.py Pure-Python, deterministic Korean grammar rule engine. No ML. Uses Unicode Hangul decomposition for batchim detection. """ HANGUL_BASE = 0xAC00 INITIAL_COUNT = 21 * 28 # 588 per initial MEDIAL_COUNT = 28 FINALS = [ None,'ㄱ','ㄲ','ㄳ','ㄴ','ㄵ','ㄶ','ㄷ', 'ㄹ','ㄺ','ㄻ','ㄼ','ㄽ','ㄾ','ㄿ','ㅀ', 'ㅁ','ㅂ','ㅄ','ㅅ','ㅆ','ㅇ','ㅈ','ㅊ', 'ㅋ','ㅌ','ㅍ','ㅎ', ] RIEUL = 'ㄹ' def _last_hangul(word): for ch in reversed(word): if '\uAC00' <= ch <= '\uD7A3': return ch return '' def _batchim(syl): if not syl or not ('\uAC00' <= syl <= '\uD7A3'): return None return FINALS[(ord(syl) - HANGUL_BASE) % 28] def has_batchim(word): return _batchim(_last_hangul(word)) is not None def has_batchim_no_rieul(word): b = _batchim(_last_hangul(word)) return b is not None and b != RIEUL class KoreanRuleEngine: # ── Particles ───────────────────────────────────────────────────────────── def get_topic_marker(self, noun): return '은' if has_batchim_no_rieul(noun) else '는' def get_subject_marker(self, noun): return '이' if has_batchim_no_rieul(noun) else '가' def get_object_marker(self, noun): return '을' if has_batchim(noun) else '를' def get_copula(self, noun): return '이에요' if has_batchim(noun) else '예요' def get_negative_marker(self, noun): """Full negative copula: '이 아니에요' or '가 아니에요'.""" m = '이' if has_batchim_no_rieul(noun) else '가' return f'{m} 아니에요' def attach_topic_marker(self, noun): return noun + self.get_topic_marker(noun) def attach_subject_marker(self, noun): return noun + self.get_subject_marker(noun) def attach_object_marker(self, noun): return noun + self.get_object_marker(noun) def attach_copula(self, noun): return noun + self.get_copula(noun) def attach_negative_copula(self, noun): return noun + self.get_negative_marker(noun) # ── Indirect quotation ──────────────────────────────────────────────────── def conjugate_indirect_quote(self, verb_stem, form, tense='present', is_adjective=False): """ form: statement | command | neg_command | request_me | request_other | question | suggestion tense: past | present | future is_adjective: True for adjectives/있다/없다 — uses plain +다고 in present """ s = verb_stem if form == 'statement': if tense == 'past': suffix = '았' if self._needs_a(s) else '었' return s + suffix + '다고' if tense == 'future': return (self._attach_batchim(s, 'ㄹ') + ' 거라고' if not has_batchim(s) else s + '을 거라고') # present if is_adjective: return s + '다고' # adjective/있다/없다: stem+다고 return (self._attach_batchim(s, 'ㄴ') + '다고' if not has_batchim(s) else s + '는다고') if form == 'command': return s + ('라고' if not has_batchim(s) else '으라고') if form == 'neg_command': return s + '지 말라고' if form == 'request_me': return s + self._vowel(s) + ' 달라고' if form == 'request_other': return s + self._vowel(s) + ' 주라고' if form == 'question': return self._drop_rieul(s) + '냐고' if form == 'suggestion': return s + '자고' return s + '다고' def conjugate_regret(self, verb_stem, negative=False): if negative: return verb_stem + '지 말 걸 그랬다' if not has_batchim(verb_stem): return self._attach_batchim(verb_stem, 'ㄹ') + ' 걸 그랬다' return verb_stem + '을 걸 그랬다' # ── Validation ──────────────────────────────────────────────────────────── def validate_token_order(self, submitted, correct): return list(submitted) == list(correct) def validate_particle(self, word, chosen, ptype): fn = { 'topic': self.get_topic_marker, 'subject': self.get_subject_marker, 'object': self.get_object_marker, 'copula': self.get_copula, 'negative': self.get_negative_marker, }.get(ptype) return fn is not None and chosen == fn(word) def get_hint(self, word, ptype): syl = _last_hangul(word) b = _batchim(syl) if syl else None desc = f"ends with consonant '{b}'" if b else "ends with a vowel sound" ans = { 'topic': '은' if (b and b != RIEUL) else '는', 'subject': '이' if (b and b != RIEUL) else '가', 'object': '을' if b else '를', 'copula': '이에요' if b else '예요', 'negative': ('이 아니에요' if (b and b != RIEUL) else '가 아니에요'), }.get(ptype, '?') return f"'{word}' {desc} → use {ans}" # ── Internal ────────────────────────────────────────────────────────────── def _needs_a(self, stem): syl = _last_hangul(stem) if not syl: return False medial = ((ord(syl) - HANGUL_BASE) // 28) % 21 return medial in (0, 8) def _vowel(self, stem): return '아' if self._needs_a(stem) else '어' def _drop_rieul(self, stem): syl = _last_hangul(stem) if syl and _batchim(syl) == RIEUL: code = ord(syl) - HANGUL_BASE initial = code // INITIAL_COUNT medial = (code % INITIAL_COUNT) // 28 return stem[:-1] + chr(HANGUL_BASE + initial * INITIAL_COUNT + medial * 28) return stem def _attach_batchim(self, stem, jamo): """ Attach a jamo final consonant to the last syllable of stem. e.g. '가' + 'ㄴ' → '간', '가' + 'ㄹ' → '갈' Only works when last syllable has no existing batchim. """ FINAL_IDX = {f: i for i, f in enumerate(FINALS) if f} idx = FINAL_IDX.get(jamo) if idx is None: return stem + jamo # fallback: just concatenate syl = _last_hangul(stem) if not syl or _batchim(syl) is not None: return stem + jamo # already has batchim code = ord(syl) - HANGUL_BASE initial = code // INITIAL_COUNT medial = (code % INITIAL_COUNT) // 28 new_syl = chr(HANGUL_BASE + initial * INITIAL_COUNT + medial * 28 + idx) return stem[:-1] + new_syl rule_engine = KoreanRuleEngine()