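# Spaces: Running / Running
# (appears to be page-status text captured together with the file; it is not
# part of the module and is kept here only as a comment)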
"""
korean_rules.py
Pure-Python, deterministic Korean grammar rule engine.
No ML. Uses Unicode Hangul decomposition for batchim detection.
"""
# A precomposed Hangul syllable is encoded as
#     HANGUL_BASE + initial * INITIAL_COUNT + medial * MEDIAL_COUNT + final
# so integer division / modulo on (code point - HANGUL_BASE) recovers the
# initial consonant, medial vowel and final consonant (batchim) indices.
HANGUL_BASE = 0xAC00  # code point of '가', the first precomposed syllable
# NOTE: despite the names, these two values are *strides*, not counts of
# initials/medials: 588 syllables share one initial, 28 share one medial.
INITIAL_COUNT = 21 * 28  # 588 per initial
MEDIAL_COUNT = 28
# Final-consonant table indexed by (code point - HANGUL_BASE) % 28.
# Index 0 is None: the syllable has no batchim.
FINALS = [
    None, 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ',
    'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ',
    'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ',
    'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ',
]
RIEUL = 'ㄹ'
| def _last_hangul(word): | |
| for ch in reversed(word): | |
| if '\uAC00' <= ch <= '\uD7A3': | |
| return ch | |
| return '' | |
| def _batchim(syl): | |
| if not syl or not ('\uAC00' <= syl <= '\uD7A3'): | |
| return None | |
| return FINALS[(ord(syl) - HANGUL_BASE) % 28] | |
def has_batchim(word):
    """Return True if the last Hangul syllable of *word* has a final consonant.

    Words containing no Hangul syllable are treated as having no batchim.
    """
    return _batchim(_last_hangul(word)) is not None
def has_batchim_no_rieul(word):
    """Return True if *word* ends in a final consonant other than 'ㄹ'.

    A word whose last Hangul syllable ends in 'ㄹ', ends in a vowel, or that
    contains no Hangul syllable at all yields False.
    """
    final = _batchim(_last_hangul(word))
    return final is not None and final != RIEUL
class KoreanRuleEngine:
    """Rule-based selection of Korean particles and quotation endings.

    Every decision is derived from the last Hangul syllable of the input
    (its final consonant and, for 아/어 endings, its vowel), using arithmetic
    on the Unicode code point. Irregular stems (ㅂ/ㄷ/ㅅ/ㅎ/르 irregulars) are
    not modelled and are conjugated as if they were regular.
    """

    # Medial-vowel indices (position of the vowel inside a syllable block).
    _MEDIAL_A = 0     # ㅏ
    _MEDIAL_EO = 4    # ㅓ
    _MEDIAL_EU = 18   # ㅡ
    # Open-syllable vowels that swallow a following 아/어: ㅏ, ㅓ, ㅕ
    # (가+아 → 가, 서+어 → 서, 켜+어 → 켜).
    _ABSORBING_MEDIALS = frozenset((0, 4, 6))

    # ── Particles ─────────────────────────────────────────────────────────────
    def get_topic_marker(self, noun: str) -> str:
        """Return '은' after a final consonant (ㄹ included), else '는'."""
        return '은' if has_batchim(noun) else '는'

    def get_subject_marker(self, noun: str) -> str:
        """Return '이' after a final consonant (ㄹ included), else '가'."""
        return '이' if has_batchim(noun) else '가'

    def get_object_marker(self, noun: str) -> str:
        """Return '을' after a final consonant, else '를'."""
        return '을' if has_batchim(noun) else '를'

    def get_copula(self, noun: str) -> str:
        """Return '이에요' after a final consonant, else '예요'."""
        return '이에요' if has_batchim(noun) else '예요'

    def get_negative_marker(self, noun: str) -> str:
        """Full negative copula: '이 아니에요' or '가 아니에요'."""
        return f'{self.get_subject_marker(noun)} 아니에요'

    def attach_topic_marker(self, noun: str) -> str:
        """Return *noun* with its topic marker appended."""
        return noun + self.get_topic_marker(noun)

    def attach_subject_marker(self, noun: str) -> str:
        """Return *noun* with its subject marker appended."""
        return noun + self.get_subject_marker(noun)

    def attach_object_marker(self, noun: str) -> str:
        """Return *noun* with its object marker appended."""
        return noun + self.get_object_marker(noun)

    def attach_copula(self, noun: str) -> str:
        """Return *noun* with the polite copula appended."""
        return noun + self.get_copula(noun)

    def attach_negative_copula(self, noun: str) -> str:
        """Return *noun* with the negative copula phrase appended."""
        return noun + self.get_negative_marker(noun)

    # ── Indirect quotation ────────────────────────────────────────────────────
    def conjugate_indirect_quote(self, verb_stem, form, tense='present',
                                 is_adjective=False):
        """Build the indirect-quotation form of *verb_stem*.

        form: statement | command | neg_command |
              request_me | request_other | question | suggestion
              (any other value falls back to stem + '다고')
        tense: past | present | future — only consulted for 'statement';
               any value other than past/future is treated as present
        is_adjective: True for adjectives/있다/없다 — uses plain +다고 in present

        ㄹ-final stems take the vowel-stem endings (만들 → 만든다고, 만들라고,
        만들 거라고). Past and request forms apply the obligatory 아/어
        contractions (가 → 갔다고, 하 → 했다고, 오 → 와 달라고).
        """
        stem = verb_stem
        if form == 'statement':
            return self._quote_statement(stem, tense, is_adjective)
        if form == 'command':
            # -(으)라고: 으 is inserted only after a batchim other than ㄹ.
            return stem + ('으라고' if has_batchim_no_rieul(stem) else '라고')
        if form == 'neg_command':
            return stem + '지 말라고'
        if form == 'request_me':
            return self._connective(stem) + ' 달라고'
        if form == 'request_other':
            return self._connective(stem) + ' 주라고'
        if form == 'question':
            return self._drop_rieul(stem) + '냐고'
        if form == 'suggestion':
            return stem + '자고'
        return stem + '다고'

    def conjugate_regret(self, verb_stem, negative=False):
        """Return the regret pattern '-(으)ㄹ 걸 그랬다' / '-지 말 걸 그랬다'."""
        if negative:
            return verb_stem + '지 말 걸 그랬다'
        return self._prospective(verb_stem) + ' 걸 그랬다'

    # ── Validation ────────────────────────────────────────────────────────────
    def validate_token_order(self, submitted, correct):
        """Return True if both iterables yield equal items in the same order."""
        return list(submitted) == list(correct)

    def validate_particle(self, word, chosen, ptype):
        """Return True if *chosen* is the form this engine picks for *word*.

        ptype: topic | subject | object | copula | negative.
        An unknown ptype always yields False.
        """
        getter = self._particle_getters().get(ptype)
        return getter is not None and chosen == getter(word)

    def get_hint(self, word, ptype):
        """Return an English hint naming the word's ending and the right form.

        The suggested form comes from the same getters validate_particle uses,
        so a hint can never contradict validation. An unknown ptype yields '?'.
        """
        final = _batchim(_last_hangul(word))
        desc = (f"ends with consonant '{final}'" if final
                else "ends with a vowel sound")
        getter = self._particle_getters().get(ptype)
        ans = getter(word) if getter is not None else '?'
        return f"'{word}' {desc} → use {ans}"

    # ── Internal ──────────────────────────────────────────────────────────────
    def _particle_getters(self):
        """Map each particle-type key to the method that selects its form."""
        return {
            'topic': self.get_topic_marker,
            'subject': self.get_subject_marker,
            'object': self.get_object_marker,
            'copula': self.get_copula,
            'negative': self.get_negative_marker,
        }

    @staticmethod
    def _ends_in_syllable(text):
        """Return True if the final character of *text* is a Hangul syllable."""
        return bool(text) and '\uAC00' <= text[-1] <= '\uD7A3'

    @staticmethod
    def _split(syl):
        """Return the (initial, medial, final) indices of a Hangul syllable."""
        code = ord(syl) - HANGUL_BASE
        return (code // INITIAL_COUNT,
                (code % INITIAL_COUNT) // MEDIAL_COUNT,
                code % MEDIAL_COUNT)

    @staticmethod
    def _join(initial, medial, final=0):
        """Compose a Hangul syllable from its three indices."""
        return chr(HANGUL_BASE + initial * INITIAL_COUNT
                   + medial * MEDIAL_COUNT + final)

    def _quote_statement(self, stem, tense, is_adjective):
        """Return the '-다고' statement quotation for the given tense."""
        if tense == 'past':
            # Past = 아/어 form with ㅆ added underneath: 먹어 → 먹었, 가 → 갔.
            return self._attach_batchim(self._connective(stem), 'ㅆ') + '다고'
        if tense == 'future':
            return self._prospective(stem) + ' 거라고'
        # present
        if is_adjective:
            return stem + '다고'  # adjective/있다/없다: stem+다고
        if has_batchim_no_rieul(stem):
            return stem + '는다고'
        # Vowel stems take ㄴ directly; ㄹ stems drop ㄹ first (만들 → 만든).
        return self._attach_batchim(self._drop_rieul(stem), 'ㄴ') + '다고'

    def _prospective(self, stem):
        """Return stem + (으)ㄹ: 가 → 갈, 먹 → 먹을, 만들 → 만들."""
        final = _batchim(_last_hangul(stem))
        if final is None:
            return self._attach_batchim(stem, 'ㄹ')
        if final == RIEUL:
            return stem  # the stem's own ㄹ already serves as the ending
        return stem + '을'

    def _connective(self, stem):
        """Return stem + 아/어 with the obligatory contractions applied.

        Only contractions whose uncontracted spelling is not valid are made:
        하 → 해, ㅏ/ㅓ/ㅕ absorb the ending (가, 서, 켜), 오 → 와, and a
        final ㅡ is dropped (쓰 → 써, 바쁘 → 바빠). Optional contractions
        (보아/봐, 주어/줘, 마시어/마셔, 보내어/보내) are left uncontracted.
        Stems ending in '르' are mostly irregular and are left as stem + 어.
        """
        plain = stem + self._vowel(stem)
        if not self._ends_in_syllable(stem):
            return plain
        last = stem[-1]
        initial, medial, final = self._split(last)
        if final:
            return plain  # closed syllable: nothing can contract
        head = stem[:-1]
        if last == '하':
            return head + '해'
        if medial in self._ABSORBING_MEDIALS:
            return stem
        if last == '오':
            return head + '와'
        if medial == self._MEDIAL_EU and last != '르':
            # With ㅡ gone, harmony is decided by the preceding syllable.
            vowel = self._MEDIAL_A if self._needs_a(head) else self._MEDIAL_EO
            return head + self._join(initial, vowel)
        return plain

    def _needs_a(self, stem):
        """Return True if the last Hangul syllable's vowel is ㅏ or ㅗ."""
        syl = _last_hangul(stem)
        if not syl:
            return False
        medial = ((ord(syl) - HANGUL_BASE) // 28) % 21
        return medial in (0, 8)

    def _vowel(self, stem):
        """Return '아' or '어' according to the stem's vowel harmony."""
        return '아' if self._needs_a(stem) else '어'

    def _drop_rieul(self, stem):
        """Return *stem* with a trailing ㄹ batchim removed (만들 → 만드).

        The stem is returned unchanged unless its final character is a Hangul
        syllable whose batchim is exactly ㄹ.
        """
        if not self._ends_in_syllable(stem):
            return stem
        initial, medial, final = self._split(stem[-1])
        if FINALS[final] != RIEUL:
            return stem
        return stem[:-1] + self._join(initial, medial)

    def _attach_batchim(self, stem, jamo):
        """
        Attach a jamo final consonant to the last syllable of stem.
        e.g. '가' + 'ㄴ' → '간', '가' + 'ㄹ' → '갈'
        Falls back to plain concatenation when *jamo* is not a known final,
        when stem does not end in a Hangul syllable, or when that syllable
        already has a batchim.
        """
        if jamo and jamo in FINALS and self._ends_in_syllable(stem):
            initial, medial, final = self._split(stem[-1])
            if final == 0:
                merged = self._join(initial, medial, FINALS.index(jamo))
                return stem[:-1] + merged
        return stem + jamo
# Module-level engine instance created at import time.
rule_engine = KoreanRuleEngine()