KoreAI-API

Running

App Files Files Community

KoreAI-API / korean_rules.py

rairo

Create korean_rules.py

b01dc2a verified 26 days ago

raw

history blame contribute delete

7.25 kB

	"""
	korean_rules.py
	Pure-Python, deterministic Korean grammar rule engine.
	No ML. Uses Unicode Hangul decomposition for batchim detection.
	"""

	# Code point of '가' (U+AC00), the first precomposed Hangul syllable.
	HANGUL_BASE = 0xAC00
	# Number of syllables that share one initial consonant: 21 medials x 28 finals.
	INITIAL_COUNT = 588
	# Number of final-consonant slots per (initial, medial) pair.
	# NOTE(review): despite the name this is the final-slot count, not the number
	# of medial vowels (21); the visible code hard-codes 28 instead of using it.
	MEDIAL_COUNT = 28

	# Final consonants (batchim) in Unicode syllable order; index 0 means "none".
	FINALS = [None, *(
	    'ㄱㄲㄳㄴㄵㄶㄷ'
	    'ㄹㄺㄻㄼㄽㄾㄿㅀ'
	    'ㅁㅂㅄㅅㅆㅇㅈㅊ'
	    'ㅋㅌㅍㅎ'
	)]
	# The final consonant singled out by has_batchim_no_rieul.
	RIEUL = 'ㄹ'


	def _last_hangul(word):
	for ch in reversed(word):
	if '\uAC00' <= ch <= '\uD7A3':
	return ch
	return ''

	def _batchim(syl):
	    """Return the final consonant (batchim) of one Hangul syllable.

	    ``syl`` must be exactly one precomposed syllable (U+AC00..U+D7A3). The
	    final-consonant slot is the syllable's offset from HANGUL_BASE modulo 28,
	    which indexes FINALS.

	    Returns None for an empty string, for anything that is not a single
	    precomposed syllable, and for a syllable with no final consonant.
	    """
	    # The length guard matters: a longer string such as '한글' would pass the
	    # lexicographic range comparison and then make ord() raise TypeError.
	    if not syl or len(syl) != 1 or not ('\uAC00' <= syl <= '\uD7A3'):
	        return None
	    return FINALS[(ord(syl) - HANGUL_BASE) % 28]

	def has_batchim(word):
	    """Return True when the last Hangul syllable of ``word`` ends in a consonant.

	    Words with no Hangul syllable at all count as having no batchim.
	    """
	    final = _batchim(_last_hangul(word))
	    return final is not None

	def has_batchim_no_rieul(word):
	    """Return True when ``word`` ends in a final consonant other than ㄹ.

	    False both for vowel-final words and for words whose batchim is ㄹ.
	    """
	    final = _batchim(_last_hangul(word))
	    if final is None:
	        return False
	    return final != RIEUL


	class KoreanRuleEngine:
	    """Rule-based Korean particle selection and verb-ending conjugation.

	    Every decision is derived from the last precomposed Hangul syllable of the
	    input (its final consonant and its vowel). Irregular stems (ㅂ, ㄷ, ㅅ, 르,
	    ㅎ irregulars and ㅡ-dropping stems) are not detected; they are conjugated
	    as if they were regular.
	    """

	    # Medial-vowel index of an open stem syllable -> index after it fuses with
	    # a following 아/어: ㅏ ㅐ ㅓ ㅔ ㅕ absorb it, ㅗ→ㅘ, ㅜ→ㅝ, ㅣ→ㅕ.
	    _FUSED_MEDIAL = {0: 0, 1: 1, 4: 4, 5: 5, 6: 6, 8: 9, 13: 14, 20: 6}

	    # ── Particles ─────────────────────────────────────────────────────────────

	    def get_topic_marker(self, noun):
	        """Return '은' after any final consonant (ㄹ included), else '는'."""
	        return '은' if has_batchim(noun) else '는'

	    def get_subject_marker(self, noun):
	        """Return '이' after any final consonant (ㄹ included), else '가'."""
	        return '이' if has_batchim(noun) else '가'

	    def get_object_marker(self, noun):
	        """Return '을' after a final consonant, else '를'."""
	        return '을' if has_batchim(noun) else '를'

	    def get_copula(self, noun):
	        """Return the polite copula: '이에요' after a consonant, else '예요'."""
	        return '이에요' if has_batchim(noun) else '예요'

	    def get_negative_marker(self, noun):
	        """Full negative copula: '이 아니에요' or '가 아니에요'."""
	        return f'{self.get_subject_marker(noun)} 아니에요'

	    def attach_topic_marker(self, noun):
	        """Return ``noun`` with its topic marker appended."""
	        return noun + self.get_topic_marker(noun)

	    def attach_subject_marker(self, noun):
	        """Return ``noun`` with its subject marker appended."""
	        return noun + self.get_subject_marker(noun)

	    def attach_object_marker(self, noun):
	        """Return ``noun`` with its object marker appended."""
	        return noun + self.get_object_marker(noun)

	    def attach_copula(self, noun):
	        """Return ``noun`` with the polite copula appended."""
	        return noun + self.get_copula(noun)

	    def attach_negative_copula(self, noun):
	        """Return ``noun`` with the full negative copula appended."""
	        return noun + self.get_negative_marker(noun)

	    # ── Indirect quotation ────────────────────────────────────────────────────

	    def conjugate_indirect_quote(self, verb_stem, form, tense='present',
	                                 is_adjective=False):
	        """Conjugate ``verb_stem`` into an indirect-quotation ending.

	        form: statement | command | neg_command |
	              request_me | request_other | question | suggestion
	              (any other value falls back to stem + '다고')
	        tense: past | future; any other value is treated as present.
	               Only consulted when form is 'statement'.
	        is_adjective: True for adjectives/있다/없다 — uses plain +다고 in present
	        """
	        s = verb_stem
	        if form == 'statement':
	            if tense == 'past':
	                # Past = 아/어 form with ㅆ underneath: 먹어→먹었, 가→갔, 해→했.
	                return self._attach_batchim(self._join_a_eo(s), 'ㅆ') + '다고'
	            if tense == 'future':
	                return self._prospective(s) + ' 거라고'
	            # present
	            if is_adjective:
	                return s + '다고'
	            if has_batchim_no_rieul(s):
	                return s + '는다고'
	            # Vowel stems take ㄴ directly; ㄹ stems drop ㄹ first (살 → 산다고).
	            return self._attach_batchim(self._drop_rieul(s), 'ㄴ') + '다고'
	        if form == 'command':
	            # ㄹ stems behave like vowel stems here: 살 → 살라고, not 살으라고.
	            return s + ('으라고' if has_batchim_no_rieul(s) else '라고')
	        if form == 'neg_command':
	            return s + '지 말라고'
	        if form == 'request_me':
	            return self._join_a_eo(s) + ' 달라고'
	        if form == 'request_other':
	            return self._join_a_eo(s) + ' 주라고'
	        if form == 'question':
	            return self._drop_rieul(s) + '냐고'
	        if form == 'suggestion':
	            return s + '자고'
	        return s + '다고'

	    def conjugate_regret(self, verb_stem, negative=False):
	        """Return the regret pattern '-(으)ㄹ 걸 그랬다' (or '-지 말 걸 그랬다')."""
	        if negative:
	            return verb_stem + '지 말 걸 그랬다'
	        return self._prospective(verb_stem) + ' 걸 그랬다'

	    # ── Validation ────────────────────────────────────────────────────────────

	    def validate_token_order(self, submitted, correct):
	        """Return True when both iterables yield equal items in the same order."""
	        return list(submitted) == list(correct)

	    def validate_particle(self, word, chosen, ptype):
	        """Return True when ``chosen`` is the particle this engine picks.

	        ptype: topic | subject | object | copula | negative. An unknown ptype
	        always yields False.
	        """
	        fn = self._particle_rules().get(ptype)
	        return fn is not None and chosen == fn(word)

	    def get_hint(self, word, ptype):
	        """Return a one-line English hint naming the ending and the answer.

	        The answer comes from the same rule methods that validate_particle
	        uses, so hint and validation cannot disagree. An unknown ptype yields
	        '?' as the answer.
	        """
	        b = _batchim(_last_hangul(word))
	        desc = f"ends with consonant '{b}'" if b else "ends with a vowel sound"
	        fn = self._particle_rules().get(ptype)
	        ans = fn(word) if fn is not None else '?'
	        return f"'{word}' {desc} → use {ans}"

	    # ── Internal ──────────────────────────────────────────────────────────────

	    def _particle_rules(self):
	        """Map each particle type name to the method that selects it."""
	        return {
	            'topic': self.get_topic_marker,
	            'subject': self.get_subject_marker,
	            'object': self.get_object_marker,
	            'copula': self.get_copula,
	            'negative': self.get_negative_marker,
	        }

	    @staticmethod
	    def _locate_last_syllable(text):
	        """Find the last precomposed Hangul syllable in ``text``.

	        Returns (position, initial, medial, final) with the three jamo given
	        as Unicode slot indices, or None when ``text`` has no such syllable.
	        """
	        for pos in range(len(text) - 1, -1, -1):
	            if '\uAC00' <= text[pos] <= '\uD7A3':
	                code = ord(text[pos]) - HANGUL_BASE
	                within_initial = code % INITIAL_COUNT
	                return (pos, code // INITIAL_COUNT,
	                        within_initial // 28, within_initial % 28)
	        return None

	    @staticmethod
	    def _swap_syllable(text, pos, initial, medial, final=0):
	        """Replace the character at ``pos`` with the syllable built from indices."""
	        syl = chr(HANGUL_BASE + initial * INITIAL_COUNT + medial * 28 + final)
	        return text[:pos] + syl + text[pos + 1:]

	    def _needs_a(self, stem):
	        """Return True when the stem's last vowel is ㅏ or ㅗ (takes 아, not 어)."""
	        found = self._locate_last_syllable(stem)
	        return found is not None and found[2] in (0, 8)

	    def _vowel(self, stem):
	        """Return the harmonising connective vowel for ``stem``: '아' or '어'."""
	        return '아' if self._needs_a(stem) else '어'

	    def _join_a_eo(self, stem):
	        """Return ``stem`` joined with 아/어, applying regular contractions.

	        Consonant-final stems simply get the vowel appended (먹 → 먹어). A stem
	        that ends in an open syllable is fused instead: 하 → 해, 가 → 가,
	        서 → 서, 보 → 봐, 주 → 줘, 마시 → 마셔. Vowels with no entry in
	        _FUSED_MEDIAL keep the uncontracted form (되 → 되어).
	        """
	        found = self._locate_last_syllable(stem)
	        # Fusion needs the stem to literally end in a syllable with no batchim.
	        if found is None or found[0] != len(stem) - 1 or found[3] != 0:
	            return stem + self._vowel(stem)
	        pos, initial, medial, _ = found
	        if stem[pos] == '하':
	            return stem[:pos] + '해'
	        fused = self._FUSED_MEDIAL.get(medial)
	        if fused is None:
	            return stem + self._vowel(stem)
	        return self._swap_syllable(stem, pos, initial, fused)

	    def _prospective(self, stem):
	        """Return the -(으)ㄹ form of ``stem``: 먹 → 먹을, 가 → 갈, 살 → 살."""
	        if has_batchim_no_rieul(stem):
	            return stem + '을'
	        if has_batchim(stem):
	            # The stem already ends in ㄹ; no extra 을/ㄹ is added.
	            return stem
	        return self._attach_batchim(stem, 'ㄹ')

	    def _drop_rieul(self, stem):
	        """Remove a final ㄹ from the last Hangul syllable (살 → 사).

	        The stem is returned unchanged when that syllable does not end in ㄹ.
	        Characters after the syllable are preserved.
	        """
	        found = self._locate_last_syllable(stem)
	        if found is None or FINALS[found[3]] != RIEUL:
	            return stem
	        pos, initial, medial, _ = found
	        return self._swap_syllable(stem, pos, initial, medial)

	    def _attach_batchim(self, stem, jamo):
	        """
	        Attach a jamo final consonant to the last syllable of stem.
	        e.g. '가' + 'ㄴ' → '간', '가' + 'ㄹ' → '갈'

	        Falls back to plain concatenation (stem + jamo) when jamo is not a
	        valid final consonant, when stem has no Hangul syllable, or when the
	        last syllable already has a batchim. Characters after the syllable
	        are preserved.
	        """
	        final = FINALS.index(jamo) if jamo and jamo in FINALS else None
	        found = self._locate_last_syllable(stem)
	        if final is None or found is None or found[3] != 0:
	            return stem + jamo
	        pos, initial, medial, _ = found
	        return self._swap_syllable(stem, pos, initial, medial, final)


	# Module-level engine instance; the class as shown defines no __init__ and
	# assigns no instance attributes.
	rule_engine = KoreanRuleEngine()