""" QuestionGenerator — Generates Korean grammar questions using rule engine + Gemini. Produces standardized payloads consumed by Unity XR client. """ import json import uuid import random import logging from typing import Optional from korean_rules import rule_engine from content_pack import get_active_pack, get_nouns, get_pronouns, get_verbs, get_adjectives logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Question type → grammar rule mapping # --------------------------------------------------------------------------- QUESTION_TYPES = [ "topic_marker", "copula", "negative_copula", "scrabble", "indirect_quote_dago", "indirect_quote_commands", "indirect_quote_questions", "indirect_quote_suggestions", "regret_expression", ] # Question types that map to grammar rules for mastery tracking QTYPE_TO_RULE = { "topic_marker": "topic_marker", "copula": "copula", "negative_copula": "negative_copula", "scrabble": "topic_marker", # scrabble covers basic sentence structure "indirect_quote_dago": "indirect_quote_dago", "indirect_quote_commands": "indirect_quote_commands", "indirect_quote_questions": "indirect_quote_questions", "indirect_quote_suggestions": "indirect_quote_suggestions", "regret_expression": "regret_expression", } # Difficulty → available question types DIFFICULTY_TYPES = { 1: ["topic_marker", "copula", "negative_copula", "scrabble"], 2: ["topic_marker", "copula", "negative_copula", "scrabble", "indirect_quote_dago", "indirect_quote_commands"], 3: QUESTION_TYPES, } class QuestionGenerator: def __init__(self, gemini_client=None): self.client = gemini_client # ── Main Entry Point ───────────────────────────────────────────────────── def generate(self, difficulty: int = 1, grammar_rule: str = None, history: list = None, session_id: str = None) -> dict: """ Generate a question payload for Unity. Returns a standardized dict with all fields Unity needs. """ pack = get_active_pack() history = history or [] # Select question type q_type = grammar_rule if grammar_rule in QUESTION_TYPES else self._select_type(difficulty, history) try: if q_type == "topic_marker": return self._q_topic_marker(pack) elif q_type == "copula": return self._q_copula(pack) elif q_type == "negative_copula": return self._q_negative_copula(pack) elif q_type == "scrabble": return self._q_scrabble(pack, difficulty) elif q_type == "indirect_quote_dago": return self._q_indirect_dago(pack, difficulty) elif q_type == "indirect_quote_commands": return self._q_indirect_commands(pack) elif q_type == "indirect_quote_questions": return self._q_indirect_questions(pack) elif q_type == "indirect_quote_suggestions": return self._q_indirect_suggestions(pack) elif q_type == "regret_expression": return self._q_regret(pack) except Exception as e: logger.error(f"Question generation error for {q_type}: {e}") # Fallback to simplest question type return self._q_topic_marker(pack) # ── Type 1: Topic Marker Choose ───────────────────────────────────────── def _q_topic_marker(self, pack: dict) -> dict: noun_data = random.choice(get_nouns(pack) + get_pronouns(pack)) noun = noun_data["korean"] correct = rule_engine.get_topic_marker(noun) wrong = '는' if correct == '은' else '은' choices = [correct, wrong] random.shuffle(choices) return self._build_payload( question_type="topic_marker", interaction_mode="choose_select", prompt_korean=noun + "____", prompt_english=f"Choose the correct topic marker for: {noun_data['english']}", choices=choices, answer_key=correct, tokens=[noun, correct], correct_order=[0, 1], slot_count=1, translation=f"{noun_data['english']} (topic)", grammar_rule="topic_marker", hint=rule_engine.get_hint(noun, 'topic'), difficulty=1, metadata={"lesson": "KLP7-base", "word_tested": noun}, ) # ── Type 2: Copula Choose ──────────────────────────────────────────────── def _q_copula(self, pack: dict) -> dict: noun_data = random.choice(get_nouns(pack)) noun = noun_data["korean"] correct = rule_engine.get_copula(noun) wrong = '예요' if correct == '이에요' else '이에요' subject_data = random.choice(get_pronouns(pack)) subject = subject_data["korean"] topic = rule_engine.attach_topic_marker(subject) choices = [correct, wrong] random.shuffle(choices) return self._build_payload( question_type="copula", interaction_mode="choose_select", prompt_korean=f"{topic} {noun}____", prompt_english=f"{subject_data['english']} is a/an {noun_data['english']}", choices=choices, answer_key=correct, tokens=[topic, noun + correct], correct_order=[0, 1], slot_count=1, translation=f"{subject_data['english']} is a/an {noun_data['english']}", grammar_rule="copula", hint=rule_engine.get_hint(noun, 'copula'), difficulty=1, metadata={"lesson": "KLP7-base", "word_tested": noun}, ) # ── Type 3: Negative Copula ────────────────────────────────────────────── def _q_negative_copula(self, pack: dict) -> dict: noun_data = random.choice(get_nouns(pack)) noun = noun_data["korean"] correct_marker = rule_engine.get_subject_marker(noun) wrong_marker = '가' if correct_marker == '이' else '이' subject_data = random.choice(get_pronouns(pack)) subject = subject_data["korean"] topic = rule_engine.attach_topic_marker(subject) choices = [correct_marker, wrong_marker] random.shuffle(choices) return self._build_payload( question_type="negative_copula", interaction_mode="choose_select", prompt_korean=f"{topic} {noun}____ 아니에요", prompt_english=f"{subject_data['english']} is not a/an {noun_data['english']}", choices=choices, answer_key=correct_marker, tokens=[topic, noun + correct_marker + " 아니에요"], correct_order=[0, 1], slot_count=1, translation=f"{subject_data['english']} is not a/an {noun_data['english']}", grammar_rule="negative_copula", hint=rule_engine.get_hint(noun, 'negative'), difficulty=1, metadata={"lesson": "KLP7-base", "word_tested": noun}, ) # ── Type 4: Scrabble (Sentence Assembly) ───────────────────────────────── def _q_scrabble(self, pack: dict, difficulty: int = 1) -> dict: """ Build a shuffled token assembly question. For difficulty 1: simple [Subject+Topic] [Noun+Copula] For difficulty 2+: use Gemini to generate a more complex sentence """ if difficulty >= 2 and self.client: return self._q_scrabble_gemini(pack, difficulty) # Simple rule-based sentence subject_data = random.choice(get_pronouns(pack)) noun_data = random.choice(get_nouns(pack)) subject = subject_data["korean"] noun = noun_data["korean"] token_1 = rule_engine.attach_topic_marker(subject) token_2 = rule_engine.attach_copula(noun) tokens = [token_1, token_2] correct_order = [0, 1] shuffled_tokens = list(tokens) random.shuffle(shuffled_tokens) shuffled_indices = [tokens.index(t) for t in shuffled_tokens] return self._build_payload( question_type="scrabble", interaction_mode="assemble", prompt_korean="", prompt_english=f"{subject_data['english']} is a/an {noun_data['english']}", choices=[], answer_key=None, tokens=shuffled_tokens, correct_order=correct_order, slot_count=len(tokens), translation=f"{subject_data['english']} is a/an {noun_data['english']}", grammar_rule="topic_marker", hint=f"Topic comes first, then the noun with copula", difficulty=difficulty, metadata={ "lesson": "KLP7-base", "sentence": f"{token_1} {token_2}", "shuffled_indices": shuffled_indices, }, ) def _q_scrabble_gemini(self, pack: dict, difficulty: int) -> dict: """Use Gemini to generate a varied Korean sentence for assembly.""" try: vocab_sample = random.sample( [v["korean"] for v in pack["vocab"] if v["type"] in ("noun", "verb")], min(8, len(pack["vocab"])) ) prompt = f"""You are a Korean language teacher generating a sentence assembly exercise. Create a natural Korean sentence using words from this vocabulary: {', '.join(vocab_sample)} Difficulty level: {difficulty} (1=simple 2=intermediate 3=advanced) Rules: - Difficulty 2: 3-4 tokens, include at least one of: 은/는, 이에요/예요, 을/를 - Difficulty 3: 4-6 tokens, may include indirect quotation patterns like -다고, -자고, -냐고 Return ONLY valid JSON, no markdown: {{ "sentence": "complete Korean sentence", "tokens": ["token1", "token2", "token3"], "correct_order": [0, 1, 2], "translation": "English translation", "grammar_focus": "what grammar point this tests" }} The tokens array must be in shuffled order (not the correct order). correct_order must be indices into the tokens array giving the right sequence.""" response = self.client.models.generate_content( model="gemini-2.5-flash", contents=prompt, ) text = response.text.strip() if text.startswith("```"): text = text.split("```")[1] if text.startswith("json"): text = text[4:] data = json.loads(text) return self._build_payload( question_type="scrabble", interaction_mode="assemble", prompt_korean="", prompt_english=data.get("translation", ""), choices=[], answer_key=None, tokens=data.get("tokens", []), correct_order=data.get("correct_order", []), slot_count=len(data.get("tokens", [])), translation=data.get("translation", ""), grammar_rule=QTYPE_TO_RULE.get("scrabble", "topic_marker"), hint=f"Focus on: {data.get('grammar_focus', 'sentence structure')}", difficulty=difficulty, metadata={"lesson": "KLP7-10", "sentence": data.get("sentence", ""), "source": "gemini"}, ) except Exception as e: logger.warning(f"Gemini scrabble failed, falling back to rule-based: {e}") return self._q_scrabble(pack, difficulty=1) # ── Type 5: Indirect Quote -다고 ───────────────────────────────────────── def _q_indirect_dago(self, pack: dict, difficulty: int = 2) -> dict: """Generate a -다고 indirect quotation question.""" if self.client: return self._q_indirect_gemini( pack, "indirect_quote_dago", system="""Generate a -다고 indirect quotation exercise. Return JSON: { "direct_speech": "original Korean sentence", "speaker": "Korean name (e.g. 민호, 지수, 현민)", "indirect_speech": "correctly converted indirect speech", "tokens": ["token1", "token2", ...], "correct_order": [0, 1, ...], "translation": "English translation of indirect speech", "tense": "past/present/future" } Use patterns: verb+ㄴ/는다고, adjective+다고, past+었/았다고, future+ㄹ 거라고 Tokens should be 3-5 elements in shuffled order.""" ) # Fallback: static example return self._build_payload( question_type="indirect_quote_dago", interaction_mode="assemble", prompt_korean='민호: "감기 때문에 많이 아파요"', prompt_english="Minho says he is very sick because of a cold", choices=[], answer_key=None, tokens=["많이", "민호가", "아프다고", "감기 때문에", "했어요"], correct_order=[1, 3, 0, 2, 4], slot_count=5, translation="Minho said he is very sick because of a cold", grammar_rule="indirect_quote_dago", hint="V/Adj + 다고 하다 for statements", difficulty=2, metadata={"lesson": "KLP7-8", "form": "adjective_present"}, ) # ── Type 6: Indirect Quote Commands ───────────────────────────────────── def _q_indirect_commands(self, pack: dict) -> dict: if self.client: return self._q_indirect_gemini( pack, "indirect_quote_commands", system="""Generate a command indirect quotation exercise using one of: -(으)라고 (command), -지 말라고 (negative command), -달라고 (request for self), -주라고 (request for other). Return JSON: { "direct_speech": "original Korean sentence with command/request", "speaker": "Korean name", "listener": "Korean name", "form": "command|neg_command|request_me|request_other", "indirect_speech": "correctly converted indirect speech", "tokens": ["token1", ...], "correct_order": [0, ...], "translation": "English translation" } Tokens 3-5 elements, shuffled.""" ) return self._build_payload( question_type="indirect_quote_commands", interaction_mode="assemble", prompt_korean='의사: "약을 먹으세요"', prompt_english="The doctor said to take medicine", choices=[], answer_key=None, tokens=["했어요", "의사가", "약을", "먹으라고"], correct_order=[1, 2, 3, 0], slot_count=4, translation="The doctor said to take medicine", grammar_rule="indirect_quote_commands", hint="V + (으)라고 for commands", difficulty=2, metadata={"lesson": "KLP7-9", "form": "command"}, ) # ── Type 7: Indirect Quote Questions ──────────────────────────────────── def _q_indirect_questions(self, pack: dict) -> dict: if self.client: return self._q_indirect_gemini( pack, "indirect_quote_questions", system="""Generate a question indirect quotation exercise using -냐고 or -느냐고. Return JSON: { "direct_speech": "original Korean question", "speaker": "Korean name", "indirect_speech": "correctly converted indirect speech", "tokens": ["token1", ...], "correct_order": [0, ...], "translation": "English translation" } Remember: drop ㄹ from stem before 냐고. Tokens 3-5 elements, shuffled.""" ) return self._build_payload( question_type="indirect_quote_questions", interaction_mode="assemble", prompt_korean='현민: "사는 곳이 어디예요?"', prompt_english="Hyunmin asked where you live", choices=[], answer_key=None, tokens=["현민이", "사는 곳이", "물어봤어요", "어디냐고"], correct_order=[0, 1, 3, 2], slot_count=4, translation="Hyunmin asked where you live", grammar_rule="indirect_quote_questions", hint="V/Adj + 냐고 for questions (drop ㄹ)", difficulty=2, metadata={"lesson": "KLP7-10", "form": "question"}, ) # ── Type 8: Indirect Quote Suggestions ────────────────────────────────── def _q_indirect_suggestions(self, pack: dict) -> dict: if self.client: return self._q_indirect_gemini( pack, "indirect_quote_suggestions", system="""Generate a suggestion indirect quotation exercise using -자고. Return JSON: { "direct_speech": "original Korean suggestion", "speaker": "Korean name", "indirect_speech": "correctly converted indirect speech", "tokens": ["token1", ...], "correct_order": [0, ...], "translation": "English translation" } Pattern: V + 자고 하다. Tokens 3-5 elements, shuffled.""" ) return self._build_payload( question_type="indirect_quote_suggestions", interaction_mode="assemble", prompt_korean='친구: "같이 밥 먹자"', prompt_english="My friend suggested eating together", choices=[], answer_key=None, tokens=["친구가", "밥 먹자고", "같이", "했어요"], correct_order=[0, 2, 1, 3], slot_count=4, translation="My friend suggested eating together", grammar_rule="indirect_quote_suggestions", hint="V + 자고 for suggestions", difficulty=2, metadata={"lesson": "KLP7-10", "form": "suggestion"}, ) # ── Type 9: Regret Expression ──────────────────────────────────────────── def _q_regret(self, pack: dict) -> dict: if self.client: return self._q_indirect_gemini( pack, "regret_expression", system="""Generate a Korean regret expression exercise using -(으)ㄹ 걸 그랬다 or -지 말 걸 그랬다. Return JSON: { "situation": "brief situation description in English", "sentence": "Korean regret sentence", "tokens": ["token1", ...], "correct_order": [0, ...], "translation": "English translation (I should have...)", "negative": false } Tokens 3-5 elements, shuffled. Use realistic daily life situations from the KLP7 lessons.""" ) return self._build_payload( question_type="regret_expression", interaction_mode="assemble", prompt_korean="You were late to class because you overslept.", prompt_english="I should have gotten up earlier", choices=[], answer_key=None, tokens=["더", "일어날 걸", "일찍"], correct_order=[0, 2, 1], slot_count=3, translation="I should have gotten up earlier", grammar_rule="regret_expression", hint="Verb stem + (으)ㄹ 걸 그랬다", difficulty=2, metadata={"lesson": "KLP7-10", "negative": False}, ) # ── Gemini Generic Indirect Quote Helper ───────────────────────────────── def _q_indirect_gemini(self, pack: dict, q_type: str, system: str) -> dict: """Generic Gemini-powered indirect quote generator.""" vocab_sample = random.sample( [f"{v['korean']} ({v['english']})" for v in pack["vocab"] if v["type"] in ("noun", "verb", "adjective")], min(10, len(pack["vocab"])) ) prompt = f"""{system} Use vocabulary from this list where natural: {', '.join(vocab_sample)} Return ONLY valid JSON, no markdown backticks.""" try: response = self.client.models.generate_content( model="gemini-2.5-flash", contents=prompt, ) text = response.text.strip() if "```" in text: text = text.split("```")[1] if text.startswith("json"): text = text[4:] data = json.loads(text.strip()) tokens = data.get("tokens", []) correct_order = data.get("correct_order", list(range(len(tokens)))) translation = data.get("translation", "") indirect = data.get("indirect_speech", data.get("sentence", "")) grammar_rule = QTYPE_TO_RULE.get(q_type, q_type) return self._build_payload( question_type=q_type, interaction_mode="assemble", prompt_korean=data.get("direct_speech", ""), prompt_english=data.get("situation", translation), choices=[], answer_key=None, tokens=tokens, correct_order=correct_order, slot_count=len(tokens), translation=translation, grammar_rule=grammar_rule, hint=self._get_hint_for_type(q_type), difficulty=2, metadata={ "lesson": "KLP7-10", "indirect_speech": indirect, "source": "gemini", **{k: v for k, v in data.items() if k not in ("tokens", "correct_order", "translation")}, }, ) except Exception as e: logger.error(f"Gemini indirect quote failed for {q_type}: {e}") raise # ── Helpers ─────────────────────────────────────────────────────────────── def _select_type(self, difficulty: int, history: list) -> str: available = DIFFICULTY_TYPES.get(difficulty, DIFFICULTY_TYPES[1]) recent = [h.get("question_type") for h in history[-3:]] # Avoid repeating the same type 3 times in a row choices = [t for t in available if t not in recent] or available return random.choice(choices) def _get_hint_for_type(self, q_type: str) -> str: hints = { "indirect_quote_dago": "V+ㄴ/는다고, Adj+다고, Past+었/았다고, Future+ㄹ 거라고", "indirect_quote_commands": "(으)라고 commands, 지 말라고 negatives, 달라고/주라고 requests", "indirect_quote_questions": "V/Adj + 냐고 (remember to drop ㄹ from stem)", "indirect_quote_suggestions": "V + 자고 for suggestions", "regret_expression": "(으)ㄹ 걸 그랬다 = should have done; 지 말 걸 = should not have done", } return hints.get(q_type, "Check the grammar rule pattern") def _build_payload(self, **kwargs) -> dict: """Build the standardized question payload sent to Unity.""" return { "question_id": str(uuid.uuid4()), "question_type": kwargs.get("question_type"), "interaction_mode": kwargs.get("interaction_mode"), "prompt_korean": kwargs.get("prompt_korean", ""), "prompt_english": kwargs.get("prompt_english", ""), "tokens": kwargs.get("tokens", []), "correct_order": kwargs.get("correct_order", []), "slot_count": kwargs.get("slot_count", 0), "choices": kwargs.get("choices", []), "answer_key": kwargs.get("answer_key"), "translation": kwargs.get("translation", ""), "grammar_rule": kwargs.get("grammar_rule"), "hint": kwargs.get("hint", ""), "difficulty": kwargs.get("difficulty", 1), "metadata": kwargs.get("metadata", {}), }