Spaces:
Running
Running
| """ | |
| QuestionGenerator — Generates Korean grammar questions using rule engine + Gemini. | |
| Produces standardized payloads consumed by Unity XR client. | |
| """ | |
| import json | |
| import uuid | |
| import random | |
| import logging | |
| from typing import Optional | |
| from korean_rules import rule_engine | |
| from content_pack import get_active_pack, get_nouns, get_pronouns, get_verbs, get_adjectives | |
| logger = logging.getLogger(__name__) | |
# ---------------------------------------------------------------------------
# Question type → grammar rule mapping
# ---------------------------------------------------------------------------
# Every question type the generator can produce, in curriculum order.
QUESTION_TYPES = [
    "topic_marker",
    "copula",
    "negative_copula",
    "scrabble",
    "indirect_quote_dago",
    "indirect_quote_commands",
    "indirect_quote_questions",
    "indirect_quote_suggestions",
    "regret_expression",
]

# Question types that map to grammar rules for mastery tracking
QTYPE_TO_RULE = {
    "topic_marker": "topic_marker",
    "copula": "copula",
    "negative_copula": "negative_copula",
    "scrabble": "topic_marker",  # scrabble covers basic sentence structure
    "indirect_quote_dago": "indirect_quote_dago",
    "indirect_quote_commands": "indirect_quote_commands",
    "indirect_quote_questions": "indirect_quote_questions",
    "indirect_quote_suggestions": "indirect_quote_suggestions",
    "regret_expression": "regret_expression",
}

# Difficulty → available question types
DIFFICULTY_TYPES = {
    1: ["topic_marker", "copula", "negative_copula", "scrabble"],
    2: ["topic_marker", "copula", "negative_copula", "scrabble",
        "indirect_quote_dago", "indirect_quote_commands"],
    # Copy rather than alias: mutating one list must not silently change the other.
    3: list(QUESTION_TYPES),
}
class QuestionGenerator:
    """Build Korean grammar question payloads for the Unity client.

    Simple question types are produced locally from the active content pack
    and ``rule_engine``. When a Gemini client is supplied, sentence-assembly
    and quotation exercises are generated by the model; every such path has a
    local fallback so a model failure never changes the payload shape.

    For ``assemble`` payloads ``tokens`` is the shuffled list shown to the
    learner and ``correct_order`` lists indices into ``tokens`` that spell the
    correct sentence.
    """

    # Gemini model used for every model-generated exercise.
    _GEMINI_MODEL = "gemini-2.5-flash"

    def __init__(self, gemini_client=None):
        # Optional Gemini client; when falsy only rule-based/static questions are used.
        self.client = gemini_client

    # ── Main Entry Point ─────────────────────────────────────────────────────
    def generate(self, difficulty: int = 1, grammar_rule: Optional[str] = None,
                 history: Optional[list] = None, session_id: Optional[str] = None) -> dict:
        """
        Generate a question payload for Unity.

        Args:
            difficulty: Difficulty level used to pick a question type and
                forwarded to the builders that accept it.
            grammar_rule: Forces this question type when it is one of
                ``QUESTION_TYPES``; otherwise a type is picked automatically.
            history: Previous question dicts (``question_type`` is read) used
                to avoid recently asked types.
            session_id: Accepted for interface compatibility; not used here.

        Returns:
            A standardized dict with all fields Unity needs. If the selected
            builder raises, a topic-marker question is returned instead.
        """
        pack = get_active_pack()
        history = history or []

        # Select question type
        if grammar_rule in QUESTION_TYPES:
            q_type = grammar_rule
        else:
            q_type = self._select_type(difficulty, history)

        builders = {
            "topic_marker": lambda: self._q_topic_marker(pack),
            "copula": lambda: self._q_copula(pack),
            "negative_copula": lambda: self._q_negative_copula(pack),
            "scrabble": lambda: self._q_scrabble(pack, difficulty),
            "indirect_quote_dago": lambda: self._q_indirect_dago(pack, difficulty),
            "indirect_quote_commands": lambda: self._q_indirect_commands(pack),
            "indirect_quote_questions": lambda: self._q_indirect_questions(pack),
            "indirect_quote_suggestions": lambda: self._q_indirect_suggestions(pack),
            "regret_expression": lambda: self._q_regret(pack),
        }
        try:
            builder = builders.get(q_type)
            if builder is not None:
                return builder()
        except Exception as e:
            logger.error("Question generation error for %s: %s", q_type, e)

        # Fallback to simplest question type
        return self._q_topic_marker(pack)

    # ── Type 1: Topic Marker Choose ─────────────────────────────────────────
    def _q_topic_marker(self, pack: dict) -> dict:
        """Ask the learner to pick 은 or 는 for a random noun or pronoun."""
        noun_data = random.choice(get_nouns(pack) + get_pronouns(pack))
        noun = noun_data["korean"]
        correct = rule_engine.get_topic_marker(noun)
        wrong = '는' if correct == '은' else '은'
        choices = [correct, wrong]
        random.shuffle(choices)
        return self._build_payload(
            question_type="topic_marker",
            interaction_mode="choose_select",
            prompt_korean=noun + "____",
            prompt_english=f"Choose the correct topic marker for: {noun_data['english']}",
            choices=choices,
            answer_key=correct,
            tokens=[noun, correct],
            correct_order=[0, 1],
            slot_count=1,
            translation=f"{noun_data['english']} (topic)",
            grammar_rule="topic_marker",
            hint=rule_engine.get_hint(noun, 'topic'),
            difficulty=1,
            metadata={"lesson": "KLP7-base", "word_tested": noun},
        )

    # ── Type 2: Copula Choose ────────────────────────────────────────────────
    def _q_copula(self, pack: dict) -> dict:
        """Ask the learner to pick 이에요 or 예요 after a random noun."""
        noun_data = random.choice(get_nouns(pack))
        noun = noun_data["korean"]
        correct = rule_engine.get_copula(noun)
        wrong = '예요' if correct == '이에요' else '이에요'
        subject_data = random.choice(get_pronouns(pack))
        subject = subject_data["korean"]
        topic = rule_engine.attach_topic_marker(subject)
        choices = [correct, wrong]
        random.shuffle(choices)
        return self._build_payload(
            question_type="copula",
            interaction_mode="choose_select",
            prompt_korean=f"{topic} {noun}____",
            prompt_english=f"{subject_data['english']} is a/an {noun_data['english']}",
            choices=choices,
            answer_key=correct,
            tokens=[topic, noun + correct],
            correct_order=[0, 1],
            slot_count=1,
            translation=f"{subject_data['english']} is a/an {noun_data['english']}",
            grammar_rule="copula",
            hint=rule_engine.get_hint(noun, 'copula'),
            difficulty=1,
            metadata={"lesson": "KLP7-base", "word_tested": noun},
        )

    # ── Type 3: Negative Copula ──────────────────────────────────────────────
    def _q_negative_copula(self, pack: dict) -> dict:
        """Ask the learner to pick 이 or 가 before 아니에요."""
        noun_data = random.choice(get_nouns(pack))
        noun = noun_data["korean"]
        correct_marker = rule_engine.get_subject_marker(noun)
        wrong_marker = '가' if correct_marker == '이' else '이'
        subject_data = random.choice(get_pronouns(pack))
        subject = subject_data["korean"]
        topic = rule_engine.attach_topic_marker(subject)
        choices = [correct_marker, wrong_marker]
        random.shuffle(choices)
        return self._build_payload(
            question_type="negative_copula",
            interaction_mode="choose_select",
            prompt_korean=f"{topic} {noun}____ 아니에요",
            prompt_english=f"{subject_data['english']} is not a/an {noun_data['english']}",
            choices=choices,
            answer_key=correct_marker,
            tokens=[topic, noun + correct_marker + " 아니에요"],
            correct_order=[0, 1],
            slot_count=1,
            translation=f"{subject_data['english']} is not a/an {noun_data['english']}",
            grammar_rule="negative_copula",
            hint=rule_engine.get_hint(noun, 'negative'),
            difficulty=1,
            metadata={"lesson": "KLP7-base", "word_tested": noun},
        )

    # ── Type 4: Scrabble (Sentence Assembly) ─────────────────────────────────
    def _q_scrabble(self, pack: dict, difficulty: int = 1) -> dict:
        """
        Build a shuffled token assembly question.
        For difficulty 1: simple [Subject+Topic] [Noun+Copula]
        For difficulty 2+: use Gemini to generate a more complex sentence
        (only when a client is configured).
        """
        if difficulty >= 2 and self.client:
            return self._q_scrabble_gemini(pack, difficulty)

        # Simple rule-based sentence
        subject_data = random.choice(get_pronouns(pack))
        noun_data = random.choice(get_nouns(pack))
        token_1 = rule_engine.attach_topic_marker(subject_data["korean"])
        token_2 = rule_engine.attach_copula(noun_data["korean"])
        tokens = [token_1, token_2]

        # correct_order must index into the *shuffled* list, so it is derived
        # from the shuffle instead of being hard-coded to [0, 1].
        shuffled_tokens, correct_order, shuffled_indices = self._shuffle_tokens(tokens)
        english = f"{subject_data['english']} is a/an {noun_data['english']}"
        return self._build_payload(
            question_type="scrabble",
            interaction_mode="assemble",
            prompt_korean="",
            prompt_english=english,
            choices=[],
            answer_key=None,
            tokens=shuffled_tokens,
            correct_order=correct_order,
            slot_count=len(tokens),
            translation=english,
            grammar_rule="topic_marker",
            hint="Topic comes first, then the noun with copula",
            difficulty=difficulty,
            metadata={
                "lesson": "KLP7-base",
                "sentence": f"{token_1} {token_2}",
                "shuffled_indices": shuffled_indices,
            },
        )

    def _q_scrabble_gemini(self, pack: dict, difficulty: int) -> dict:
        """Use Gemini to generate a varied Korean sentence for assembly.

        Any failure (request error, invalid JSON, unusable tokens or order)
        is logged and answered with the rule-based difficulty-1 question.
        """
        try:
            candidates = [v["korean"] for v in pack["vocab"] if v["type"] in ("noun", "verb")]
            # Bound the sample by the filtered list; bounding by the whole
            # vocabulary makes random.sample raise on packs with few nouns/verbs.
            vocab_sample = random.sample(candidates, min(8, len(candidates)))
            prompt = f"""You are a Korean language teacher generating a sentence assembly exercise.
Create a natural Korean sentence using words from this vocabulary:
{', '.join(vocab_sample)}
Difficulty level: {difficulty} (1=simple 2=intermediate 3=advanced)
Rules:
- Difficulty 2: 3-4 tokens, include at least one of: 은/는, 이에요/예요, 을/를
- Difficulty 3: 4-6 tokens, may include indirect quotation patterns like -다고, -자고, -냐고
Return ONLY valid JSON, no markdown:
{{
"sentence": "complete Korean sentence",
"tokens": ["token1", "token2", "token3"],
"correct_order": [0, 1, 2],
"translation": "English translation",
"grammar_focus": "what grammar point this tests"
}}
The tokens array must be in shuffled order (not the correct order).
correct_order must be indices into the tokens array giving the right sequence."""
            data = self._request_json(prompt)
            tokens, correct_order = self._validated_assembly(
                data.get("tokens"), data.get("correct_order")
            )
            return self._build_payload(
                question_type="scrabble",
                interaction_mode="assemble",
                prompt_korean="",
                prompt_english=data.get("translation", ""),
                choices=[],
                answer_key=None,
                tokens=tokens,
                correct_order=correct_order,
                slot_count=len(tokens),
                translation=data.get("translation", ""),
                grammar_rule=QTYPE_TO_RULE.get("scrabble", "topic_marker"),
                hint=f"Focus on: {data.get('grammar_focus', 'sentence structure')}",
                difficulty=difficulty,
                metadata={"lesson": "KLP7-10", "sentence": data.get("sentence", ""), "source": "gemini"},
            )
        except Exception as e:
            logger.warning("Gemini scrabble failed, falling back to rule-based: %s", e)
            return self._q_scrabble(pack, difficulty=1)

    # ── Type 5: Indirect Quote -다고 ─────────────────────────────────────────
    def _q_indirect_dago(self, pack: dict, difficulty: int = 2) -> dict:
        """Generate a -다고 indirect quotation question.

        ``difficulty`` is accepted for interface compatibility; the payload
        difficulty is fixed at 2.
        """
        generated = self._try_gemini(
            pack, "indirect_quote_dago",
            system="""Generate a -다고 indirect quotation exercise.
Return JSON:
{
"direct_speech": "original Korean sentence",
"speaker": "Korean name (e.g. 민호, 지수, 현민)",
"indirect_speech": "correctly converted indirect speech",
"tokens": ["token1", "token2", ...],
"correct_order": [0, 1, ...],
"translation": "English translation of indirect speech",
"tense": "past/present/future"
}
Use patterns: verb+ㄴ/는다고, adjective+다고, past+었/았다고, future+ㄹ 거라고
Tokens should be 3-5 elements in shuffled order."""
        )
        if generated is not None:
            return generated

        # Fallback: static example
        return self._build_payload(
            question_type="indirect_quote_dago",
            interaction_mode="assemble",
            prompt_korean='민호: "감기 때문에 많이 아파요"',
            prompt_english="Minho says he is very sick because of a cold",
            choices=[],
            answer_key=None,
            tokens=["많이", "민호가", "아프다고", "감기 때문에", "했어요"],
            correct_order=[1, 3, 0, 2, 4],
            slot_count=5,
            translation="Minho said he is very sick because of a cold",
            grammar_rule="indirect_quote_dago",
            hint="V/Adj + 다고 하다 for statements",
            difficulty=2,
            metadata={"lesson": "KLP7-8", "form": "adjective_present"},
        )

    # ── Type 6: Indirect Quote Commands ─────────────────────────────────────
    def _q_indirect_commands(self, pack: dict) -> dict:
        """Generate a command/request indirect quotation question."""
        generated = self._try_gemini(
            pack, "indirect_quote_commands",
            system="""Generate a command indirect quotation exercise using one of:
-(으)라고 (command), -지 말라고 (negative command), -달라고 (request for self), -주라고 (request for other).
Return JSON:
{
"direct_speech": "original Korean sentence with command/request",
"speaker": "Korean name",
"listener": "Korean name",
"form": "command|neg_command|request_me|request_other",
"indirect_speech": "correctly converted indirect speech",
"tokens": ["token1", ...],
"correct_order": [0, ...],
"translation": "English translation"
}
Tokens 3-5 elements, shuffled."""
        )
        if generated is not None:
            return generated

        # Fallback: static example
        return self._build_payload(
            question_type="indirect_quote_commands",
            interaction_mode="assemble",
            prompt_korean='의사: "약을 먹으세요"',
            prompt_english="The doctor said to take medicine",
            choices=[],
            answer_key=None,
            tokens=["했어요", "의사가", "약을", "먹으라고"],
            correct_order=[1, 2, 3, 0],
            slot_count=4,
            translation="The doctor said to take medicine",
            grammar_rule="indirect_quote_commands",
            hint="V + (으)라고 for commands",
            difficulty=2,
            metadata={"lesson": "KLP7-9", "form": "command"},
        )

    # ── Type 7: Indirect Quote Questions ────────────────────────────────────
    def _q_indirect_questions(self, pack: dict) -> dict:
        """Generate a -냐고 indirect question exercise."""
        generated = self._try_gemini(
            pack, "indirect_quote_questions",
            system="""Generate a question indirect quotation exercise using -냐고 or -느냐고.
Return JSON:
{
"direct_speech": "original Korean question",
"speaker": "Korean name",
"indirect_speech": "correctly converted indirect speech",
"tokens": ["token1", ...],
"correct_order": [0, ...],
"translation": "English translation"
}
Remember: drop ㄹ from stem before 냐고.
Tokens 3-5 elements, shuffled."""
        )
        if generated is not None:
            return generated

        # Fallback: static example
        return self._build_payload(
            question_type="indirect_quote_questions",
            interaction_mode="assemble",
            prompt_korean='현민: "사는 곳이 어디예요?"',
            prompt_english="Hyunmin asked where you live",
            choices=[],
            answer_key=None,
            tokens=["현민이", "사는 곳이", "물어봤어요", "어디냐고"],
            correct_order=[0, 1, 3, 2],
            slot_count=4,
            translation="Hyunmin asked where you live",
            grammar_rule="indirect_quote_questions",
            hint="V/Adj + 냐고 for questions (drop ㄹ)",
            difficulty=2,
            metadata={"lesson": "KLP7-10", "form": "question"},
        )

    # ── Type 8: Indirect Quote Suggestions ──────────────────────────────────
    def _q_indirect_suggestions(self, pack: dict) -> dict:
        """Generate a -자고 indirect suggestion exercise."""
        generated = self._try_gemini(
            pack, "indirect_quote_suggestions",
            system="""Generate a suggestion indirect quotation exercise using -자고.
Return JSON:
{
"direct_speech": "original Korean suggestion",
"speaker": "Korean name",
"indirect_speech": "correctly converted indirect speech",
"tokens": ["token1", ...],
"correct_order": [0, ...],
"translation": "English translation"
}
Pattern: V + 자고 하다.
Tokens 3-5 elements, shuffled."""
        )
        if generated is not None:
            return generated

        # Fallback: static example
        return self._build_payload(
            question_type="indirect_quote_suggestions",
            interaction_mode="assemble",
            prompt_korean='친구: "같이 밥 먹자"',
            prompt_english="My friend suggested eating together",
            choices=[],
            answer_key=None,
            tokens=["친구가", "밥 먹자고", "같이", "했어요"],
            correct_order=[0, 2, 1, 3],
            slot_count=4,
            translation="My friend suggested eating together",
            grammar_rule="indirect_quote_suggestions",
            hint="V + 자고 for suggestions",
            difficulty=2,
            metadata={"lesson": "KLP7-10", "form": "suggestion"},
        )

    # ── Type 9: Regret Expression ────────────────────────────────────────────
    def _q_regret(self, pack: dict) -> dict:
        """Generate a -(으)ㄹ 걸 그랬다 regret expression exercise."""
        generated = self._try_gemini(
            pack, "regret_expression",
            system="""Generate a Korean regret expression exercise using -(으)ㄹ 걸 그랬다 or -지 말 걸 그랬다.
Return JSON:
{
"situation": "brief situation description in English",
"sentence": "Korean regret sentence",
"tokens": ["token1", ...],
"correct_order": [0, ...],
"translation": "English translation (I should have...)",
"negative": false
}
Tokens 3-5 elements, shuffled.
Use realistic daily life situations from the KLP7 lessons."""
        )
        if generated is not None:
            return generated

        # Fallback: static example
        return self._build_payload(
            question_type="regret_expression",
            interaction_mode="assemble",
            prompt_korean="You were late to class because you overslept.",
            prompt_english="I should have gotten up earlier",
            choices=[],
            answer_key=None,
            tokens=["더", "일어날 걸", "일찍"],
            correct_order=[0, 2, 1],
            slot_count=3,
            translation="I should have gotten up earlier",
            grammar_rule="regret_expression",
            hint="Verb stem + (으)ㄹ 걸 그랬다",
            difficulty=2,
            metadata={"lesson": "KLP7-10", "negative": False},
        )

    # ── Gemini Generic Indirect Quote Helper ─────────────────────────────────
    def _q_indirect_gemini(self, pack: dict, q_type: str, system: str) -> dict:
        """Generic Gemini-powered indirect quote generator.

        Args:
            pack: Content pack; ``pack["vocab"]`` entries with ``korean``,
                ``english`` and ``type`` keys are sampled into the prompt.
            q_type: Question type written into the payload.
            system: Task instructions placed at the top of the prompt.

        Raises:
            Exception: Any request, parsing or validation error is logged and
                re-raised so the caller can choose a fallback.
        """
        try:
            candidates = [
                f"{v['korean']} ({v['english']})" for v in pack["vocab"]
                if v["type"] in ("noun", "verb", "adjective")
            ]
            # Bound by the filtered list so random.sample cannot over-ask.
            vocab_sample = random.sample(candidates, min(10, len(candidates)))
            prompt = f"""{system}
Use vocabulary from this list where natural:
{', '.join(vocab_sample)}
Return ONLY valid JSON, no markdown backticks."""
            data = self._request_json(prompt)
            tokens, correct_order = self._validated_assembly(
                data.get("tokens"), data.get("correct_order")
            )
            translation = data.get("translation", "")
            indirect = data.get("indirect_speech", data.get("sentence", ""))
            extras = {
                k: v for k, v in data.items()
                if k not in ("tokens", "correct_order", "translation")
            }
            return self._build_payload(
                question_type=q_type,
                interaction_mode="assemble",
                prompt_korean=data.get("direct_speech", ""),
                prompt_english=data.get("situation", translation),
                choices=[],
                answer_key=None,
                tokens=tokens,
                correct_order=correct_order,
                slot_count=len(tokens),
                translation=translation,
                grammar_rule=QTYPE_TO_RULE.get(q_type, q_type),
                hint=self._get_hint_for_type(q_type),
                difficulty=2,
                metadata={
                    # Model-supplied extras first so they cannot override the
                    # fixed bookkeeping keys below.
                    **extras,
                    "lesson": "KLP7-10",
                    "indirect_speech": indirect,
                    "source": "gemini",
                },
            )
        except Exception as e:
            logger.error("Gemini indirect quote failed for %s: %s", q_type, e)
            raise

    def _try_gemini(self, pack: dict, q_type: str, system: str) -> Optional[dict]:
        """Return a Gemini-generated payload, or None when unavailable.

        None is returned when no client is configured or the generation
        failed, letting the caller serve its static example of the same
        question type instead of a different question type.
        """
        if not self.client:
            return None
        try:
            return self._q_indirect_gemini(pack, q_type, system=system)
        except Exception:
            # Already logged by _q_indirect_gemini; deliberate best-effort fallback.
            return None

    def _request_json(self, prompt: str) -> dict:
        """Send ``prompt`` to Gemini and return the reply parsed as a JSON object.

        Raises:
            ValueError: If the reply is not valid JSON (json.JSONDecodeError)
                or is valid JSON but not an object.
        """
        response = self.client.models.generate_content(
            model=self._GEMINI_MODEL,
            contents=prompt,
        )
        data = json.loads(self._strip_code_fence(response.text))
        if not isinstance(data, dict):
            raise ValueError("model response is not a JSON object")
        return data

    # ── Helpers ───────────────────────────────────────────────────────────────
    @staticmethod
    def _strip_code_fence(text: Optional[str]) -> str:
        """Return ``text`` without a surrounding markdown code fence, if any."""
        text = (text or "").strip()
        if "```" in text:
            # Content between the first pair of fences.
            text = text.split("```")[1]
            if text[:4].lower() == "json":
                text = text[4:]
        return text.strip()

    @staticmethod
    def _is_permutation(order, size: int) -> bool:
        """Return True when ``order`` is a list holding each of 0..size-1 once."""
        return (
            isinstance(order, list)
            # Exact int check: rejects floats and bools that compare equal to ints.
            and all(type(i) is int for i in order)
            and sorted(order) == list(range(size))
        )

    @classmethod
    def _validated_assembly(cls, tokens, correct_order) -> tuple:
        """Validate model-provided assembly data and return ``(tokens, correct_order)``.

        A missing ``correct_order`` defaults to the identity order.

        Raises:
            ValueError: If ``tokens`` is not a non-empty list of strings or
                ``correct_order`` is not a permutation of its indices.
        """
        if (not isinstance(tokens, list) or not tokens
                or not all(isinstance(t, str) for t in tokens)):
            raise ValueError("model response has no usable 'tokens' list")
        if correct_order is None:
            correct_order = list(range(len(tokens)))
        if not cls._is_permutation(correct_order, len(tokens)):
            raise ValueError("'correct_order' is not a permutation of the token indices")
        return tokens, correct_order

    @staticmethod
    def _shuffle_tokens(tokens: list) -> tuple:
        """Shuffle ``tokens`` and return ``(shuffled, correct_order, source_indices)``.

        ``source_indices[p]`` is the original index of the token shown at
        position ``p``; ``correct_order[k]`` is the shuffled position of the
        k-th token of the correct sentence. Works with duplicate tokens.
        """
        source_indices = list(range(len(tokens)))
        random.shuffle(source_indices)
        shuffled = [tokens[i] for i in source_indices]
        correct_order = [0] * len(tokens)
        for position, source in enumerate(source_indices):
            correct_order[source] = position
        return shuffled, correct_order, source_indices

    def _select_type(self, difficulty: int, history: list) -> str:
        """Pick a random question type for ``difficulty``.

        Types asked in the last three history entries are excluded unless
        that would leave nothing to choose from. Unknown difficulty values
        use the level-1 type list.
        """
        available = DIFFICULTY_TYPES.get(difficulty, DIFFICULTY_TYPES[1])
        recent = [h.get("question_type") for h in history[-3:]]
        # Prefer types not seen in the last three questions
        choices = [t for t in available if t not in recent] or available
        return random.choice(choices)

    def _get_hint_for_type(self, q_type: str) -> str:
        """Return the generic hint for ``q_type``, or a default hint if unknown."""
        hints = {
            "indirect_quote_dago": "V+ㄴ/는다고, Adj+다고, Past+었/았다고, Future+ㄹ 거라고",
            "indirect_quote_commands": "(으)라고 commands, 지 말라고 negatives, 달라고/주라고 requests",
            "indirect_quote_questions": "V/Adj + 냐고 (remember to drop ㄹ from stem)",
            "indirect_quote_suggestions": "V + 자고 for suggestions",
            "regret_expression": "(으)ㄹ 걸 그랬다 = should have done; 지 말 걸 = should not have done",
        }
        return hints.get(q_type, "Check the grammar rule pattern")

    def _build_payload(self, **kwargs) -> dict:
        """Build the standardized question payload sent to Unity.

        Missing keyword arguments fall back to empty/neutral defaults and a
        fresh UUID is assigned as ``question_id``.
        """
        return {
            "question_id": str(uuid.uuid4()),
            "question_type": kwargs.get("question_type"),
            "interaction_mode": kwargs.get("interaction_mode"),
            "prompt_korean": kwargs.get("prompt_korean", ""),
            "prompt_english": kwargs.get("prompt_english", ""),
            "tokens": kwargs.get("tokens", []),
            "correct_order": kwargs.get("correct_order", []),
            "slot_count": kwargs.get("slot_count", 0),
            "choices": kwargs.get("choices", []),
            "answer_key": kwargs.get("answer_key"),
            "translation": kwargs.get("translation", ""),
            "grammar_rule": kwargs.get("grammar_rule"),
            "hint": kwargs.get("hint", ""),
            "difficulty": kwargs.get("difficulty", 1),
            "metadata": kwargs.get("metadata", {}),
        }