KoreAI-API

Sleeping

App Files Files Community

rairo committed on Mar 6

Commit

bdfea50

verified ·

1 Parent(s): 7ec4af5

Create question_generator.py

Browse files

Files changed (1) hide show

question_generator.py +591 -0

question_generator.py ADDED Viewed

	@@ -0,0 +1,591 @@

+"""
+QuestionGenerator — Generates Korean grammar questions using rule engine + Gemini.
+Produces standardized payloads consumed by Unity XR client.
+"""
+import json
+import uuid
+import random
+import logging
+from typing import Optional
+from korean_rules import rule_engine
+from content_pack import get_active_pack, get_nouns, get_pronouns, get_verbs, get_adjectives
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Question types, their mastery-tracking rules, and difficulty tiers
+# ---------------------------------------------------------------------------
+# Types offered from difficulty 1 upward.
+_BASIC_TYPES = ["topic_marker", "copula", "negative_copula", "scrabble"]
+# Types that become available at difficulty 2.
+_INTERMEDIATE_TYPES = ["indirect_quote_dago", "indirect_quote_commands"]
+# Types that are only available at difficulty 3.
+_ADVANCED_TYPES = [
+    "indirect_quote_questions",
+    "indirect_quote_suggestions",
+    "regret_expression",
+]
+# Every supported question type, ordered from basic to advanced.
+QUESTION_TYPES = _BASIC_TYPES + _INTERMEDIATE_TYPES + _ADVANCED_TYPES
+# Question type → grammar rule used for mastery tracking. Each type tracks
+# the rule of the same name, except scrabble, which covers basic sentence
+# structure and is therefore tracked under topic_marker.
+QTYPE_TO_RULE = {
+    q_type: ("topic_marker" if q_type == "scrabble" else q_type)
+    for q_type in QUESTION_TYPES
+}
+# Difficulty → available question types
+DIFFICULTY_TYPES = {
+    1: list(_BASIC_TYPES),
+    2: _BASIC_TYPES + _INTERMEDIATE_TYPES,
+    3: QUESTION_TYPES,
+}
+class QuestionGenerator:
+    def __init__(self, gemini_client=None):
+        """Store the optional Gemini client used for generated questions.
+        Args:
+            gemini_client: Object used as
+                ``client.models.generate_content(model=..., contents=...)``.
+                When it is falsy, the Gemini-backed question types return
+                their built-in static examples and scrabble stays rule-based.
+        """
+        self.client = gemini_client
+    # ── Main Entry Point ─────────────────────────────────────────────────────
+    def generate(self, difficulty: int = 1, grammar_rule: Optional[str] = None,
+                 history: Optional[list] = None, session_id: Optional[str] = None) -> dict:
+        """
+        Generate a question payload for Unity.
+        Returns a standardized dict with all fields Unity needs.
+        Args:
+            difficulty: Difficulty level; used to pick a question type when
+                ``grammar_rule`` is not a known type, and forwarded to the
+                scrabble and -다고 builders.
+            grammar_rule: Question type to force. Ignored unless it is one of
+                ``QUESTION_TYPES``.
+            history: Earlier question dicts, passed to ``_select_type`` so
+                recently asked types can be avoided. ``None`` means no history.
+            session_id: Accepted for interface compatibility; not used here.
+        Returns:
+            The payload dict for the chosen type. If building it fails for
+            any reason, the error is logged and a topic-marker question is
+            returned instead, so this method never returns ``None``.
+        """
+        pack = get_active_pack()
+        history = history or []
+        # Select question type
+        if grammar_rule in QUESTION_TYPES:
+            q_type = grammar_rule
+        else:
+            q_type = self._select_type(difficulty, history)
+        # Zero-argument builders keyed by question type; lambdas defer the
+        # work so only the selected builder runs.
+        builders = {
+            "topic_marker": lambda: self._q_topic_marker(pack),
+            "copula": lambda: self._q_copula(pack),
+            "negative_copula": lambda: self._q_negative_copula(pack),
+            "scrabble": lambda: self._q_scrabble(pack, difficulty),
+            "indirect_quote_dago": lambda: self._q_indirect_dago(pack, difficulty),
+            "indirect_quote_commands": lambda: self._q_indirect_commands(pack),
+            "indirect_quote_questions": lambda: self._q_indirect_questions(pack),
+            "indirect_quote_suggestions": lambda: self._q_indirect_suggestions(pack),
+            "regret_expression": lambda: self._q_regret(pack),
+        }
+        try:
+            # An unknown q_type raises KeyError here and takes the same
+            # fallback path as a builder failure instead of returning None.
+            return builders[q_type]()
+        except Exception as e:
+            # logger.exception keeps the traceback alongside the message.
+            logger.exception("Question generation error for %s: %s", q_type, e)
+            # Fallback to simplest question type
+            return self._q_topic_marker(pack)
+    # ── Type 1: Topic Marker Choose ─────────────────────────────────────────
+    def _q_topic_marker(self, pack: dict) -> dict:
+        """Build a two-choice question asking which topic marker (은/는) follows a word.
+        The word is drawn at random from the pack's nouns and pronouns; the
+        rule engine supplies the correct marker and the hint.
+        """
+        candidates = get_nouns(pack) + get_pronouns(pack)
+        entry = random.choice(candidates)
+        word = entry["korean"]
+        answer = rule_engine.get_topic_marker(word)
+        # The distractor is simply the other member of the 은/는 pair.
+        distractor = '은' if answer != '은' else '는'
+        options = [answer, distractor]
+        random.shuffle(options)
+        english = entry['english']
+        fields = {
+            "question_type": "topic_marker",
+            "interaction_mode": "choose_select",
+            "prompt_korean": word + "____",
+            "prompt_english": f"Choose the correct topic marker for: {english}",
+            "choices": options,
+            "answer_key": answer,
+            "tokens": [word, answer],
+            "correct_order": [0, 1],
+            "slot_count": 1,
+            "translation": f"{english} (topic)",
+            "grammar_rule": "topic_marker",
+            "hint": rule_engine.get_hint(word, 'topic'),
+            "difficulty": 1,
+            "metadata": {"lesson": "KLP7-base", "word_tested": word},
+        }
+        return self._build_payload(**fields)
+    # ── Type 2: Copula Choose ────────────────────────────────────────────────
+    def _q_copula(self, pack: dict) -> dict:
+        """Build a two-choice question on the copula ending (이에요/예요) for a noun.
+        A random noun is the word under test and a random pronoun, with its
+        topic marker attached, provides the sentence subject.
+        """
+        predicate = random.choice(get_nouns(pack))
+        predicate_ko = predicate["korean"]
+        answer = rule_engine.get_copula(predicate_ko)
+        # The distractor is the other copula form.
+        distractor = '이에요' if answer != '이에요' else '예요'
+        actor = random.choice(get_pronouns(pack))
+        topic_phrase = rule_engine.attach_topic_marker(actor["korean"])
+        options = [answer, distractor]
+        random.shuffle(options)
+        # The English gloss doubles as the prompt and the translation.
+        gloss = f"{actor['english']} is a/an {predicate['english']}"
+        fields = {
+            "question_type": "copula",
+            "interaction_mode": "choose_select",
+            "prompt_korean": f"{topic_phrase} {predicate_ko}____",
+            "prompt_english": gloss,
+            "choices": options,
+            "answer_key": answer,
+            "tokens": [topic_phrase, predicate_ko + answer],
+            "correct_order": [0, 1],
+            "slot_count": 1,
+            "translation": gloss,
+            "grammar_rule": "copula",
+            "hint": rule_engine.get_hint(predicate_ko, 'copula'),
+            "difficulty": 1,
+            "metadata": {"lesson": "KLP7-base", "word_tested": predicate_ko},
+        }
+        return self._build_payload(**fields)
+    # ── Type 3: Negative Copula ──────────────────────────────────────────────
+    def _q_negative_copula(self, pack: dict) -> dict:
+        """Build a two-choice question on the marker (이/가) used before 아니에요.
+        A random noun is the word under test and a random pronoun, with its
+        topic marker attached, provides the sentence subject.
+        """
+        predicate = random.choice(get_nouns(pack))
+        predicate_ko = predicate["korean"]
+        answer = rule_engine.get_subject_marker(predicate_ko)
+        # The distractor is the other subject marker.
+        distractor = '이' if answer != '이' else '가'
+        actor = random.choice(get_pronouns(pack))
+        topic_phrase = rule_engine.attach_topic_marker(actor["korean"])
+        options = [answer, distractor]
+        random.shuffle(options)
+        # The English gloss doubles as the prompt and the translation.
+        gloss = f"{actor['english']} is not a/an {predicate['english']}"
+        fields = {
+            "question_type": "negative_copula",
+            "interaction_mode": "choose_select",
+            "prompt_korean": f"{topic_phrase} {predicate_ko}____ 아니에요",
+            "prompt_english": gloss,
+            "choices": options,
+            "answer_key": answer,
+            "tokens": [topic_phrase, predicate_ko + answer + " 아니에요"],
+            "correct_order": [0, 1],
+            "slot_count": 1,
+            "translation": gloss,
+            "grammar_rule": "negative_copula",
+            "hint": rule_engine.get_hint(predicate_ko, 'negative'),
+            "difficulty": 1,
+            "metadata": {"lesson": "KLP7-base", "word_tested": predicate_ko},
+        }
+        return self._build_payload(**fields)
+    # ── Type 4: Scrabble (Sentence Assembly) ─────────────────────────────────
+    def _q_scrabble(self, pack: dict, difficulty: int = 1) -> dict:
+        """
+        Build a shuffled token assembly question.
+        For difficulty 1: simple [Subject+Topic] [Noun+Copula]
+        For difficulty 2+: use Gemini to generate a more complex sentence
+        (only when a client is configured; otherwise the simple form is used).
+        In the returned payload ``tokens`` is the shuffled list and
+        ``correct_order`` holds indices into that shuffled list which, read in
+        sequence, spell the correct sentence. This is the same convention the
+        other assembly payloads in this class use.
+        ``metadata["shuffled_indices"]`` gives, for each shuffled position,
+        the token's index in the correct sentence.
+        """
+        if difficulty >= 2 and self.client:
+            return self._q_scrabble_gemini(pack, difficulty)
+        # Simple rule-based sentence
+        subject_data = random.choice(get_pronouns(pack))
+        noun_data = random.choice(get_nouns(pack))
+        subject = subject_data["korean"]
+        noun = noun_data["korean"]
+        token_1 = rule_engine.attach_topic_marker(subject)
+        token_2 = rule_engine.attach_copula(noun)
+        ordered_tokens = [token_1, token_2]
+        # Shuffle positions rather than strings, so the mapping stays exact
+        # even if two tokens happen to be equal.
+        shuffled_indices = list(range(len(ordered_tokens)))
+        random.shuffle(shuffled_indices)
+        shuffled_tokens = [ordered_tokens[i] for i in shuffled_indices]
+        # correct_order[i] = where the i-th word of the sentence ended up in
+        # shuffled_tokens (previously this was always [0, 1], which is wrong
+        # whenever the shuffle swaps the two tokens).
+        correct_order = [shuffled_indices.index(i) for i in range(len(ordered_tokens))]
+        english = f"{subject_data['english']} is a/an {noun_data['english']}"
+        return self._build_payload(
+            question_type="scrabble",
+            interaction_mode="assemble",
+            prompt_korean="",
+            prompt_english=english,
+            choices=[],
+            answer_key=None,
+            tokens=shuffled_tokens,
+            correct_order=correct_order,
+            slot_count=len(ordered_tokens),
+            translation=english,
+            grammar_rule="topic_marker",
+            hint="Topic comes first, then the noun with copula",
+            difficulty=difficulty,
+            metadata={
+                "lesson": "KLP7-base",
+                "sentence": f"{token_1} {token_2}",
+                "shuffled_indices": shuffled_indices,
+            },
+        )
+    def _q_scrabble_gemini(self, pack: dict, difficulty: int) -> dict:
+        """Use Gemini to generate a varied Korean sentence for assembly.
+        Samples up to 8 noun/verb entries from ``pack["vocab"]``, asks the
+        model for a JSON exercise, and validates the reply before building
+        the payload. Any failure (request error, unparsable JSON, or a reply
+        whose ``correct_order`` is not a permutation of the token indices) is
+        logged and answered with the rule-based scrabble question instead.
+        """
+        try:
+            candidates = [v["korean"] for v in pack["vocab"] if v["type"] in ("noun", "verb")]
+            # Size the sample from the filtered list: random.sample raises
+            # ValueError when asked for more items than the population holds.
+            vocab_sample = random.sample(candidates, min(8, len(candidates)))
+            prompt = f"""You are a Korean language teacher generating a sentence assembly exercise.
+Create a natural Korean sentence using words from this vocabulary:
+{', '.join(vocab_sample)}
+Difficulty level: {difficulty} (1=simple 2=intermediate 3=advanced)
+Rules:
+- Difficulty 2: 3-4 tokens, include at least one of: 은/는, 이에요/예요, 을/를
+- Difficulty 3: 4-6 tokens, may include indirect quotation patterns like -다고, -자고, -냐고
+Return ONLY valid JSON, no markdown:
+{{
+  "sentence": "complete Korean sentence",
+  "tokens": ["token1", "token2", "token3"],
+  "correct_order": [0, 1, 2],
+  "translation": "English translation",
+  "grammar_focus": "what grammar point this tests"
+}}
+The tokens array must be in shuffled order (not the correct order).
+correct_order must be indices into the tokens array giving the right sequence."""
+            response = self.client.models.generate_content(
+                model="gemini-2.0-flash",
+                contents=prompt,
+            )
+            text = response.text.strip()
+            # Strip a surrounding markdown code fence if the model added one.
+            if text.startswith("```"):
+                text = text.split("```")[1]
+                if text.startswith("json"):
+                    text = text[4:]
+            data = json.loads(text)
+            tokens = data.get("tokens", [])
+            correct_order = data.get("correct_order", [])
+            # Reject replies that cannot be assembled: no tokens, or an order
+            # that is not exactly one index per token.
+            if not isinstance(tokens, list) or not tokens:
+                raise ValueError("Gemini reply has no tokens")
+            if (not isinstance(correct_order, list)
+                    or sorted(correct_order) != list(range(len(tokens)))):
+                raise ValueError("Gemini reply has an invalid correct_order")
+            translation = data.get("translation", "")
+            return self._build_payload(
+                question_type="scrabble",
+                interaction_mode="assemble",
+                prompt_korean="",
+                prompt_english=translation,
+                choices=[],
+                answer_key=None,
+                tokens=tokens,
+                correct_order=correct_order,
+                slot_count=len(tokens),
+                translation=translation,
+                grammar_rule=QTYPE_TO_RULE.get("scrabble", "topic_marker"),
+                hint=f"Focus on: {data.get('grammar_focus', 'sentence structure')}",
+                difficulty=difficulty,
+                metadata={"lesson": "KLP7-10", "sentence": data.get("sentence", ""), "source": "gemini"},
+            )
+        except Exception as e:
+            logger.warning(f"Gemini scrabble failed, falling back to rule-based: {e}")
+            # difficulty=1 forces the rule-based branch of _q_scrabble.
+            return self._q_scrabble(pack, difficulty=1)
+    # ── Type 5: Indirect Quote -다고 ─────────────────────────────────────────
+    def _q_indirect_dago(self, pack: dict, difficulty: int = 2) -> dict:
+        """Generate a -다고 indirect quotation question.
+        With a Gemini client configured, the exercise is generated by the
+        model. Without a client, or when generation fails, a fixed example of
+        the same question type is returned so the caller still receives a
+        -다고 question. ``difficulty`` is accepted but not used.
+        """
+        if self.client:
+            try:
+                return self._q_indirect_gemini(
+                    pack, "indirect_quote_dago",
+                    system="""Generate a -다고 indirect quotation exercise.
+Return JSON:
+{
+  "direct_speech": "original Korean sentence",
+  "speaker": "Korean name (e.g. 민호, 지수, 현민)",
+  "indirect_speech": "correctly converted indirect speech",
+  "tokens": ["token1", "token2", ...],
+  "correct_order": [0, 1, ...],
+  "translation": "English translation of indirect speech",
+  "tense": "past/present/future"
+}
+Use patterns: verb+ㄴ/는다고, adjective+다고, past+었/았다고, future+ㄹ 거라고
+Tokens should be 3-5 elements in shuffled order."""
+                )
+            except Exception as e:
+                # Keep the question type the caller asked for instead of
+                # letting the failure escape to a different question type.
+                logger.warning("Gemini failed for indirect_quote_dago, using static example: %s", e)
+        # Fallback: static example
+        return self._build_payload(
+            question_type="indirect_quote_dago",
+            interaction_mode="assemble",
+            prompt_korean='민호: "감기 때문에 많이 아파요"',
+            prompt_english="Minho says he is very sick because of a cold",
+            choices=[],
+            answer_key=None,
+            tokens=["많이", "민호가", "아프다고", "감기 때문에", "했어요"],
+            correct_order=[1, 3, 0, 2, 4],
+            slot_count=5,
+            translation="Minho said he is very sick because of a cold",
+            grammar_rule="indirect_quote_dago",
+            hint="V/Adj + 다고 하다 for statements",
+            difficulty=2,
+            metadata={"lesson": "KLP7-8", "form": "adjective_present"},
+        )
+    # ── Type 6: Indirect Quote Commands ─────────────────────────────────────
+    def _q_indirect_commands(self, pack: dict) -> dict:
+        """Generate an indirect quotation question for commands and requests.
+        With a Gemini client configured, the exercise is generated by the
+        model. Without a client, or when generation fails, a fixed -(으)라고
+        example of the same question type is returned.
+        """
+        if self.client:
+            try:
+                return self._q_indirect_gemini(
+                    pack, "indirect_quote_commands",
+                    system="""Generate a command indirect quotation exercise using one of:
+-(으)라고 (command), -지 말라고 (negative command), -달라고 (request for self), -주라고 (request for other).
+Return JSON:
+{
+  "direct_speech": "original Korean sentence with command/request",
+  "speaker": "Korean name",
+  "listener": "Korean name",
+  "form": "command|neg_command|request_me|request_other",
+  "indirect_speech": "correctly converted indirect speech",
+  "tokens": ["token1", ...],
+  "correct_order": [0, ...],
+  "translation": "English translation"
+}
+Tokens 3-5 elements, shuffled."""
+                )
+            except Exception as e:
+                # Keep the question type the caller asked for instead of
+                # letting the failure escape to a different question type.
+                logger.warning("Gemini failed for indirect_quote_commands, using static example: %s", e)
+        # Fallback: static example
+        return self._build_payload(
+            question_type="indirect_quote_commands",
+            interaction_mode="assemble",
+            prompt_korean='의사: "약을 먹으세요"',
+            prompt_english="The doctor said to take medicine",
+            choices=[],
+            answer_key=None,
+            tokens=["했어요", "의사가", "약을", "먹으라고"],
+            correct_order=[1, 2, 3, 0],
+            slot_count=4,
+            translation="The doctor said to take medicine",
+            grammar_rule="indirect_quote_commands",
+            hint="V + (으)라고 for commands",
+            difficulty=2,
+            metadata={"lesson": "KLP7-9", "form": "command"},
+        )
+    # ── Type 7: Indirect Quote Questions ────────────────────────────────────
+    def _q_indirect_questions(self, pack: dict) -> dict:
+        """Generate an indirect quotation question for quoted questions (-냐고).
+        With a Gemini client configured, the exercise is generated by the
+        model. Without a client, or when generation fails, a fixed example of
+        the same question type is returned.
+        """
+        if self.client:
+            try:
+                return self._q_indirect_gemini(
+                    pack, "indirect_quote_questions",
+                    system="""Generate a question indirect quotation exercise using -냐고 or -느냐고.
+Return JSON:
+{
+  "direct_speech": "original Korean question",
+  "speaker": "Korean name",
+  "indirect_speech": "correctly converted indirect speech",
+  "tokens": ["token1", ...],
+  "correct_order": [0, ...],
+  "translation": "English translation"
+}
+Remember: drop ㄹ from stem before 냐고.
+Tokens 3-5 elements, shuffled."""
+                )
+            except Exception as e:
+                # Keep the question type the caller asked for instead of
+                # letting the failure escape to a different question type.
+                logger.warning("Gemini failed for indirect_quote_questions, using static example: %s", e)
+        # Fallback: static example
+        return self._build_payload(
+            question_type="indirect_quote_questions",
+            interaction_mode="assemble",
+            prompt_korean='현민: "사는 곳이 어디예요?"',
+            prompt_english="Hyunmin asked where you live",
+            choices=[],
+            answer_key=None,
+            tokens=["현민이", "사는 곳이", "물어봤어요", "어디냐고"],
+            correct_order=[0, 1, 3, 2],
+            slot_count=4,
+            translation="Hyunmin asked where you live",
+            grammar_rule="indirect_quote_questions",
+            hint="V/Adj + 냐고 for questions (drop ㄹ)",
+            difficulty=2,
+            metadata={"lesson": "KLP7-10", "form": "question"},
+        )
+    # ── Type 8: Indirect Quote Suggestions ──────────────────────────────────
+    def _q_indirect_suggestions(self, pack: dict) -> dict:
+        """Generate an indirect quotation question for suggestions (-자고).
+        With a Gemini client configured, the exercise is generated by the
+        model. Without a client, or when generation fails, a fixed example of
+        the same question type is returned.
+        """
+        if self.client:
+            try:
+                return self._q_indirect_gemini(
+                    pack, "indirect_quote_suggestions",
+                    system="""Generate a suggestion indirect quotation exercise using -자고.
+Return JSON:
+{
+  "direct_speech": "original Korean suggestion",
+  "speaker": "Korean name",
+  "indirect_speech": "correctly converted indirect speech",
+  "tokens": ["token1", ...],
+  "correct_order": [0, ...],
+  "translation": "English translation"
+}
+Pattern: V + 자고 하다.
+Tokens 3-5 elements, shuffled."""
+                )
+            except Exception as e:
+                # Keep the question type the caller asked for instead of
+                # letting the failure escape to a different question type.
+                logger.warning("Gemini failed for indirect_quote_suggestions, using static example: %s", e)
+        # Fallback: static example
+        return self._build_payload(
+            question_type="indirect_quote_suggestions",
+            interaction_mode="assemble",
+            prompt_korean='친구: "같이 밥 먹자"',
+            prompt_english="My friend suggested eating together",
+            choices=[],
+            answer_key=None,
+            tokens=["친구가", "밥 먹자고", "같이", "했어요"],
+            correct_order=[0, 2, 1, 3],
+            slot_count=4,
+            translation="My friend suggested eating together",
+            grammar_rule="indirect_quote_suggestions",
+            hint="V + 자고 for suggestions",
+            difficulty=2,
+            metadata={"lesson": "KLP7-10", "form": "suggestion"},
+        )
+    # ── Type 9: Regret Expression ────────────────────────────────────────────
+    def _q_regret(self, pack: dict) -> dict:
+        """Generate a regret-expression question (-(으)ㄹ 걸 그랬다 / -지 말 걸 그랬다).
+        With a Gemini client configured, the exercise is generated by the
+        model through the shared Gemini helper. Without a client, or when
+        generation fails, a fixed example of the same question type is
+        returned.
+        """
+        if self.client:
+            try:
+                return self._q_indirect_gemini(
+                    pack, "regret_expression",
+                    system="""Generate a Korean regret expression exercise using -(으)ㄹ 걸 그랬다 or -지 말 걸 그랬다.
+Return JSON:
+{
+  "situation": "brief situation description in English",
+  "sentence": "Korean regret sentence",
+  "tokens": ["token1", ...],
+  "correct_order": [0, ...],
+  "translation": "English translation (I should have...)",
+  "negative": false
+}
+Tokens 3-5 elements, shuffled.
+Use realistic daily life situations from the KLP7 lessons."""
+                )
+            except Exception as e:
+                # Keep the question type the caller asked for instead of
+                # letting the failure escape to a different question type.
+                logger.warning("Gemini failed for regret_expression, using static example: %s", e)
+        # Fallback: static example
+        return self._build_payload(
+            question_type="regret_expression",
+            interaction_mode="assemble",
+            prompt_korean="You were late to class because you overslept.",
+            prompt_english="I should have gotten up earlier",
+            choices=[],
+            answer_key=None,
+            tokens=["더", "일어날 걸", "일찍"],
+            correct_order=[0, 2, 1],
+            slot_count=3,
+            translation="I should have gotten up earlier",
+            grammar_rule="regret_expression",
+            hint="Verb stem + (으)ㄹ 걸 그랬다",
+            difficulty=2,
+            metadata={"lesson": "KLP7-10", "negative": False},
+        )
+    # ── Gemini Generic Indirect Quote Helper ─────────────────────────────────
+    def _q_indirect_gemini(self, pack: dict, q_type: str, system: str) -> dict:
+        """Generic Gemini-powered indirect quote generator.
+        Args:
+            pack: Content pack; up to 10 noun/verb/adjective entries from
+                ``pack["vocab"]`` are offered to the model as vocabulary.
+            q_type: Question type stamped on the payload and used to look up
+                the grammar rule and hint.
+            system: Task instructions placed at the top of the prompt.
+        Returns:
+            An "assemble" payload built from the model's JSON reply.
+        Raises:
+            Exception: Any request, parsing, or validation failure is logged
+                and re-raised so the caller can choose a fallback.
+        """
+        candidates = [
+            f"{v['korean']} ({v['english']})" for v in pack["vocab"]
+            if v["type"] in ("noun", "verb", "adjective")
+        ]
+        # Size the sample from the filtered list: random.sample raises
+        # ValueError when asked for more items than the population holds.
+        vocab_sample = random.sample(candidates, min(10, len(candidates)))
+        prompt = f"""{system}
+Use vocabulary from this list where natural:
+{', '.join(vocab_sample)}
+Return ONLY valid JSON, no markdown backticks."""
+        try:
+            response = self.client.models.generate_content(
+                model="gemini-2.0-flash",
+                contents=prompt,
+            )
+            text = response.text.strip()
+            # Strip a markdown code fence if the model added one anyway.
+            if "```" in text:
+                text = text.split("```")[1]
+                if text.startswith("json"):
+                    text = text[4:]
+            data = json.loads(text.strip())
+            tokens = data.get("tokens", [])
+            # Reject replies that cannot be assembled before using them.
+            if not isinstance(tokens, list) or not tokens:
+                raise ValueError("Gemini reply has no tokens")
+            correct_order = data.get("correct_order", list(range(len(tokens))))
+            if (not isinstance(correct_order, list)
+                    or sorted(correct_order) != list(range(len(tokens)))):
+                raise ValueError("Gemini reply has an invalid correct_order")
+            translation = data.get("translation", "")
+            # Regret exercises return "sentence" instead of "indirect_speech".
+            indirect = data.get("indirect_speech", data.get("sentence", ""))
+            grammar_rule = QTYPE_TO_RULE.get(q_type, q_type)
+            return self._build_payload(
+                question_type=q_type,
+                interaction_mode="assemble",
+                prompt_korean=data.get("direct_speech", ""),
+                prompt_english=data.get("situation", translation),
+                choices=[],
+                answer_key=None,
+                tokens=tokens,
+                correct_order=correct_order,
+                slot_count=len(tokens),
+                translation=translation,
+                grammar_rule=grammar_rule,
+                hint=self._get_hint_for_type(q_type),
+                difficulty=2,
+                metadata={
+                    "lesson": "KLP7-10",
+                    "indirect_speech": indirect,
+                    "source": "gemini",
+                    # Pass through the remaining reply fields (speaker, form,
+                    # tense, ...); fields already in the payload are skipped.
+                    **{k: v for k, v in data.items()
+                       if k not in ("tokens", "correct_order", "translation")},
+                },
+            )
+        except Exception as e:
+            logger.error(f"Gemini indirect quote failed for {q_type}: {e}")
+            raise
+    # ── Helpers ───────────────────────────────────────────────────────────────
+    def _select_type(self, difficulty: int, history: list) -> str:
+        """Pick a random question type for the given difficulty.
+        Types that appear in the last three history entries are skipped when
+        any other type is available; unknown difficulties use the level-1 pool.
+        """
+        pool = DIFFICULTY_TYPES.get(difficulty, DIFFICULTY_TYPES[1])
+        recently_asked = [entry.get("question_type") for entry in history[-3:]]
+        fresh = [candidate for candidate in pool if candidate not in recently_asked]
+        # If every type was asked recently, fall back to the whole pool.
+        return random.choice(fresh if fresh else pool)
+    def _get_hint_for_type(self, q_type: str) -> str:
+        """Return the grammar hint for a Gemini-generated question type.
+        Types without a dedicated hint get a generic reminder.
+        """
+        hint_by_type = {
+            "indirect_quote_dago": "V+ㄴ/는다고, Adj+다고, Past+었/았다고, Future+ㄹ 거라고",
+            "indirect_quote_commands": "(으)라고 commands, 지 말라고 negatives, 달라고/주라고 requests",
+            "indirect_quote_questions": "V/Adj + 냐고 (remember to drop ㄹ from stem)",
+            "indirect_quote_suggestions": "V + 자고 for suggestions",
+            "regret_expression": "(으)ㄹ 걸 그랬다 = should have done; 지 말 걸 = should not have done",
+        }
+        if q_type in hint_by_type:
+            return hint_by_type[q_type]
+        return "Check the grammar rule pattern"
+    def _build_payload(self, **kwargs) -> dict:
+        """Assemble the standardized question payload sent to Unity.
+        Every payload gets a fresh UUID ``question_id``. The remaining fields
+        are taken from ``kwargs`` and fall back to a default when omitted;
+        keyword arguments that are not payload fields are ignored.
+        """
+        # (field, default) pairs in payload order. The tuple is rebuilt on
+        # every call, so the list/dict defaults are never shared between
+        # payloads.
+        field_defaults = (
+            ("question_type", None),
+            ("interaction_mode", None),
+            ("prompt_korean", ""),
+            ("prompt_english", ""),
+            ("tokens", []),
+            ("correct_order", []),
+            ("slot_count", 0),
+            ("choices", []),
+            ("answer_key", None),
+            ("translation", ""),
+            ("grammar_rule", None),
+            ("hint", ""),
+            ("difficulty", 1),
+            ("metadata", {}),
+        )
+        payload = {"question_id": str(uuid.uuid4())}
+        for field, default in field_defaults:
+            payload[field] = kwargs.get(field, default)
+        return payload