KoreAI-API

Sleeping

App Files Files Community

rairo committed on Mar 6

Commit

cd30a21

verified ·

1 Parent(s): bdfea50

Update app.py

Browse files

Files changed (1) hide show

app.py +713 -120

app.py CHANGED Viewed

@@ -7,10 +7,12 @@ import subprocess
 import wave
 import struct
 import logging
 import cv2
 import numpy as np
 from flask import Flask
 from flask_socketio import SocketIO, emit
 from PIL import Image
 # --- 2025 AI STANDARDS ---
@@ -18,8 +20,13 @@ from google import genai
 from google.genai import types
 import azure.cognitiveservices.speech as speechsdk
-# --- LOGGING SETUP (Critical for Hugging Face) ---
-# Hugging Face captures logs sent to stderr/stdout
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
@@ -34,14 +41,26 @@ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 AZURE_SPEECH_KEY = os.environ.get("AZURE_SPEECH_KEY")
 AZURE_SPEECH_REGION = os.environ.get("AZURE_SPEECH_REGION")
-# Initialize Gemini Client
 try:
     client = genai.Client(api_key=GEMINI_API_KEY)
     logger.info("✅ Gemini Client Initialized")
 except Exception as e:
     logger.error(f"❌ Failed to init Gemini: {e}")
-# --- HELPER: Base64 to PIL Image ---
 def decode_image(base64_string):
     try:
         if "," in base64_string:
@@ -54,20 +73,10 @@ def decode_image(base64_string):
         logger.error(f"Image Decode Error: {e}")
         return None
-# --- HELPER: Audio Sanitizer (The Fix for Azure) ---
 def sanitize_audio(input_path):
-    """
-    Forces audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV.
-    Uses FFmpeg (installed in Dockerfile).
-    """
     output_path = input_path + "_clean.wav"
-    # FFmpeg Command:
-    # -y: Overwrite output
-    # -i: Input file
-    # -ac 1: 1 Audio Channel (Mono)
-    # -ar 16000: 16000 Hz Sample Rate
-    # -acodec pcm_s16le: 16-bit Signed Integer PCM encoding
     command = [
         "ffmpeg", "-y", "-v", "error",
         "-i", input_path,
@@ -76,7 +85,6 @@ def sanitize_audio(input_path):
         "-acodec", "pcm_s16le",
         output_path
     ]
     try:
         subprocess.run(command, check=True)
         logger.info(f"✅ FFmpeg conversion successful: {output_path}")
@@ -88,187 +96,741 @@ def sanitize_audio(input_path):
         logger.error(f"❌ System error running FFmpeg: {e}")
         return None
 def analyze_audio_volume(file_path):
-    """
-    Checks if the WAV file actually contains sound or just silence.
-    """
     try:
         with wave.open(file_path, 'rb') as wf:
-            framerate = wf.getframerate()
             nframes = wf.getnframes()
-            channels = wf.getnchannels()
             raw_data = wf.readframes(nframes)
-            # Convert to 16-bit integers
             fmt = "%dh" % (len(raw_data) // 2)
             pcm_data = struct.unpack(fmt, raw_data)
             if not pcm_data:
                 return False
             max_val = max(abs(x) for x in pcm_data)
-            logger.info(f"🔊 Audio Stats - Rate: {framerate}Hz | Peak Amplitude: {max_val}/32767")
             if max_val < 100:
-                logger.warning("⚠️  Audio file appears to be SILENT.")
                 return False
             return True
     except Exception as e:
-        logger.warning(f"Could not analyze audio volume: {e}")
         return True
-# ==========================================
-# 1. VISUAL RECOGNITION (Wand/Pen)
-# ==========================================
-@socketio.on('verify_object')
-def handle_object_verification(data):
-    target = data.get('target', 'magic wand')
-    logger.info(f"👁️ Vision Request: Checking for '{target}'")
-    try:
-        pil_image = decode_image(data.get('image'))
-        if not pil_image:
-            emit('vision_result', {"verified": False, "feedback": "Could not decode image"})
-            return
-        img_byte_arr = io.BytesIO()
-        pil_image.save(img_byte_arr, format='JPEG', quality=80)
-        img_bytes = img_byte_arr.getvalue()
-        schema = {
-            "type": "OBJECT",
-            "properties": {
-                "verified": {"type": "BOOLEAN"},
-                "confidence": {"type": "NUMBER"},
-                "feedback": {"type": "STRING"}
-            },
-            "required": ["verified", "feedback"]
         }
-        prompt = f"""
-        You are the 'Eye of the Spellbook'.
-        Look at this image. Is the user holding a '{target}'?
-        IMPORTANT: Be lenient. If target is 'wand', accept a pen, pencil, chopstick, or stick.
-        Return JSON matching the schema.
-        """
-        response = client.models.generate_content(
-            model="gemini-2.0-flash",
-            contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
-            config=types.GenerateContentConfig(
-                response_mime_type="application/json",
-                response_schema=schema,
-                temperature=0.1
             )
         )
-        result = json.loads(response.text)
-        logger.info(f"👁️ AI Result: {result}")
-        emit('vision_result', result)
     except Exception as e:
-        logger.error(f"Vision Error: {e}")
-        emit('vision_result', {"verified": False, "feedback": "The magic eye is clouded (Server Error)."})
-# ==========================================
-# 2. PRONUNCIATION ASSESSMENT (The Spell)
-# ==========================================
 @socketio.on('assess_pronunciation')
 def handle_pronunciation(data):
     ref_text = data.get('text')
-    lang = data.get('lang', 'en-US')
-    logger.info(f"🎤 Audio Request: Assessing '{ref_text}'")
     raw_path = None
     clean_path = None
     try:
-        # 1. Decode and Save
         audio_b64 = data.get('audio')
         if "," in audio_b64:
             audio_b64 = audio_b64.split(",")[1]
         audio_bytes = base64.b64decode(audio_b64)
         with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
             temp_raw.write(audio_bytes)
             raw_path = temp_raw.name
-        # 2. Sanitize
         clean_path = sanitize_audio(raw_path)
-        if not clean_path: raise Exception("Audio conversion failed")
-        # 3. Configure Azure
-        speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
         speech_config.speech_recognition_language = lang
         audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
-        # Enable granular details
         pronunciation_config = speechsdk.PronunciationAssessmentConfig(
             reference_text=ref_text,
             grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
-            granularity=speechsdk.PronunciationAssessmentGranularity.Word, # Get Word-level details
             enable_miscue=True
         )
-        recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
         pronunciation_config.apply_to(recognizer)
-        # 4. Recognize
         result = recognizer.recognize_once_async().get()
         response = {}
         if result.reason == speechsdk.ResultReason.RecognizedSpeech:
             pron_result = speechsdk.PronunciationAssessmentResult(result)
-            # --- EXTRACT WORD DETAILS ---
             detailed_words = []
             for word in pron_result.words:
                 detailed_words.append({
                     "word": word.word,
                     "score": word.accuracy_score,
-                    "error": word.error_type # 'None', 'Omission', 'Insertion', 'Mispronunciation'
                 })
-            # ---------------------------
             response = {
                 "success": True,
-                "score": pron_result.accuracy_score,
-                "fluency": pron_result.fluency_score,
-                "completeness": pron_result.completeness_score,
                 "recognized_text": result.text,
-                "word_details": detailed_words # Send this array to UI
             }
-            logger.info(f"✅ Score: {pron_result.accuracy_score}")
         elif result.reason == speechsdk.ResultReason.NoMatch:
-            response = {"success": False, "score": 0, "recognized_text": "I couldn't hear you clearly."}
         else:
-            response = {"success": False, "score": 0, "recognized_text": "Error during recognition."}
         emit('pronunciation_result', response)
     except Exception as e:
-        logger.error(f"Audio Exception: {e}")
-        emit('pronunciation_result', {"success": False, "score": 0, "recognized_text": "Server Error"})
     finally:
-        if raw_path and os.path.exists(raw_path): os.remove(raw_path)
-        if clean_path and os.path.exists(clean_path): os.remove(clean_path)
-# ==========================================
-# 3. HANDWRITING/OCR
-# ==========================================
 @socketio.on('verify_writing')
 def handle_writing_verification(data):
-    expected = data.get('expected_word', 'of')
     logger.info(f"📖 Handwriting Check: Expecting '{expected}'")
     try:
         pil_image = decode_image(data.get('image'))
         if not pil_image:
             return
         img_byte_arr = io.BytesIO()
@@ -279,38 +841,69 @@ def handle_writing_verification(data):
             "type": "OBJECT",
             "properties": {
                 "correct": {"type": "BOOLEAN"},
-                "detected_text": {"type": "STRING"}
             },
             "required": ["correct", "detected_text"]
         }
-        prompt = f"Read the handwriting. Does it spell '{expected}'? Return JSON."
         response = client.models.generate_content(
             model="gemini-2.0-flash",
             contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
             config=types.GenerateContentConfig(
                 response_mime_type="application/json",
-                response_schema=schema
             )
         )
         result = json.loads(response.text)
-        logger.info(f"📖 Result: {result}")
         emit('writing_result', result)
     except Exception as e:
         logger.error(f"OCR Error: {e}")
-        emit('writing_result', {"correct": False, "detected_text": "Error"})
-@socketio.on('connect')
-def handle_connect():
-    logger.info(f"Client connected")
-@socketio.on('disconnect')
-def handle_disconnect():
-    logger.info(f"Client disconnected")
 if __name__ == '__main__':
-    # Port 7860 is required for Hugging Face Spaces
     socketio.run(app, host='0.0.0.0', port=7860)

 import wave
 import struct
 import logging
+import uuid
 import cv2
 import numpy as np
 from flask import Flask
 from flask_socketio import SocketIO, emit
 from PIL import Image
 # --- 2025 AI STANDARDS ---
 from google.genai import types
 import azure.cognitiveservices.speech as speechsdk
+# --- KLP Modules ---
+from korean_rules import rule_engine
+from content_pack import get_active_pack, replace_active_pack
+from learner_model import get_or_create_session, get_session, delete_session, purge_stale_sessions
+from question_generator import QuestionGenerator, QTYPE_TO_RULE
+# --- LOGGING SETUP ---
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 AZURE_SPEECH_KEY = os.environ.get("AZURE_SPEECH_KEY")
 AZURE_SPEECH_REGION = os.environ.get("AZURE_SPEECH_REGION")
+# --- Initialize Gemini Client ---
+# `client` stays None if construction raises; handlers that need Gemini test it before use.
+client = None
 try:
     client = genai.Client(api_key=GEMINI_API_KEY)
     logger.info("✅ Gemini Client Initialized")
 except Exception as e:
     logger.error(f"❌ Failed to init Gemini: {e}")
+# --- Initialize Question Generator ---
+# Receives the client as-is, so it may be handed None when Gemini failed to initialize.
+question_gen = QuestionGenerator(gemini_client=client)
+# --- Socket SID → learner session ID mapping ---
+# Keyed by the Socket.IO connection id; the value is the learner-model session id.
+# Entries are added on connect and removed on disconnect.
+_socket_to_learner: dict[str, str] = {}
+# ===========================================================================
+# HELPERS
+# ===========================================================================
 def decode_image(base64_string):
     try:
         if "," in base64_string:
         logger.error(f"Image Decode Error: {e}")
         return None
 def sanitize_audio(input_path):
+    """Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
     output_path = input_path + "_clean.wav"
     command = [
         "ffmpeg", "-y", "-v", "error",
         "-i", input_path,
         "-acodec", "pcm_s16le",
         output_path
     ]
     try:
         subprocess.run(command, check=True)
         logger.info(f"✅ FFmpeg conversion successful: {output_path}")
         logger.error(f"❌ System error running FFmpeg: {e}")
         return None
 def analyze_audio_volume(file_path):
     try:
         with wave.open(file_path, 'rb') as wf:
             nframes = wf.getnframes()
             raw_data = wf.readframes(nframes)
             fmt = "%dh" % (len(raw_data) // 2)
             pcm_data = struct.unpack(fmt, raw_data)
             if not pcm_data:
                 return False
             max_val = max(abs(x) for x in pcm_data)
+            logger.info(f"🔊 Audio Stats - Peak: {max_val}/32767")
             if max_val < 100:
+                logger.warning("⚠️  Audio appears SILENT.")
                 return False
             return True
     except Exception as e:
+        logger.warning(f"Could not analyze audio: {e}")
         return True
def get_learner(socket_sid: str):
    """Return the learner model tied to a socket connection, or None.

    The socket id is first translated to a learner session id through
    ``_socket_to_learner``; when no id is registered for the socket the
    session store is not consulted at all.
    """
    learner_id = _socket_to_learner.get(socket_sid)
    return get_session(learner_id) if learner_id else None
+# ===========================================================================
+# CONNECTION HANDLERS
+# ===========================================================================
@socketio.on('connect')
def handle_connect():
    """Register a new socket connection and announce the session to the client.

    A fresh learner id is generated for every connection, stored in
    ``_socket_to_learner`` under the socket id, and used to get or create the
    learner model. The client then receives a 'session_ready' event carrying
    the learner id, its mastery and difficulty, and a summary of the active
    content pack.
    """
    from flask import request

    sid = request.sid
    learner_id = str(uuid.uuid4())
    _socket_to_learner[sid] = learner_id
    model = get_or_create_session(learner_id)
    logger.info(f"✅ Client connected: socket={sid} learner={learner_id}")

    # Take one snapshot of the active pack so lesson, version and vocab count
    # all describe the same pack even if it is replaced while this runs.
    pack = get_active_pack()
    emit('session_ready', {
        "session_id": learner_id,
        "message": "Connected to KLP AI Service",
        "mastery": model.mastery,
        "difficulty": model.difficulty,
        "content_pack": {
            "lesson": pack.get("lesson"),
            "version": pack.get("version"),
            "vocab_count": len(pack.get("vocab", [])),
        }
    })
@socketio.on('disconnect')
def handle_disconnect():
    """Forget the socket-to-learner link for a closing connection.

    Only the mapping entry is dropped; the learner model itself is left in
    place so a reconnect grace period remains possible.
    """
    from flask import request

    sid = request.sid
    learner_id = _socket_to_learner.pop(sid, None)
    if not learner_id:
        logger.info(f"Client disconnected: socket={sid}")
        return
    logger.info(f"Client disconnected: socket={sid} learner={learner_id}")
+# ===========================================================================
+# 1. CONTENT PACK LOADER
+# ===========================================================================
@socketio.on('load_content_pack')
def handle_load_content_pack(data):
    """
    Load a teacher-uploaded content pack and make it the active pack.

    Expected data:
    {
        "file_bytes": "<base64 encoded DOCX/PDF/JSON>",
        "file_type": "json|docx|pdf",
        "lesson": "KLP7-10",
        "description": "optional description"
    }
    "file_type" defaults to "json"; a data-URL prefix on "file_bytes" is stripped.

    For JSON packs: must contain {"vocab": [...], "grammar_rules": {...}}.
    "lesson" / "description" from the request take precedence; when the
    request omits them the values inside the uploaded JSON are kept, and only
    then do the defaults "custom" / "Custom content pack" apply.

    For DOCX/PDF: Gemini parses it into structured data. The request's
    "lesson" / "description" are not applied on this path.

    The client always receives a 'content_pack_loaded' event; failures carry
    {"success": False, "error": "..."}.
    """
    logger.info("📦 Content pack upload received")
    try:
        # A missing payload is handled like an empty one so the client gets a
        # structured error instead of no reply at all.
        data = data or {}
        file_type = str(data.get("file_type") or "json").lower()
        file_b64 = data.get("file_bytes") or ""
        lesson = data.get("lesson")
        description = data.get("description")

        # Drop a "data:...;base64," prefix if the client sent a data URL.
        if "," in file_b64:
            file_b64 = file_b64.split(",")[1]
        file_bytes = base64.b64decode(file_b64)

        if file_type == "json":
            raw = json.loads(file_bytes.decode("utf-8"))
            if not isinstance(raw, dict):
                raise ValueError("JSON content pack must be an object")
            new_pack = replace_active_pack({
                **raw,
                # Request values win; otherwise keep what the file declares.
                "lesson": lesson or raw.get("lesson") or "custom",
                "description": description or raw.get("description") or "Custom content pack",
            })
            _emit_pack_loaded(new_pack, "json_upload")
        elif file_type in ("docx", "pdf"):
            # Use Gemini to parse the document into structured vocab + grammar
            if not client:
                emit('content_pack_loaded', {"success": False, "error": "Gemini not available"})
                return
            mime = "application/pdf" if file_type == "pdf" else \
                   "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            parse_prompt = """You are parsing a Korean language teaching document.
Extract all vocabulary items and grammar rules.
Return ONLY valid JSON in this exact structure:
{
  "vocab": [
    {"korean": "학생", "english": "student", "type": "noun"}
  ],
  "grammar_rules": {
    "rule_id": {
      "id": "rule_id",
      "name": "Rule Name",
      "description": "What the rule does",
      "examples": [{"sentence": "...", "translation": "..."}],
      "difficulty": 1
    }
  },
  "lesson": "lesson name/number",
  "description": "brief description"
}
Types for vocab: noun, verb, adjective, pronoun, adverb, expression
Grammar rule IDs should be snake_case."""
            # NOTE(review): confirm the Gemini API accepts the DOCX MIME type
            # as inline bytes; only the PDF path is obviously supported.
            response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[
                    parse_prompt,
                    types.Part.from_bytes(data=file_bytes, mime_type=mime)
                ],
            )
            text = (response.text or "").strip()
            # The model may wrap its JSON in a ```json ... ``` fence.
            if "```" in text:
                text = text.split("```")[1]
                if text.startswith("json"):
                    text = text[4:]
            parsed = json.loads(text.strip())
            new_pack = replace_active_pack(parsed)
            _emit_pack_loaded(new_pack, "gemini_parsed")
        else:
            emit('content_pack_loaded', {"success": False, "error": f"Unsupported file type: {file_type}"})
    except Exception as e:
        logger.error(f"Content pack load error: {e}")
        emit('content_pack_loaded', {"success": False, "error": str(e)})


def _emit_pack_loaded(new_pack, source):
    """Send the success summary for a freshly activated content pack."""
    emit('content_pack_loaded', {
        "success": True,
        "lesson": new_pack["lesson"],
        "vocab_count": len(new_pack["vocab"]),
        "grammar_rules": list(new_pack["grammar_rules"].keys()),
        "source": source,
    })
+# ===========================================================================
+# 2. QUESTION GENERATION
+# ===========================================================================
@socketio.on('request_question')
def handle_request_question(data):
    """
    Produce the learner's next question and send it as 'question_payload'.

    Expected data (all optional):
    {
        "grammar_rule": "topic_marker|copula|...",  // force a specific type
        "difficulty": 1,                             // override difficulty
        "interaction_mode": "assemble|choose_select|fill_in|speak"  // prefer a mode
    }
    Without an active learner session, or when generation fails, the payload
    carries only an "error" message.
    """
    from flask import request as req

    learner = get_learner(req.sid)
    if not learner:
        emit('question_payload', {"error": "No active session. Please reconnect."})
        return

    try:
        # An absent or empty payload simply means "no overrides".
        options = data if data else {}
        forced_rule = options.get("grammar_rule")
        override_difficulty = options.get("difficulty")

        # Fall back to the learner model wherever the client did not decide.
        difficulty = override_difficulty if override_difficulty else learner.difficulty
        grammar_rule = forced_rule if forced_rule else learner.get_recommended_rule()

        logger.info(f"🎯 Generating question: rule={grammar_rule} difficulty={difficulty} session={learner.session_id}")
        emit('question_payload', question_gen.generate(
            difficulty=difficulty,
            grammar_rule=grammar_rule,
            history=learner.history,
            session_id=learner.session_id,
        ))
    except Exception as e:
        logger.error(f"Question generation failed: {e}")
        emit('question_payload', {"error": "Could not generate question. Please try again."})
+# ===========================================================================
+# 3. ANSWER VALIDATION
+# ===========================================================================
@socketio.on('submit_answer')
def handle_submit_answer(data):
    """
    Validate a player's answer and reply with an 'answer_result' event.

    Expected data:
    {
        "question_id": "uuid",
        "question_type": "topic_marker|copula|...",
        "grammar_rule": "topic_marker",              // defaults to question_type
        "interaction_mode": "choose_select|assemble|fill_in",
        "answer": "는",                              // for choose_select / fill_in
        "answer_key": "는",                          // expected answer for choose_select / fill_in
        "token_order": [1, 0, 2],                    // for assemble mode
        "correct_order": [0, 1, 2],                  // expected order (from question payload)
        "word_tested": "사과",                        // for particle questions
        "particle_type": "topic|copula|subject|negative",
        "direct_speech": "...",                      // used by the Gemini re-check
        "hint_text": "...",                          // fallback hint when no particle hint applies
        "attempt_number": 1
    }

    Any other interaction mode is graded as incorrect. The outcome is recorded
    on the learner model when a session exists. If anything raises, the client
    still receives an 'answer_result' marked incorrect with retry allowed.
    """
    from flask import request as req

    learner = get_learner(req.sid)

    try:
        # Read the payload inside the try so a missing or malformed payload
        # still produces an answer_result instead of an unhandled error.
        data = data or {}
        q_type = data.get("question_type", "")
        grammar_rule = data.get("grammar_rule", q_type)
        interaction_mode = data.get("interaction_mode", "")

        # The attempt counter feeds numeric comparisons below; coerce it up
        # front so junk cannot raise after mastery has already been recorded.
        try:
            attempt = int(data.get("attempt_number", 1))
        except (TypeError, ValueError):
            attempt = 1

        correct = False

        # NOTE(review): "correct_order" and "answer_key" are supplied by the
        # client, so a modified client can grade itself. Looking the expected
        # answer up server-side by question_id would close that gap.

        # ── Assemble mode: compare token order ──
        if interaction_mode == "assemble":
            submitted = data.get("token_order", [])
            expected = data.get("correct_order", [])
            correct = rule_engine.validate_token_order(submitted, expected)

        # ── Choose / fill-in: compare answer to answer_key ──
        elif interaction_mode in ("choose_select", "fill_in"):
            chosen = str(data.get("answer", "")).strip()
            answer_key = str(data.get("answer_key", "")).strip()

            # Particle questions are graded by the rule engine, not the key.
            word_tested = data.get("word_tested")
            particle_type = data.get("particle_type")
            if word_tested and particle_type:
                correct = rule_engine.validate_particle_choice(word_tested, chosen, particle_type)
            else:
                correct = (chosen == answer_key)

        # ── Server-side re-check for indirect quote forms ──
        if not correct and q_type in ("indirect_quote_dago", "indirect_quote_commands",
                                       "indirect_quote_questions", "indirect_quote_suggestions"):
            # Several phrasings can be valid here, so Gemini gets a second
            # look at free-text answers on the first two attempts.
            if client and interaction_mode == "fill_in" and attempt <= 2:
                correct = _gemini_recheck(data)

        # Update mastery
        if learner:
            learner.record_outcome(grammar_rule, correct, interaction_mode)

        # Build response
        hint = None
        if not correct:
            word = data.get("word_tested")
            ptype = data.get("particle_type")
            if word and ptype:
                hint = rule_engine.get_hint(word, ptype)
            else:
                hint = data.get("hint_text", "Review the grammar rule and try again")

        response = {
            "question_id": data.get("question_id"),
            "correct": correct,
            "score_delta": 10 if correct else 0,
            "feedback": _build_feedback(correct, q_type, grammar_rule),
            "hint": hint,
            "retry_allowed": not correct and attempt < 3,
            "attempt_number": attempt,
            "speech_stage_unlocked": correct,
        }
        if learner:
            response["mastery_update"] = dict(learner.mastery)
            response["streak"] = learner.streak

        emit('answer_result', response)

    except Exception as e:
        logger.error(f"Answer validation error: {e}")
        emit('answer_result', {
            "correct": False,
            "score_delta": 0,
            "feedback": "Server error during validation",
            "retry_allowed": True,
        })
+def _gemini_recheck(data: dict) -> bool:
+    """Use Gemini to re-check a complex indirect quotation answer."""
+    try:
+        prompt = f"""You are a Korean language grammar validator.
+Direct speech: {data.get('direct_speech', '')}
+Student's indirect speech: {data.get('answer', '')}
+Expected indirect speech: {data.get('answer_key', '')}
+Is the student's answer grammatically correct as an indirect quotation?
+Consider: minor spacing differences are OK, but wrong particles or wrong verb endings are not.
+Reply with ONLY valid JSON: {{"correct": true}} or {{"correct": false, "reason": "explanation"}}"""
+        response = client.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=prompt,
+        )
+        result = json.loads(response.text.strip())
+        return result.get("correct", False)
+    except Exception as e:
+        logger.warning(f"Gemini recheck failed: {e}")
+        return False
+def _build_feedback(correct: bool, q_type: str, grammar_rule: str) -> str:
+    """Build encouraging feedback message."""
+    if correct:
+        messages = [
+            "정확해요! Great job! 🎉",
+            "맞아요! That's correct! ⭐",
+            "완벽해요! Perfect! 🌟",
+            "잘했어요! Well done! 👏",
+        ]
+        import random
+        return random.choice(messages)
+    else:
+        rule_hints = {
+            "topic_marker": "Remember: 은 for consonant endings, 는 for vowel endings",
+            "copula": "Remember: 이에요 for consonant endings, 예요 for vowel endings",
+            "negative_copula": "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
+            "indirect_quote_dago": "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
+            "indirect_quote_commands": "Review: (으)라고 commands, 지 말라고 negatives",
+            "indirect_quote_questions": "Review: V/Adj+냐고 (drop ㄹ from stem)",
+            "indirect_quote_suggestions": "Review: V+자고 for suggestions",
+            "regret_expression": "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
+        }
+        base = "다시 해 보세요! Let's try again. "
+        return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
+# ===========================================================================
+# 4. PRONUNCIATION ASSESSMENT (Azure Speech — existing, extended)
+# ===========================================================================
 @socketio.on('assess_pronunciation')
 def handle_pronunciation(data):
+    """
+    Assess Korean (or any language) pronunciation via Azure.
+    Expected data:
+    {
+        "audio": "<base64 encoded audio>",
+        "text": "저는 학생이에요",
+        "lang": "ko-KR",                    // default ko-KR for Korean
+        "grammar_rule": "copula",           // optional: for mastery tracking
+        "question_id": "uuid"               // optional: link to question
+    }
+    """
+    from flask import request as req
+    sid = req.sid
+    learner = get_learner(sid)
     ref_text = data.get('text')
+    lang = data.get('lang', 'ko-KR')
+    grammar_rule = data.get('grammar_rule', '')
+    logger.info(f"🎤 Pronunciation Assessment: '{ref_text}' [{lang}]")
     raw_path = None
     clean_path = None
     try:
         audio_b64 = data.get('audio')
         if "," in audio_b64:
             audio_b64 = audio_b64.split(",")[1]
         audio_bytes = base64.b64decode(audio_b64)
         with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
             temp_raw.write(audio_bytes)
             raw_path = temp_raw.name
         clean_path = sanitize_audio(raw_path)
+        if not clean_path:
+            raise Exception("Audio conversion failed")
+        speech_config = speechsdk.SpeechConfig(
+            subscription=AZURE_SPEECH_KEY,
+            region=AZURE_SPEECH_REGION
+        )
         speech_config.speech_recognition_language = lang
         audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
         pronunciation_config = speechsdk.PronunciationAssessmentConfig(
             reference_text=ref_text,
             grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
+            granularity=speechsdk.PronunciationAssessmentGranularity.Word,
             enable_miscue=True
         )
+        recognizer = speechsdk.SpeechRecognizer(
+            speech_config=speech_config,
+            audio_config=audio_config
+        )
         pronunciation_config.apply_to(recognizer)
         result = recognizer.recognize_once_async().get()
         response = {}
         if result.reason == speechsdk.ResultReason.RecognizedSpeech:
             pron_result = speechsdk.PronunciationAssessmentResult(result)
             detailed_words = []
             for word in pron_result.words:
                 detailed_words.append({
                     "word": word.word,
                     "score": word.accuracy_score,
+                    "error": word.error_type
                 })
+            accuracy = pron_result.accuracy_score
+            fluency = pron_result.fluency_score
+            completeness = pron_result.completeness_score
+            # Generate teacher-style feedback
+            feedback = _build_pronunciation_feedback(
+                accuracy, fluency, completeness, detailed_words, ref_text
+            )
             response = {
                 "success": True,
+                "score": accuracy,
+                "fluency": fluency,
+                "completeness": completeness,
                 "recognized_text": result.text,
+                "word_details": detailed_words,
+                "feedback": feedback,
+                "question_id": data.get("question_id"),
             }
+            # Update mastery if grammar rule provided and score is high
+            if learner and grammar_rule and accuracy >= 70:
+                learner.record_outcome(grammar_rule, True, "speak")
+                response["mastery_update"] = dict(learner.mastery)
+            logger.info(f"✅ Pronunciation: acc={accuracy:.1f} fluency={fluency:.1f}")
         elif result.reason == speechsdk.ResultReason.NoMatch:
+            response = {
+                "success": False,
+                "score": 0,
+                "fluency": 0,
+                "completeness": 0,
+                "recognized_text": "",
+                "word_details": [],
+                "feedback": "I couldn't hear you clearly. Please try speaking again.",
+            }
         else:
+            response = {
+                "success": False,
+                "score": 0,
+                "fluency": 0,
+                "completeness": 0,
+                "recognized_text": "",
+                "word_details": [],
+                "feedback": "Error during recognition. Please try again.",
+            }
         emit('pronunciation_result', response)
     except Exception as e:
+        logger.error(f"Pronunciation Error: {e}")
+        emit('pronunciation_result', {
+            "success": False,
+            "score": 0,
+            "fluency": 0,
+            "completeness": 0,
+            "recognized_text": "",
+            "word_details": [],
+            "feedback": "Server error during assessment.",
+        })
     finally:
+        if raw_path and os.path.exists(raw_path):
+            os.remove(raw_path)
+        if clean_path and os.path.exists(clean_path):
+            os.remove(clean_path)
+def _build_pronunciation_feedback(accuracy: float, fluency: float,
+                                   completeness: float, words: list,
+                                   ref_text: str) -> str:
+    """Build teacher-style pronunciation feedback."""
+    issues = [w for w in words if w.get("error") not in (None, "None", "") or w.get("score", 100) < 60]
+    if accuracy >= 85:
+        base = "훌륭해요! Excellent pronunciation! 🌟"
+    elif accuracy >= 70:
+        base = "잘했어요! Good pronunciation! Keep practicing."
+    elif accuracy >= 50:
+        base = "괜찮아요! Not bad, but let's work on a few sounds."
+    else:
+        base = "다시 해 보세요! Let's practice this together."
+    if issues:
+        problem_words = [w["word"] for w in issues[:3]]
+        base += f" Pay attention to: {', '.join(problem_words)}"
+    if fluency < 60:
+        base += " Try to speak more smoothly without pausing between words."
+    return base
+# ===========================================================================
+# 5. MASTERY & SESSION MANAGEMENT
+# ===========================================================================
@socketio.on('get_mastery')
def handle_get_mastery(data):
    """
    Report the learner model state for the calling socket.

    Looks up the learner bound to the current socket id and emits
    'mastery_state' carrying learner.get_state(). When no learner is
    found, 'mastery_state' carries an error payload instead. The incoming
    `data` payload is not read.
    """
    from flask import request as req

    current = get_learner(req.sid)
    if current:
        emit('mastery_state', current.get_state())
    else:
        emit('mastery_state', {"error": "No active session"})
@socketio.on('restore_session')
def handle_restore_session(data):
    """
    Load a previously saved learner state into the caller's session.

    `data` is handed unchanged to learner.set_state(); per the client
    contract it is the state object returned by an earlier get_mastery
    call (session_id, mastery, difficulty, ...).

    Emits 'session_restored' with success=True plus the restored mastery,
    difficulty and question count, or success=False with an error message
    when the socket has no learner mapping or restoring raises.
    """
    from flask import request as req

    sid = req.sid
    try:
        learner_id = _socket_to_learner.get(sid)
        if learner_id:
            learner = get_or_create_session(learner_id)
            learner.set_state(data)
            logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
            reply = {
                "success": True,
                "session_id": learner_id,
                "mastery": learner.mastery,
                "difficulty": learner.difficulty,
                "question_count": learner.question_count,
            }
        else:
            # The socket was never bound to a learner id.
            reply = {"success": False, "error": "No active socket session"}
        emit('session_restored', reply)
    except Exception as e:
        logger.error(f"Session restore error: {e}")
        emit('session_restored', {"success": False, "error": str(e)})
@socketio.on('reset_session')
def handle_reset_session(data):
    """
    Wipe the caller's learner model so they can start over.

    Emits 'session_reset' with the post-reset mastery and difficulty, or
    an error payload when the socket has no learner. `data` is not read.
    """
    from flask import request as req

    learner = get_learner(req.sid)
    if not learner:
        emit('session_reset', {"success": False, "error": "No active session"})
        return

    learner.reset()
    logger.info(f"🔄 Session reset: {learner.session_id}")
    fresh_state = {
        "success": True,
        "mastery": learner.mastery,
        "difficulty": learner.difficulty,
    }
    emit('session_reset', fresh_state)
@socketio.on('update_mastery')
def handle_update_mastery(data):
    """
    Explicit mastery update from Unity (e.g. after a mini-game result).

    Expected data:
    {
        "grammar_rule": "topic_marker",
        "correct": true,
        "interaction_mode": "assemble"
    }

    A payload that is not a dict is treated as empty, so the client still
    receives 'mastery_updated' with the current state instead of the handler
    raising AttributeError and sending nothing. An outcome is recorded only
    when "grammar_rule" is non-empty.

    Emits 'mastery_updated' with the learner's mastery, difficulty and
    streak, or with an error payload when the socket has no learner.
    """
    from flask import request as req

    learner = get_learner(req.sid)
    if not learner:
        emit('mastery_updated', {"error": "No active session"})
        return

    # Socket payloads are client-controlled; tolerate null/str/list input.
    payload = data if isinstance(data, dict) else {}
    grammar_rule = payload.get("grammar_rule", "")
    correct = payload.get("correct", False)
    mode = payload.get("interaction_mode", "")

    if grammar_rule:
        learner.record_outcome(grammar_rule, correct, mode)

    emit('mastery_updated', {
        "mastery": learner.mastery,
        "difficulty": learner.difficulty,
        "streak": learner.streak,
    })
+# ===========================================================================
+# 6. VISUAL RECOGNITION (existing — wand/pen)
+# ===========================================================================
@socketio.on('verify_object')
def handle_object_verification(data):
    """
    Ask Gemini whether the submitted camera frame shows the target object.

    Reads `target` (default 'magic wand') and a base64 `image` from the
    payload, re-encodes the image as JPEG, and requests a JSON verdict
    constrained by a response schema with the fields verified, confidence
    and feedback.

    Emits 'vision_result' with the parsed verdict. If the image cannot be
    decoded, or anything raises while encoding or calling the model, it
    emits verified=False with a feedback message.
    """
    # Socket payloads are client-controlled; a non-dict payload is treated as
    # empty so the caller gets the "could not decode" reply rather than none.
    payload = data if isinstance(data, dict) else {}
    target = payload.get('target', 'magic wand')
    logger.info(f"👁️ Vision Request: Checking for '{target}'")
    try:
        pil_image = decode_image(payload.get('image'))
        if not pil_image:
            emit('vision_result', {"verified": False, "feedback": "Could not decode image"})
            return

        # JPEG cannot store alpha or palette data; Pillow raises OSError when
        # asked to save e.g. an RGBA PNG frame. Normalise first. This is a
        # no-op if decode_image already returns RGB.
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")

        img_byte_arr = io.BytesIO()
        pil_image.save(img_byte_arr, format='JPEG', quality=80)
        img_bytes = img_byte_arr.getvalue()

        schema = {
            "type": "OBJECT",
            "properties": {
                "verified": {"type": "BOOLEAN"},
                "confidence": {"type": "NUMBER"},
                "feedback": {"type": "STRING"}
            },
            "required": ["verified", "feedback"]
        }
        # NOTE(review): `target` is interpolated into the prompt verbatim;
        # confirm it only ever comes from a trusted client.
        prompt = f"""You are the 'Eye of the Spellbook'.
Look at this image. Is the user holding a '{target}'?
IMPORTANT: Be lenient. If target is 'wand', accept a pen, pencil, chopstick, or stick.
Return JSON matching the schema."""
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=schema,
                temperature=0.1
            )
        )
        result = json.loads(response.text)
        logger.info(f"👁️ Vision Result: {result}")
        emit('vision_result', result)
    except Exception as e:
        logger.error(f"Vision Error: {e}")
        emit('vision_result', {"verified": False, "feedback": "The magic eye is clouded (Server Error)."})
+# ===========================================================================
+# 7. HANDWRITING / OCR (existing)
+# ===========================================================================
 @socketio.on('verify_writing')
 def handle_writing_verification(data):
+    expected = data.get('expected_word', '')
     logger.info(f"📖 Handwriting Check: Expecting '{expected}'")
     try:
         pil_image = decode_image(data.get('image'))
         if not pil_image:
+            emit('writing_result', {"correct": False, "detected_text": "Could not decode image"})
             return
         img_byte_arr = io.BytesIO()
             "type": "OBJECT",
             "properties": {
                 "correct": {"type": "BOOLEAN"},
+                "detected_text": {"type": "STRING"},
+                "feedback": {"type": "STRING"}
             },
             "required": ["correct", "detected_text"]
         }
+        prompt = f"""Read the handwriting in this image.
+Does it spell '{expected}'?
+Be lenient with stroke variation but strict about the actual characters.
+Return JSON with: correct (bool), detected_text (what you read), feedback (brief comment)."""
         response = client.models.generate_content(
             model="gemini-2.0-flash",
             contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
             config=types.GenerateContentConfig(
                 response_mime_type="application/json",
+                response_schema=schema,
             )
         )
         result = json.loads(response.text)
+        logger.info(f"📖 Writing Result: {result}")
         emit('writing_result', result)
     except Exception as e:
         logger.error(f"OCR Error: {e}")
+        emit('writing_result', {"correct": False, "detected_text": "Error", "feedback": "Server error"})
+# ===========================================================================
+# 8. GRAMMAR RULE INFO (utility for UI)
+# ===========================================================================
@socketio.on('get_grammar_rules')
def handle_get_grammar_rules(data):
    """
    Emit 'grammar_rules' with the active content pack's grammar rules
    (an empty dict when the pack has none) and its lesson value.
    `data` is not read.
    """
    active_pack = get_active_pack()
    rules = active_pack.get("grammar_rules", {})
    lesson = active_pack.get("lesson")
    emit('grammar_rules', {"rules": rules, "lesson": lesson})
@socketio.on('get_content_pack_info')
def handle_get_content_pack_info(data):
    """
    Emit 'content_pack_info' with a summary of the active content pack:
    lesson, version, number of vocab entries, grammar rule names and
    metadata. The vocab itself is not sent. `data` is not read.
    """
    active_pack = get_active_pack()
    vocab_entries = active_pack.get("vocab", [])
    rule_table = active_pack.get("grammar_rules", {})

    summary = {
        "lesson": active_pack.get("lesson"),
        "version": active_pack.get("version"),
        "vocab_count": len(vocab_entries),
        "grammar_rules": list(rule_table.keys()),
        "metadata": active_pack.get("metadata", {}),
    }
    emit('content_pack_info', summary)
+# ===========================================================================
+# ENTRY POINT
+# ===========================================================================
 if __name__ == '__main__':
+    # Purge stale sessions once at startup, before the server starts serving
+    purge_stale_sessions()
+    logger.info("🚀 KLP AI Service starting on port 7860")
+    # Port 7860 is required for Hugging Face Spaces; host 0.0.0.0 binds all interfaces
     socketio.run(app, host='0.0.0.0', port=7860)