Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -57,7 +57,6 @@ except Exception as e:
|
|
| 57 |
question_gen = QuestionGenerator(gemini_client=client)
|
| 58 |
|
| 59 |
# --- Session ID → socket SID mapping ---
|
| 60 |
-
# Maps socket session ID to learner model session ID
|
| 61 |
_socket_to_learner: dict[str, str] = {}
|
| 62 |
|
| 63 |
|
|
@@ -81,44 +80,109 @@ def decode_image(base64_string):
|
|
| 81 |
def sanitize_audio(input_path):
|
| 82 |
"""Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
|
| 83 |
output_path = input_path + "_clean.wav"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
command = [
|
| 85 |
-
"ffmpeg", "-y", "-v", "
|
| 86 |
"-i", input_path,
|
| 87 |
"-ac", "1",
|
| 88 |
"-ar", "16000",
|
| 89 |
"-acodec", "pcm_s16le",
|
| 90 |
output_path
|
| 91 |
]
|
|
|
|
|
|
|
|
|
|
| 92 |
try:
|
| 93 |
-
subprocess.run(
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
return output_path
|
|
|
|
| 96 |
except subprocess.CalledProcessError as e:
|
| 97 |
-
logger.error(f"❌ FFmpeg failed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
return None
|
| 99 |
except Exception as e:
|
| 100 |
-
logger.error(f"❌
|
| 101 |
return None
|
| 102 |
|
| 103 |
|
| 104 |
def analyze_audio_volume(file_path):
|
|
|
|
| 105 |
try:
|
| 106 |
with wave.open(file_path, 'rb') as wf:
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
raw_data = wf.readframes(nframes)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
fmt = "%dh" % (len(raw_data) // 2)
|
| 110 |
pcm_data = struct.unpack(fmt, raw_data)
|
|
|
|
| 111 |
if not pcm_data:
|
|
|
|
| 112 |
return False
|
|
|
|
| 113 |
max_val = max(abs(x) for x in pcm_data)
|
| 114 |
-
|
|
|
|
|
|
|
| 115 |
if max_val < 100:
|
| 116 |
-
logger.warning("⚠️ Audio appears SILENT
|
| 117 |
return False
|
|
|
|
|
|
|
|
|
|
| 118 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
except Exception as e:
|
| 120 |
-
logger.warning(f"Could not analyze audio: {e}")
|
| 121 |
-
return True
|
| 122 |
|
| 123 |
|
| 124 |
def get_learner(socket_sid: str):
|
|
@@ -162,7 +226,6 @@ def handle_disconnect():
|
|
| 162 |
learner_id = _socket_to_learner.pop(sid, None)
|
| 163 |
if learner_id:
|
| 164 |
logger.info(f"Client disconnected: socket={sid} learner={learner_id}")
|
| 165 |
-
# Don't delete learner model immediately - allow reconnect grace period
|
| 166 |
else:
|
| 167 |
logger.info(f"Client disconnected: socket={sid}")
|
| 168 |
|
|
@@ -173,26 +236,12 @@ def handle_disconnect():
|
|
| 173 |
|
| 174 |
@socketio.on('load_content_pack')
|
| 175 |
def handle_load_content_pack(data):
|
| 176 |
-
"""
|
| 177 |
-
Load a teacher-uploaded content pack.
|
| 178 |
-
|
| 179 |
-
Expected data:
|
| 180 |
-
{
|
| 181 |
-
"file_bytes": "<base64 encoded DOCX/PDF/JSON>",
|
| 182 |
-
"file_type": "json|docx|pdf",
|
| 183 |
-
"lesson": "KLP7-10",
|
| 184 |
-
"description": "optional description"
|
| 185 |
-
}
|
| 186 |
-
|
| 187 |
-
For JSON packs: must contain {"vocab": [...], "grammar_rules": {...}}
|
| 188 |
-
For DOCX/PDF: Gemini parses it into structured data
|
| 189 |
-
"""
|
| 190 |
logger.info("📦 Content pack upload received")
|
| 191 |
|
| 192 |
try:
|
| 193 |
-
file_type
|
| 194 |
-
file_b64
|
| 195 |
-
lesson
|
| 196 |
description = data.get("description", "Custom content pack")
|
| 197 |
|
| 198 |
if "," in file_b64:
|
|
@@ -215,7 +264,6 @@ def handle_load_content_pack(data):
|
|
| 215 |
})
|
| 216 |
|
| 217 |
elif file_type in ("docx", "pdf"):
|
| 218 |
-
# Use Gemini to parse the document into structured vocab + grammar
|
| 219 |
if not client:
|
| 220 |
emit('content_pack_loaded', {"success": False, "error": "Gemini not available"})
|
| 221 |
return
|
|
@@ -286,16 +334,6 @@ Grammar rule IDs should be snake_case."""
|
|
| 286 |
|
| 287 |
@socketio.on('request_question')
|
| 288 |
def handle_request_question(data):
|
| 289 |
-
"""
|
| 290 |
-
Generate the next question for the learner.
|
| 291 |
-
|
| 292 |
-
Expected data (all optional):
|
| 293 |
-
{
|
| 294 |
-
"grammar_rule": "topic_marker|copula|...", // force a specific type
|
| 295 |
-
"difficulty": 1, // override difficulty
|
| 296 |
-
"interaction_mode": "assemble|choose_select|fill_in|speak" // prefer a mode
|
| 297 |
-
}
|
| 298 |
-
"""
|
| 299 |
from flask import request as req
|
| 300 |
sid = req.sid
|
| 301 |
learner = get_learner(sid)
|
|
@@ -305,13 +343,10 @@ def handle_request_question(data):
|
|
| 305 |
return
|
| 306 |
|
| 307 |
try:
|
| 308 |
-
|
| 309 |
-
forced_rule = data.get("grammar_rule") if data else None
|
| 310 |
override_difficulty = data.get("difficulty") if data else None
|
| 311 |
-
difficulty
|
| 312 |
-
|
| 313 |
-
# Smart rule selection if not forced
|
| 314 |
-
grammar_rule = forced_rule or learner.get_recommended_rule()
|
| 315 |
|
| 316 |
logger.info(f"🎯 Generating question: rule={grammar_rule} difficulty={difficulty} session={learner.session_id}")
|
| 317 |
|
|
@@ -335,48 +370,27 @@ def handle_request_question(data):
|
|
| 335 |
|
| 336 |
@socketio.on('submit_answer')
|
| 337 |
def handle_submit_answer(data):
|
| 338 |
-
"""
|
| 339 |
-
Validate a player's answer.
|
| 340 |
-
|
| 341 |
-
Expected data:
|
| 342 |
-
{
|
| 343 |
-
"question_id": "uuid",
|
| 344 |
-
"question_type": "topic_marker|copula|...",
|
| 345 |
-
"grammar_rule": "topic_marker",
|
| 346 |
-
"interaction_mode": "choose_select|assemble|fill_in",
|
| 347 |
-
"answer": "는", // for choose_select / fill_in
|
| 348 |
-
"token_order": [1, 0, 2], // for assemble mode
|
| 349 |
-
"correct_order": [0, 1, 2], // expected order (from question payload)
|
| 350 |
-
"word_tested": "사과", // for particle questions
|
| 351 |
-
"particle_type": "topic|copula|subject|negative",
|
| 352 |
-
"attempt_number": 1
|
| 353 |
-
}
|
| 354 |
-
"""
|
| 355 |
from flask import request as req
|
| 356 |
-
sid
|
| 357 |
learner = get_learner(sid)
|
| 358 |
|
| 359 |
-
q_type
|
| 360 |
-
grammar_rule
|
| 361 |
interaction_mode = data.get("interaction_mode", "")
|
| 362 |
-
attempt
|
| 363 |
|
| 364 |
try:
|
| 365 |
correct = False
|
| 366 |
|
| 367 |
-
# ── Assemble mode: compare token order ──
|
| 368 |
if interaction_mode == "assemble":
|
| 369 |
submitted = data.get("token_order", [])
|
| 370 |
-
expected
|
| 371 |
-
correct
|
| 372 |
|
| 373 |
-
# ── Choose / fill-in: compare answer to answer_key ──
|
| 374 |
elif interaction_mode in ("choose_select", "fill_in"):
|
| 375 |
-
chosen
|
| 376 |
answer_key = str(data.get("answer_key", "")).strip()
|
| 377 |
-
|
| 378 |
-
# If particle validation, use rule engine
|
| 379 |
-
word_tested = data.get("word_tested")
|
| 380 |
particle_type = data.get("particle_type")
|
| 381 |
|
| 382 |
if word_tested and particle_type:
|
|
@@ -384,44 +398,40 @@ def handle_submit_answer(data):
|
|
| 384 |
else:
|
| 385 |
correct = (chosen == answer_key)
|
| 386 |
|
| 387 |
-
# ── Server-side re-check for indirect quote forms ──
|
| 388 |
if not correct and q_type in ("indirect_quote_dago", "indirect_quote_commands",
|
| 389 |
"indirect_quote_questions", "indirect_quote_suggestions"):
|
| 390 |
-
# For complex grammar, Gemini does a re-check if first attempt fails
|
| 391 |
if client and interaction_mode == "fill_in" and attempt <= 2:
|
| 392 |
correct = _gemini_recheck(data)
|
| 393 |
|
| 394 |
-
# Update mastery
|
| 395 |
if learner:
|
| 396 |
learner.record_outcome(grammar_rule, correct, interaction_mode)
|
| 397 |
|
| 398 |
-
# Build response
|
| 399 |
hint = None
|
| 400 |
if not correct:
|
| 401 |
-
word
|
| 402 |
ptype = data.get("particle_type")
|
| 403 |
if word and ptype:
|
| 404 |
hint = rule_engine.get_hint(word, ptype)
|
| 405 |
else:
|
| 406 |
hint = data.get("hint_text", "Review the grammar rule and try again")
|
| 407 |
|
| 408 |
-
retry_allowed
|
| 409 |
speech_stage_unlocked = correct
|
| 410 |
|
| 411 |
response = {
|
| 412 |
-
"question_id":
|
| 413 |
-
"correct":
|
| 414 |
-
"score_delta":
|
| 415 |
-
"feedback":
|
| 416 |
-
"hint":
|
| 417 |
-
"retry_allowed":
|
| 418 |
-
"attempt_number":
|
| 419 |
"speech_stage_unlocked": speech_stage_unlocked,
|
| 420 |
}
|
| 421 |
|
| 422 |
if learner:
|
| 423 |
response["mastery_update"] = dict(learner.mastery)
|
| 424 |
-
response["streak"]
|
| 425 |
|
| 426 |
emit('answer_result', response)
|
| 427 |
|
|
@@ -436,10 +446,9 @@ def handle_submit_answer(data):
|
|
| 436 |
|
| 437 |
|
| 438 |
def _gemini_recheck(data: dict) -> bool:
|
| 439 |
-
"""Use Gemini to re-check a complex indirect quotation answer."""
|
| 440 |
try:
|
| 441 |
prompt = f"""You are a Korean language grammar validator.
|
| 442 |
-
|
| 443 |
Direct speech: {data.get('direct_speech', '')}
|
| 444 |
Student's indirect speech: {data.get('answer', '')}
|
| 445 |
Expected indirect speech: {data.get('answer_key', '')}
|
|
@@ -461,7 +470,6 @@ Reply with ONLY valid JSON: {{"correct": true}} or {{"correct": false, "reason":
|
|
| 461 |
|
| 462 |
|
| 463 |
def _build_feedback(correct: bool, q_type: str, grammar_rule: str) -> str:
|
| 464 |
-
"""Build encouraging feedback message."""
|
| 465 |
if correct:
|
| 466 |
messages = [
|
| 467 |
"정확해요! Great job! 🎉",
|
|
@@ -473,170 +481,308 @@ def _build_feedback(correct: bool, q_type: str, grammar_rule: str) -> str:
|
|
| 473 |
return random.choice(messages)
|
| 474 |
else:
|
| 475 |
rule_hints = {
|
| 476 |
-
"topic_marker":
|
| 477 |
-
"copula":
|
| 478 |
-
"negative_copula":
|
| 479 |
-
"indirect_quote_dago":
|
| 480 |
-
"indirect_quote_commands":
|
| 481 |
-
"indirect_quote_questions":
|
| 482 |
-
"indirect_quote_suggestions":
|
| 483 |
-
"regret_expression":
|
| 484 |
}
|
| 485 |
base = "다시 해 보세요! Let's try again. "
|
| 486 |
return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
|
| 487 |
|
| 488 |
|
| 489 |
# ===========================================================================
|
| 490 |
-
# 4. PRONUNCIATION ASSESSMENT
|
| 491 |
# ===========================================================================
|
| 492 |
|
| 493 |
@socketio.on('assess_pronunciation')
|
| 494 |
def handle_pronunciation(data):
|
| 495 |
-
"""
|
| 496 |
-
Assess Korean (or any language) pronunciation via Azure.
|
| 497 |
-
|
| 498 |
-
Expected data:
|
| 499 |
-
{
|
| 500 |
-
"audio": "<base64 encoded audio>",
|
| 501 |
-
"text": "저는 학생이에요",
|
| 502 |
-
"lang": "ko-KR", // default ko-KR for Korean
|
| 503 |
-
"grammar_rule": "copula", // optional: for mastery tracking
|
| 504 |
-
"question_id": "uuid" // optional: link to question
|
| 505 |
-
}
|
| 506 |
-
"""
|
| 507 |
from flask import request as req
|
| 508 |
-
sid
|
| 509 |
learner = get_learner(sid)
|
| 510 |
|
| 511 |
-
ref_text
|
| 512 |
-
lang
|
| 513 |
grammar_rule = data.get('grammar_rule', '')
|
| 514 |
|
| 515 |
-
|
|
|
|
|
|
|
|
|
|
| 516 |
|
| 517 |
-
|
| 518 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
|
| 526 |
-
|
| 527 |
-
temp_raw.write(audio_bytes)
|
| 528 |
-
raw_path = temp_raw.name
|
| 529 |
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
|
|
|
| 533 |
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
reference_text=ref_text,
|
| 543 |
-
grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
|
| 544 |
-
granularity=speechsdk.PronunciationAssessmentGranularity.Word,
|
| 545 |
-
enable_miscue=True
|
| 546 |
-
)
|
| 547 |
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
|
|
|
|
|
|
|
|
|
| 553 |
|
| 554 |
-
|
| 555 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
response = {}
|
|
|
|
| 557 |
if result.reason == speechsdk.ResultReason.RecognizedSpeech:
|
| 558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
|
| 560 |
detailed_words = []
|
| 561 |
for word in pron_result.words:
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
accuracy = pron_result.accuracy_score
|
| 569 |
-
fluency = pron_result.fluency_score
|
| 570 |
-
completeness = pron_result.completeness_score
|
| 571 |
-
|
| 572 |
-
# Generate teacher-style feedback
|
| 573 |
-
feedback = _build_pronunciation_feedback(
|
| 574 |
-
accuracy, fluency, completeness, detailed_words, ref_text
|
| 575 |
-
)
|
| 576 |
|
| 577 |
response = {
|
| 578 |
-
"success":
|
| 579 |
-
"score":
|
| 580 |
-
"fluency":
|
| 581 |
-
"completeness":
|
| 582 |
"recognized_text": result.text,
|
| 583 |
-
"word_details":
|
| 584 |
-
"feedback":
|
| 585 |
-
"question_id":
|
| 586 |
}
|
| 587 |
|
| 588 |
-
# Update mastery if grammar rule provided and score is high
|
| 589 |
if learner and grammar_rule and accuracy >= 70:
|
| 590 |
learner.record_outcome(grammar_rule, True, "speak")
|
| 591 |
response["mastery_update"] = dict(learner.mastery)
|
| 592 |
|
| 593 |
-
logger.info(f"✅
|
| 594 |
|
| 595 |
elif result.reason == speechsdk.ResultReason.NoMatch:
|
|
|
|
|
|
|
|
|
|
| 596 |
response = {
|
| 597 |
-
"success": False,
|
| 598 |
-
"score": 0,
|
| 599 |
-
"fluency": 0,
|
| 600 |
-
"completeness": 0,
|
| 601 |
"recognized_text": "",
|
| 602 |
"word_details": [],
|
| 603 |
-
"feedback": "I couldn't hear you clearly. Please try speaking again.",
|
| 604 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
else:
|
|
|
|
| 606 |
response = {
|
| 607 |
-
"success": False,
|
| 608 |
-
"
|
| 609 |
-
"
|
| 610 |
-
"completeness": 0,
|
| 611 |
-
"recognized_text": "",
|
| 612 |
-
"word_details": [],
|
| 613 |
-
"feedback": "Error during recognition. Please try again.",
|
| 614 |
}
|
| 615 |
|
|
|
|
|
|
|
| 616 |
emit('pronunciation_result', response)
|
| 617 |
|
| 618 |
except Exception as e:
|
| 619 |
-
logger.error(f"
|
|
|
|
|
|
|
| 620 |
emit('pronunciation_result', {
|
| 621 |
-
"success": False,
|
| 622 |
-
"
|
| 623 |
-
"fluency": 0,
|
| 624 |
-
"completeness": 0,
|
| 625 |
-
"recognized_text": "",
|
| 626 |
-
"word_details": [],
|
| 627 |
"feedback": "Server error during assessment.",
|
| 628 |
})
|
| 629 |
finally:
|
| 630 |
if raw_path and os.path.exists(raw_path):
|
| 631 |
os.remove(raw_path)
|
|
|
|
| 632 |
if clean_path and os.path.exists(clean_path):
|
| 633 |
os.remove(clean_path)
|
|
|
|
| 634 |
|
| 635 |
|
| 636 |
def _build_pronunciation_feedback(accuracy: float, fluency: float,
|
| 637 |
completeness: float, words: list,
|
| 638 |
ref_text: str) -> str:
|
| 639 |
-
"""Build teacher-style pronunciation feedback."""
|
| 640 |
issues = [w for w in words if w.get("error") not in (None, "None", "") or w.get("score", 100) < 60]
|
| 641 |
|
| 642 |
if accuracy >= 85:
|
|
@@ -664,10 +810,6 @@ def _build_pronunciation_feedback(accuracy: float, fluency: float,
|
|
| 664 |
|
| 665 |
@socketio.on('get_mastery')
|
| 666 |
def handle_get_mastery(data):
|
| 667 |
-
"""
|
| 668 |
-
Unity polls this to display the learner's current mastery state.
|
| 669 |
-
Returns full learner model state for Unity to store if needed.
|
| 670 |
-
"""
|
| 671 |
from flask import request as req
|
| 672 |
learner = get_learner(req.sid)
|
| 673 |
|
|
@@ -680,17 +822,6 @@ def handle_get_mastery(data):
|
|
| 680 |
|
| 681 |
@socketio.on('restore_session')
|
| 682 |
def handle_restore_session(data):
|
| 683 |
-
"""
|
| 684 |
-
Unity can send a previously saved learner state to restore progress.
|
| 685 |
-
|
| 686 |
-
Expected data: the full state object from a previous get_mastery response.
|
| 687 |
-
{
|
| 688 |
-
"session_id": "...",
|
| 689 |
-
"mastery": {...},
|
| 690 |
-
"difficulty": 2,
|
| 691 |
-
...
|
| 692 |
-
}
|
| 693 |
-
"""
|
| 694 |
from flask import request as req
|
| 695 |
sid = req.sid
|
| 696 |
|
|
@@ -705,10 +836,10 @@ def handle_restore_session(data):
|
|
| 705 |
logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
|
| 706 |
|
| 707 |
emit('session_restored', {
|
| 708 |
-
"success":
|
| 709 |
-
"session_id":
|
| 710 |
-
"mastery":
|
| 711 |
-
"difficulty":
|
| 712 |
"question_count": learner.question_count,
|
| 713 |
})
|
| 714 |
|
|
@@ -719,17 +850,16 @@ def handle_restore_session(data):
|
|
| 719 |
|
| 720 |
@socketio.on('reset_session')
|
| 721 |
def handle_reset_session(data):
|
| 722 |
-
"""Reset the learner model for a fresh start."""
|
| 723 |
from flask import request as req
|
| 724 |
-
sid
|
| 725 |
learner = get_learner(sid)
|
| 726 |
|
| 727 |
if learner:
|
| 728 |
learner.reset()
|
| 729 |
logger.info(f"🔄 Session reset: {learner.session_id}")
|
| 730 |
emit('session_reset', {
|
| 731 |
-
"success":
|
| 732 |
-
"mastery":
|
| 733 |
"difficulty": learner.difficulty,
|
| 734 |
})
|
| 735 |
else:
|
|
@@ -738,16 +868,6 @@ def handle_reset_session(data):
|
|
| 738 |
|
| 739 |
@socketio.on('update_mastery')
|
| 740 |
def handle_update_mastery(data):
|
| 741 |
-
"""
|
| 742 |
-
Explicit mastery update from Unity (e.g. after a mini-game result).
|
| 743 |
-
|
| 744 |
-
Expected data:
|
| 745 |
-
{
|
| 746 |
-
"grammar_rule": "topic_marker",
|
| 747 |
-
"correct": true,
|
| 748 |
-
"interaction_mode": "assemble"
|
| 749 |
-
}
|
| 750 |
-
"""
|
| 751 |
from flask import request as req
|
| 752 |
learner = get_learner(req.sid)
|
| 753 |
|
|
@@ -756,21 +876,21 @@ def handle_update_mastery(data):
|
|
| 756 |
return
|
| 757 |
|
| 758 |
grammar_rule = data.get("grammar_rule", "")
|
| 759 |
-
correct
|
| 760 |
-
mode
|
| 761 |
|
| 762 |
if grammar_rule:
|
| 763 |
learner.record_outcome(grammar_rule, correct, mode)
|
| 764 |
|
| 765 |
emit('mastery_updated', {
|
| 766 |
-
"mastery":
|
| 767 |
"difficulty": learner.difficulty,
|
| 768 |
-
"streak":
|
| 769 |
})
|
| 770 |
|
| 771 |
|
| 772 |
# ===========================================================================
|
| 773 |
-
# 6. VISUAL RECOGNITION
|
| 774 |
# ===========================================================================
|
| 775 |
|
| 776 |
@socketio.on('verify_object')
|
|
@@ -791,9 +911,9 @@ def handle_object_verification(data):
|
|
| 791 |
schema = {
|
| 792 |
"type": "OBJECT",
|
| 793 |
"properties": {
|
| 794 |
-
"verified":
|
| 795 |
"confidence": {"type": "NUMBER"},
|
| 796 |
-
"feedback":
|
| 797 |
},
|
| 798 |
"required": ["verified", "feedback"]
|
| 799 |
}
|
|
@@ -823,7 +943,7 @@ Return JSON matching the schema."""
|
|
| 823 |
|
| 824 |
|
| 825 |
# ===========================================================================
|
| 826 |
-
# 7. HANDWRITING / OCR
|
| 827 |
# ===========================================================================
|
| 828 |
|
| 829 |
@socketio.on('verify_writing')
|
|
@@ -844,9 +964,9 @@ def handle_writing_verification(data):
|
|
| 844 |
schema = {
|
| 845 |
"type": "OBJECT",
|
| 846 |
"properties": {
|
| 847 |
-
"correct":
|
| 848 |
"detected_text": {"type": "STRING"},
|
| 849 |
-
"feedback":
|
| 850 |
},
|
| 851 |
"required": ["correct", "detected_text"]
|
| 852 |
}
|
|
@@ -875,29 +995,27 @@ Return JSON with: correct (bool), detected_text (what you read), feedback (brief
|
|
| 875 |
|
| 876 |
|
| 877 |
# ===========================================================================
|
| 878 |
-
# 8. GRAMMAR RULE INFO
|
| 879 |
# ===========================================================================
|
| 880 |
|
| 881 |
@socketio.on('get_grammar_rules')
|
| 882 |
def handle_get_grammar_rules(data):
|
| 883 |
-
"""Return all available grammar rules from the active content pack."""
|
| 884 |
pack = get_active_pack()
|
| 885 |
emit('grammar_rules', {
|
| 886 |
-
"rules":
|
| 887 |
"lesson": pack.get("lesson"),
|
| 888 |
})
|
| 889 |
|
| 890 |
|
| 891 |
@socketio.on('get_content_pack_info')
|
| 892 |
def handle_get_content_pack_info(data):
|
| 893 |
-
"""Return info about the active content pack (no full vocab dump)."""
|
| 894 |
pack = get_active_pack()
|
| 895 |
emit('content_pack_info', {
|
| 896 |
-
"lesson":
|
| 897 |
-
"version":
|
| 898 |
-
"vocab_count":
|
| 899 |
"grammar_rules": list(pack.get("grammar_rules", {}).keys()),
|
| 900 |
-
"metadata":
|
| 901 |
})
|
| 902 |
|
| 903 |
|
|
@@ -906,8 +1024,6 @@ def handle_get_content_pack_info(data):
|
|
| 906 |
# ===========================================================================
|
| 907 |
|
| 908 |
if __name__ == '__main__':
|
| 909 |
-
# Purge stale sessions on startup
|
| 910 |
purge_stale_sessions()
|
| 911 |
logger.info("🚀 KLP AI Service starting on port 7860")
|
| 912 |
-
# Port 7860 required for Hugging Face Spaces
|
| 913 |
socketio.run(app, host='0.0.0.0', port=7860)
|
|
|
|
| 57 |
question_gen = QuestionGenerator(gemini_client=client)
|
| 58 |
|
| 59 |
# --- Session ID → socket SID mapping ---
|
|
|
|
| 60 |
_socket_to_learner: dict[str, str] = {}
|
| 61 |
|
| 62 |
|
|
|
|
| 80 |
def sanitize_audio(input_path):
|
| 81 |
"""Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
|
| 82 |
output_path = input_path + "_clean.wav"
|
| 83 |
+
|
| 84 |
+
# --- STEP: Log input file info before conversion ---
|
| 85 |
+
try:
|
| 86 |
+
input_size = os.path.getsize(input_path)
|
| 87 |
+
logger.info(f"🔧 [FFmpeg] Input file: {input_path} | Size: {input_size} bytes")
|
| 88 |
+
if input_size == 0:
|
| 89 |
+
logger.error("❌ [FFmpeg] Input file is EMPTY (0 bytes) — audio was not captured correctly")
|
| 90 |
+
return None
|
| 91 |
+
except Exception as e:
|
| 92 |
+
logger.error(f"❌ [FFmpeg] Could not stat input file: {e}")
|
| 93 |
+
|
| 94 |
command = [
|
| 95 |
+
"ffmpeg", "-y", "-v", "verbose",
|
| 96 |
"-i", input_path,
|
| 97 |
"-ac", "1",
|
| 98 |
"-ar", "16000",
|
| 99 |
"-acodec", "pcm_s16le",
|
| 100 |
output_path
|
| 101 |
]
|
| 102 |
+
|
| 103 |
+
logger.info(f"🔧 [FFmpeg] Running command: {' '.join(command)}")
|
| 104 |
+
|
| 105 |
try:
|
| 106 |
+
result = subprocess.run(
|
| 107 |
+
command,
|
| 108 |
+
check=True,
|
| 109 |
+
capture_output=True,
|
| 110 |
+
text=True
|
| 111 |
+
)
|
| 112 |
+
logger.info(f"✅ [FFmpeg] Conversion successful → {output_path}")
|
| 113 |
+
if result.stderr:
|
| 114 |
+
# ffmpeg writes progress/info to stderr even on success
|
| 115 |
+
logger.info(f"🔧 [FFmpeg] stderr output:\n{result.stderr[:2000]}")
|
| 116 |
+
|
| 117 |
+
output_size = os.path.getsize(output_path)
|
| 118 |
+
logger.info(f"🔧 [FFmpeg] Output file size: {output_size} bytes")
|
| 119 |
+
if output_size == 0:
|
| 120 |
+
logger.error("❌ [FFmpeg] Output WAV is EMPTY — conversion produced no data")
|
| 121 |
+
return None
|
| 122 |
+
|
| 123 |
return output_path
|
| 124 |
+
|
| 125 |
except subprocess.CalledProcessError as e:
|
| 126 |
+
logger.error(f"❌ [FFmpeg] Process failed with return code {e.returncode}")
|
| 127 |
+
logger.error(f"❌ [FFmpeg] stdout: {e.stdout}")
|
| 128 |
+
logger.error(f"❌ [FFmpeg] stderr: {e.stderr}")
|
| 129 |
+
return None
|
| 130 |
+
except FileNotFoundError:
|
| 131 |
+
logger.error("❌ [FFmpeg] ffmpeg binary not found — is it installed in the Docker image?")
|
| 132 |
return None
|
| 133 |
except Exception as e:
|
| 134 |
+
logger.error(f"❌ [FFmpeg] Unexpected error: {e}")
|
| 135 |
return None
|
| 136 |
|
| 137 |
|
| 138 |
def analyze_audio_volume(file_path):
|
| 139 |
+
"""Inspect a WAV file: log frame rate, channels, duration, and peak amplitude."""
|
| 140 |
try:
|
| 141 |
with wave.open(file_path, 'rb') as wf:
|
| 142 |
+
framerate = wf.getframerate()
|
| 143 |
+
nframes = wf.getnframes()
|
| 144 |
+
channels = wf.getnchannels()
|
| 145 |
+
sampwidth = wf.getsampwidth()
|
| 146 |
+
duration_s = nframes / framerate if framerate else 0
|
| 147 |
+
|
| 148 |
+
logger.info(
|
| 149 |
+
f"🔊 [WAV] framerate={framerate}Hz | channels={channels} | "
|
| 150 |
+
f"sampwidth={sampwidth}B | nframes={nframes} | duration={duration_s:.2f}s"
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
if duration_s < 0.2:
|
| 154 |
+
logger.warning(f"⚠️ [WAV] Audio is very short ({duration_s:.2f}s) — may not be enough for recognition")
|
| 155 |
+
|
| 156 |
raw_data = wf.readframes(nframes)
|
| 157 |
+
if len(raw_data) == 0:
|
| 158 |
+
logger.error("❌ [WAV] No PCM data in file")
|
| 159 |
+
return False
|
| 160 |
+
|
| 161 |
fmt = "%dh" % (len(raw_data) // 2)
|
| 162 |
pcm_data = struct.unpack(fmt, raw_data)
|
| 163 |
+
|
| 164 |
if not pcm_data:
|
| 165 |
+
logger.error("❌ [WAV] PCM unpack produced no samples")
|
| 166 |
return False
|
| 167 |
+
|
| 168 |
max_val = max(abs(x) for x in pcm_data)
|
| 169 |
+
avg_val = sum(abs(x) for x in pcm_data) / len(pcm_data)
|
| 170 |
+
logger.info(f"🔊 [WAV] Peak amplitude: {max_val}/32767 | Avg amplitude: {avg_val:.1f}")
|
| 171 |
+
|
| 172 |
if max_val < 100:
|
| 173 |
+
logger.warning("⚠️ [WAV] Audio appears SILENT (peak < 100) — microphone may not be working")
|
| 174 |
return False
|
| 175 |
+
if max_val < 500:
|
| 176 |
+
logger.warning(f"⚠️ [WAV] Audio is very quiet (peak={max_val}) — may affect recognition accuracy")
|
| 177 |
+
|
| 178 |
return True
|
| 179 |
+
|
| 180 |
+
except wave.Error as e:
|
| 181 |
+
logger.error(f"❌ [WAV] wave.Error reading file: {e} — file may not be a valid WAV")
|
| 182 |
+
return False
|
| 183 |
except Exception as e:
|
| 184 |
+
logger.warning(f"⚠️ [WAV] Could not analyze audio volume: {e}")
|
| 185 |
+
return True # Don't block on analysis failure
|
| 186 |
|
| 187 |
|
| 188 |
def get_learner(socket_sid: str):
|
|
|
|
| 226 |
learner_id = _socket_to_learner.pop(sid, None)
|
| 227 |
if learner_id:
|
| 228 |
logger.info(f"Client disconnected: socket={sid} learner={learner_id}")
|
|
|
|
| 229 |
else:
|
| 230 |
logger.info(f"Client disconnected: socket={sid}")
|
| 231 |
|
|
|
|
| 236 |
|
| 237 |
@socketio.on('load_content_pack')
|
| 238 |
def handle_load_content_pack(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
logger.info("📦 Content pack upload received")
|
| 240 |
|
| 241 |
try:
|
| 242 |
+
file_type = data.get("file_type", "json").lower()
|
| 243 |
+
file_b64 = data.get("file_bytes", "")
|
| 244 |
+
lesson = data.get("lesson", "custom")
|
| 245 |
description = data.get("description", "Custom content pack")
|
| 246 |
|
| 247 |
if "," in file_b64:
|
|
|
|
| 264 |
})
|
| 265 |
|
| 266 |
elif file_type in ("docx", "pdf"):
|
|
|
|
| 267 |
if not client:
|
| 268 |
emit('content_pack_loaded', {"success": False, "error": "Gemini not available"})
|
| 269 |
return
|
|
|
|
| 334 |
|
| 335 |
@socketio.on('request_question')
|
| 336 |
def handle_request_question(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
from flask import request as req
|
| 338 |
sid = req.sid
|
| 339 |
learner = get_learner(sid)
|
|
|
|
| 343 |
return
|
| 344 |
|
| 345 |
try:
|
| 346 |
+
forced_rule = data.get("grammar_rule") if data else None
|
|
|
|
| 347 |
override_difficulty = data.get("difficulty") if data else None
|
| 348 |
+
difficulty = override_difficulty or learner.difficulty
|
| 349 |
+
grammar_rule = forced_rule or learner.get_recommended_rule()
|
|
|
|
|
|
|
| 350 |
|
| 351 |
logger.info(f"🎯 Generating question: rule={grammar_rule} difficulty={difficulty} session={learner.session_id}")
|
| 352 |
|
|
|
|
| 370 |
|
| 371 |
@socketio.on('submit_answer')
|
| 372 |
def handle_submit_answer(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
from flask import request as req
|
| 374 |
+
sid = req.sid
|
| 375 |
learner = get_learner(sid)
|
| 376 |
|
| 377 |
+
q_type = data.get("question_type", "")
|
| 378 |
+
grammar_rule = data.get("grammar_rule", q_type)
|
| 379 |
interaction_mode = data.get("interaction_mode", "")
|
| 380 |
+
attempt = data.get("attempt_number", 1)
|
| 381 |
|
| 382 |
try:
|
| 383 |
correct = False
|
| 384 |
|
|
|
|
| 385 |
if interaction_mode == "assemble":
|
| 386 |
submitted = data.get("token_order", [])
|
| 387 |
+
expected = data.get("correct_order", [])
|
| 388 |
+
correct = rule_engine.validate_token_order(submitted, expected)
|
| 389 |
|
|
|
|
| 390 |
elif interaction_mode in ("choose_select", "fill_in"):
|
| 391 |
+
chosen = str(data.get("answer", "")).strip()
|
| 392 |
answer_key = str(data.get("answer_key", "")).strip()
|
| 393 |
+
word_tested = data.get("word_tested")
|
|
|
|
|
|
|
| 394 |
particle_type = data.get("particle_type")
|
| 395 |
|
| 396 |
if word_tested and particle_type:
|
|
|
|
| 398 |
else:
|
| 399 |
correct = (chosen == answer_key)
|
| 400 |
|
|
|
|
| 401 |
if not correct and q_type in ("indirect_quote_dago", "indirect_quote_commands",
|
| 402 |
"indirect_quote_questions", "indirect_quote_suggestions"):
|
|
|
|
| 403 |
if client and interaction_mode == "fill_in" and attempt <= 2:
|
| 404 |
correct = _gemini_recheck(data)
|
| 405 |
|
|
|
|
| 406 |
if learner:
|
| 407 |
learner.record_outcome(grammar_rule, correct, interaction_mode)
|
| 408 |
|
|
|
|
| 409 |
hint = None
|
| 410 |
if not correct:
|
| 411 |
+
word = data.get("word_tested")
|
| 412 |
ptype = data.get("particle_type")
|
| 413 |
if word and ptype:
|
| 414 |
hint = rule_engine.get_hint(word, ptype)
|
| 415 |
else:
|
| 416 |
hint = data.get("hint_text", "Review the grammar rule and try again")
|
| 417 |
|
| 418 |
+
retry_allowed = not correct and attempt < 3
|
| 419 |
speech_stage_unlocked = correct
|
| 420 |
|
| 421 |
response = {
|
| 422 |
+
"question_id": data.get("question_id"),
|
| 423 |
+
"correct": correct,
|
| 424 |
+
"score_delta": 10 if correct else 0,
|
| 425 |
+
"feedback": _build_feedback(correct, q_type, grammar_rule),
|
| 426 |
+
"hint": hint,
|
| 427 |
+
"retry_allowed": retry_allowed,
|
| 428 |
+
"attempt_number": attempt,
|
| 429 |
"speech_stage_unlocked": speech_stage_unlocked,
|
| 430 |
}
|
| 431 |
|
| 432 |
if learner:
|
| 433 |
response["mastery_update"] = dict(learner.mastery)
|
| 434 |
+
response["streak"] = learner.streak
|
| 435 |
|
| 436 |
emit('answer_result', response)
|
| 437 |
|
|
|
|
| 446 |
|
| 447 |
|
| 448 |
def _gemini_recheck(data: dict) -> bool:
|
|
|
|
| 449 |
try:
|
| 450 |
prompt = f"""You are a Korean language grammar validator.
|
| 451 |
+
|
| 452 |
Direct speech: {data.get('direct_speech', '')}
|
| 453 |
Student's indirect speech: {data.get('answer', '')}
|
| 454 |
Expected indirect speech: {data.get('answer_key', '')}
|
|
|
|
| 470 |
|
| 471 |
|
| 472 |
def _build_feedback(correct: bool, q_type: str, grammar_rule: str) -> str:
|
|
|
|
| 473 |
if correct:
|
| 474 |
messages = [
|
| 475 |
"정확해요! Great job! 🎉",
|
|
|
|
| 481 |
return random.choice(messages)
|
| 482 |
else:
|
| 483 |
rule_hints = {
|
| 484 |
+
"topic_marker": "Remember: 은 for consonant endings, 는 for vowel endings",
|
| 485 |
+
"copula": "Remember: 이에요 for consonant endings, 예요 for vowel endings",
|
| 486 |
+
"negative_copula": "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
|
| 487 |
+
"indirect_quote_dago": "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
|
| 488 |
+
"indirect_quote_commands": "Review: (으)라고 commands, 지 말라고 negatives",
|
| 489 |
+
"indirect_quote_questions": "Review: V/Adj+냐고 (drop ㄹ from stem)",
|
| 490 |
+
"indirect_quote_suggestions":"Review: V+자고 for suggestions",
|
| 491 |
+
"regret_expression": "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
|
| 492 |
}
|
| 493 |
base = "다시 해 보세요! Let's try again. "
|
| 494 |
return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
|
| 495 |
|
| 496 |
|
| 497 |
# ===========================================================================
|
| 498 |
+
# 4. PRONUNCIATION ASSESSMENT
|
| 499 |
# ===========================================================================
|
| 500 |
|
| 501 |
@socketio.on('assess_pronunciation')
def handle_pronunciation(data):
    """Assess the learner's pronunciation of a reference text via Azure Speech.

    Expects in ``data``:
        text         -- the reference sentence the learner was asked to say
        audio        -- base64-encoded audio, optionally as a data URI
        lang         -- recognition language tag (default 'ko-KR')
        grammar_rule -- rule id credited toward mastery on a passing score
        question_id  -- echoed back on success so the client can correlate

    Emits a 'pronunciation_result' event on every code path (success,
    validation failure, Azure error, or unhandled exception). Temp files
    created during conversion are always cleaned up in ``finally``.
    """
    from flask import request as req
    sid = req.sid
    learner = get_learner(sid)

    ref_text = data.get('text')
    lang = data.get('lang', 'ko-KR')
    grammar_rule = data.get('grammar_rule', '')

    def _failure_payload(feedback: str) -> dict:
        # Every failure path emits the same zeroed-out payload; only the
        # feedback text differs, so build it in one place.
        return {
            "success": False, "score": 0, "fluency": 0, "completeness": 0,
            "recognized_text": "", "word_details": [],
            "feedback": feedback,
        }

    # ── STEP 1: Validate incoming data ──────────────────────────────────────
    logger.info("=" * 60)
    logger.info("🎤 [PRON] ── Pronunciation Assessment Start ──")
    logger.info(f"🎤 [PRON] ref_text='{ref_text}' | lang='{lang}' | grammar_rule='{grammar_rule}'")

    if not ref_text:
        logger.error("❌ [PRON] STEP 1 FAILED: No reference text provided in payload")
        emit('pronunciation_result', _failure_payload("No reference text provided."))
        return

    audio_b64 = data.get('audio', '')
    if not audio_b64:
        logger.error("❌ [PRON] STEP 1 FAILED: No audio data in payload")
        emit('pronunciation_result', _failure_payload("No audio data received."))
        return

    logger.info(f"🎤 [PRON] STEP 1 OK: ref_text present, audio_b64 length={len(audio_b64)} chars")

    # ── STEP 2: Validate Azure credentials ──────────────────────────────────
    logger.info("🎤 [PRON] STEP 2: Checking Azure credentials...")
    logger.info(f"🎤 [PRON] AZURE_SPEECH_KEY present: {bool(AZURE_SPEECH_KEY)} | length: {len(AZURE_SPEECH_KEY) if AZURE_SPEECH_KEY else 0}")
    logger.info(f"🎤 [PRON] AZURE_SPEECH_REGION: '{AZURE_SPEECH_REGION}'")

    if not AZURE_SPEECH_KEY:
        logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_KEY env var is not set")
        emit('pronunciation_result', _failure_payload("Azure Speech key not configured on server."))
        return

    if not AZURE_SPEECH_REGION:
        logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_REGION env var is not set")
        emit('pronunciation_result', _failure_payload("Azure Speech region not configured on server."))
        return

    logger.info("🎤 [PRON] STEP 2 OK: Azure credentials present")

    raw_path = None
    clean_path = None

    try:
        # ── STEP 3: Decode base64 audio ──────────────────────────────────────
        logger.info("🎤 [PRON] STEP 3: Decoding base64 audio...")
        try:
            if "," in audio_b64:
                # Browsers often send "data:audio/webm;base64,...." URIs.
                header, audio_b64 = audio_b64.split(",", 1)
                logger.info(f"🎤 [PRON] Stripped data URI header: '{header[:50]}'")
            audio_bytes = base64.b64decode(audio_b64)
            logger.info(f"🎤 [PRON] STEP 3 OK: Decoded {len(audio_bytes)} raw bytes")
        except Exception as e:
            logger.error(f"❌ [PRON] STEP 3 FAILED: base64 decode error: {e}")
            raise

        if len(audio_bytes) < 100:
            logger.error(f"❌ [PRON] STEP 3 FAILED: Decoded audio is suspiciously small ({len(audio_bytes)} bytes)")
            raise Exception(f"Audio payload too small: {len(audio_bytes)} bytes")

        # ── STEP 4: Write to temp file ────────────────────────────────────────
        logger.info("🎤 [PRON] STEP 4: Writing audio to temp file...")
        try:
            # delete=False so FFmpeg can reopen the file; removed in finally.
            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
                temp_raw.write(audio_bytes)
                raw_path = temp_raw.name
            logger.info(f"🎤 [PRON] STEP 4 OK: Wrote to {raw_path} ({os.path.getsize(raw_path)} bytes)")
        except Exception as e:
            logger.error(f"❌ [PRON] STEP 4 FAILED: Could not write temp file: {e}")
            raise

        # ── STEP 5: FFmpeg conversion ─────────────────────────────────────────
        logger.info("🎤 [PRON] STEP 5: Running FFmpeg conversion to 16kHz mono PCM WAV...")
        clean_path = sanitize_audio(raw_path)
        if not clean_path:
            logger.error("❌ [PRON] STEP 5 FAILED: sanitize_audio() returned None — see FFmpeg logs above")
            raise Exception("Audio conversion failed (FFmpeg error)")
        logger.info(f"🎤 [PRON] STEP 5 OK: Clean WAV at {clean_path}")

        # ── STEP 6: Analyze WAV integrity ─────────────────────────────────────
        logger.info("🎤 [PRON] STEP 6: Analyzing WAV file integrity and volume...")
        audio_ok = analyze_audio_volume(clean_path)
        if not audio_ok:
            logger.warning("⚠️ [PRON] STEP 6 WARNING: Audio appears silent — Azure will likely return NoMatch")
            # Don't raise — let Azure try anyway, it gives a cleaner error
        else:
            logger.info("🎤 [PRON] STEP 6 OK: WAV has audible signal")

        # ── STEP 7: Build Azure SpeechConfig ─────────────────────────────────
        logger.info("🎤 [PRON] STEP 7: Building Azure SpeechConfig...")
        try:
            speech_config = speechsdk.SpeechConfig(
                subscription=AZURE_SPEECH_KEY,
                region=AZURE_SPEECH_REGION
            )
            speech_config.speech_recognition_language = lang
            logger.info(f"🎤 [PRON] STEP 7 OK: SpeechConfig built — region={AZURE_SPEECH_REGION} lang={lang}")
        except Exception as e:
            logger.error(f"❌ [PRON] STEP 7 FAILED: SpeechConfig construction error: {e}")
            raise

        # ── STEP 8: Build AudioConfig ─────────────────────────────────────────
        logger.info(f"🎤 [PRON] STEP 8: Building AudioConfig from file: {clean_path}")
        try:
            audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
            logger.info("🎤 [PRON] STEP 8 OK: AudioConfig built")
        except Exception as e:
            logger.error(f"❌ [PRON] STEP 8 FAILED: AudioConfig construction error: {e}")
            raise

        # ── STEP 9: Build PronunciationAssessmentConfig ───────────────────────
        logger.info(f"🎤 [PRON] STEP 9: Building PronunciationAssessmentConfig for text: '{ref_text}'")
        try:
            pronunciation_config = speechsdk.PronunciationAssessmentConfig(
                reference_text=ref_text,
                grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
                granularity=speechsdk.PronunciationAssessmentGranularity.Word,
                enable_miscue=True
            )
            logger.info("🎤 [PRON] STEP 9 OK: PronunciationAssessmentConfig built")
        except Exception as e:
            logger.error(f"❌ [PRON] STEP 9 FAILED: PronunciationAssessmentConfig error: {e}")
            raise

        # ── STEP 10: Build SpeechRecognizer ──────────────────────────────────
        logger.info("🎤 [PRON] STEP 10: Building SpeechRecognizer...")
        try:
            recognizer = speechsdk.SpeechRecognizer(
                speech_config=speech_config,
                audio_config=audio_config
            )
            pronunciation_config.apply_to(recognizer)
            logger.info("🎤 [PRON] STEP 10 OK: SpeechRecognizer built, pronunciation config applied")
        except Exception as e:
            logger.error(f"❌ [PRON] STEP 10 FAILED: SpeechRecognizer construction error: {e}")
            raise

        # ── STEP 11: Call Azure (the actual network call) ─────────────────────
        logger.info("🎤 [PRON] STEP 11: Calling Azure recognize_once_async()... (network call)")
        try:
            result = recognizer.recognize_once_async().get()
            logger.info("🎤 [PRON] STEP 11 OK: Azure returned result")
            logger.info(f"🎤 [PRON] result.reason = {result.reason}")
            logger.info(f"🎤 [PRON] result.text = '{result.text}'")
        except Exception as e:
            logger.error(f"❌ [PRON] STEP 11 FAILED: recognize_once_async() threw: {e}")
            raise

        # ── STEP 12: Parse result ─────────────────────────────────────────────
        logger.info("🎤 [PRON] STEP 12: Parsing Azure result...")
        response = {}

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info("🎤 [PRON] STEP 12: Result reason = RecognizedSpeech ✅")
            try:
                pron_result = speechsdk.PronunciationAssessmentResult(result)
                accuracy = pron_result.accuracy_score
                fluency = pron_result.fluency_score
                completeness = pron_result.completeness_score
                logger.info(f"🎤 [PRON] Scores → accuracy={accuracy:.1f} fluency={fluency:.1f} completeness={completeness:.1f}")
            except Exception as e:
                logger.error(f"❌ [PRON] STEP 12 FAILED: PronunciationAssessmentResult parsing error: {e}")
                raise

            detailed_words = []
            for word in pron_result.words:
                w = {"word": word.word, "score": word.accuracy_score, "error": word.error_type}
                detailed_words.append(w)
                logger.info(f"🎤 [PRON] Word: '{word.word}' score={word.accuracy_score:.1f} error='{word.error_type}'")

            feedback = _build_pronunciation_feedback(accuracy, fluency, completeness, detailed_words, ref_text)

            response = {
                "success": True,
                "score": accuracy,
                "fluency": fluency,
                "completeness": completeness,
                "recognized_text": result.text,
                "word_details": detailed_words,
                "feedback": feedback,
                "question_id": data.get("question_id"),
            }

            # Only a passing score (>= 70) is credited toward grammar mastery.
            if learner and grammar_rule and accuracy >= 70:
                learner.record_outcome(grammar_rule, True, "speak")
                response["mastery_update"] = dict(learner.mastery)

            logger.info(f"✅ [PRON] STEP 12 OK: Assessment complete — acc={accuracy:.1f}")

        elif result.reason == speechsdk.ResultReason.NoMatch:
            no_match_detail = result.no_match_details if hasattr(result, 'no_match_details') else 'N/A'
            logger.warning("⚠️ [PRON] STEP 12: Result reason = NoMatch — Azure heard nothing useful")
            logger.warning(f"⚠️ [PRON] NoMatch details: {no_match_detail}")
            response = _failure_payload(
                "I couldn't hear you clearly. Please check your microphone and try speaking again."
            )

        elif result.reason == speechsdk.ResultReason.Canceled:
            try:
                cancellation = speechsdk.CancellationDetails(result)
                logger.error("❌ [PRON] STEP 12: Result reason = Canceled")
                logger.error(f"❌ [PRON] Cancellation reason: {cancellation.reason}")
                logger.error(f"❌ [PRON] Cancellation error code: {cancellation.error_code}")
                logger.error(f"❌ [PRON] Cancellation error details: {cancellation.error_details}")

                # Common cancellation codes
                if cancellation.reason == speechsdk.CancellationReason.Error:
                    if "401" in str(cancellation.error_details):
                        logger.error("❌ [PRON] HTTP 401 — Azure key is invalid or expired")
                    elif "403" in str(cancellation.error_details):
                        logger.error("❌ [PRON] HTTP 403 — Azure key doesn't have access to this region or feature")
                    elif "connection" in str(cancellation.error_details).lower():
                        logger.error("❌ [PRON] Connection error — Hugging Face Space may not have network access to Azure endpoint")

                response = _failure_payload(f"Recognition canceled: {cancellation.error_details}")
            except Exception as parse_e:
                logger.error(f"❌ [PRON] Could not parse CancellationDetails: {parse_e}")
                response = _failure_payload("Recognition was canceled by Azure.")

        else:
            logger.error(f"❌ [PRON] STEP 12: Unexpected result reason: {result.reason}")
            response = _failure_payload(f"Unexpected recognition result: {result.reason}")

        logger.info("🎤 [PRON] ── Assessment End ──")
        logger.info("=" * 60)
        emit('pronunciation_result', response)

    except Exception as e:
        logger.error(f"❌ [PRON] UNHANDLED EXCEPTION in handle_pronunciation: {type(e).__name__}: {e}")
        import traceback
        logger.error(f"❌ [PRON] Traceback:\n{traceback.format_exc()}")
        emit('pronunciation_result', _failure_payload("Server error during assessment."))
    finally:
        # Temp files are removed even when FFmpeg or Azure fail.
        if raw_path and os.path.exists(raw_path):
            os.remove(raw_path)
            logger.info(f"🧹 [PRON] Cleaned up raw file: {raw_path}")
        if clean_path and os.path.exists(clean_path):
            os.remove(clean_path)
            logger.info(f"🧹 [PRON] Cleaned up clean WAV: {clean_path}")
|
| 781 |
|
| 782 |
|
| 783 |
def _build_pronunciation_feedback(accuracy: float, fluency: float,
|
| 784 |
completeness: float, words: list,
|
| 785 |
ref_text: str) -> str:
|
|
|
|
| 786 |
issues = [w for w in words if w.get("error") not in (None, "None", "") or w.get("score", 100) < 60]
|
| 787 |
|
| 788 |
if accuracy >= 85:
|
|
|
|
| 810 |
|
| 811 |
@socketio.on('get_mastery')
|
| 812 |
def handle_get_mastery(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
from flask import request as req
|
| 814 |
learner = get_learner(req.sid)
|
| 815 |
|
|
|
|
| 822 |
|
| 823 |
@socketio.on('restore_session')
|
| 824 |
def handle_restore_session(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 825 |
from flask import request as req
|
| 826 |
sid = req.sid
|
| 827 |
|
|
|
|
| 836 |
logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
|
| 837 |
|
| 838 |
emit('session_restored', {
|
| 839 |
+
"success": True,
|
| 840 |
+
"session_id": learner_id,
|
| 841 |
+
"mastery": learner.mastery,
|
| 842 |
+
"difficulty": learner.difficulty,
|
| 843 |
"question_count": learner.question_count,
|
| 844 |
})
|
| 845 |
|
|
|
|
| 850 |
|
| 851 |
@socketio.on('reset_session')
|
| 852 |
def handle_reset_session(data):
|
|
|
|
| 853 |
from flask import request as req
|
| 854 |
+
sid = req.sid
|
| 855 |
learner = get_learner(sid)
|
| 856 |
|
| 857 |
if learner:
|
| 858 |
learner.reset()
|
| 859 |
logger.info(f"🔄 Session reset: {learner.session_id}")
|
| 860 |
emit('session_reset', {
|
| 861 |
+
"success": True,
|
| 862 |
+
"mastery": learner.mastery,
|
| 863 |
"difficulty": learner.difficulty,
|
| 864 |
})
|
| 865 |
else:
|
|
|
|
| 868 |
|
| 869 |
@socketio.on('update_mastery')
|
| 870 |
def handle_update_mastery(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 871 |
from flask import request as req
|
| 872 |
learner = get_learner(req.sid)
|
| 873 |
|
|
|
|
| 876 |
return
|
| 877 |
|
| 878 |
grammar_rule = data.get("grammar_rule", "")
|
| 879 |
+
correct = data.get("correct", False)
|
| 880 |
+
mode = data.get("interaction_mode", "")
|
| 881 |
|
| 882 |
if grammar_rule:
|
| 883 |
learner.record_outcome(grammar_rule, correct, mode)
|
| 884 |
|
| 885 |
emit('mastery_updated', {
|
| 886 |
+
"mastery": learner.mastery,
|
| 887 |
"difficulty": learner.difficulty,
|
| 888 |
+
"streak": learner.streak,
|
| 889 |
})
|
| 890 |
|
| 891 |
|
| 892 |
# ===========================================================================
|
| 893 |
+
# 6. VISUAL RECOGNITION
|
| 894 |
# ===========================================================================
|
| 895 |
|
| 896 |
@socketio.on('verify_object')
|
|
|
|
| 911 |
schema = {
|
| 912 |
"type": "OBJECT",
|
| 913 |
"properties": {
|
| 914 |
+
"verified": {"type": "BOOLEAN"},
|
| 915 |
"confidence": {"type": "NUMBER"},
|
| 916 |
+
"feedback": {"type": "STRING"}
|
| 917 |
},
|
| 918 |
"required": ["verified", "feedback"]
|
| 919 |
}
|
|
|
|
| 943 |
|
| 944 |
|
| 945 |
# ===========================================================================
|
| 946 |
+
# 7. HANDWRITING / OCR
|
| 947 |
# ===========================================================================
|
| 948 |
|
| 949 |
@socketio.on('verify_writing')
|
|
|
|
| 964 |
schema = {
|
| 965 |
"type": "OBJECT",
|
| 966 |
"properties": {
|
| 967 |
+
"correct": {"type": "BOOLEAN"},
|
| 968 |
"detected_text": {"type": "STRING"},
|
| 969 |
+
"feedback": {"type": "STRING"}
|
| 970 |
},
|
| 971 |
"required": ["correct", "detected_text"]
|
| 972 |
}
|
|
|
|
| 995 |
|
| 996 |
|
| 997 |
# ===========================================================================
|
| 998 |
+
# 8. GRAMMAR RULE INFO
|
| 999 |
# ===========================================================================
|
| 1000 |
|
| 1001 |
@socketio.on('get_grammar_rules')
def handle_get_grammar_rules(data):
    """Send the active content pack's grammar rules to the requesting client."""
    active = get_active_pack()
    payload = {
        "rules": active.get("grammar_rules", {}),
        "lesson": active.get("lesson"),
    }
    emit('grammar_rules', payload)
|
| 1008 |
|
| 1009 |
|
| 1010 |
@socketio.on('get_content_pack_info')
def handle_get_content_pack_info(data):
    """Send summary information about the active content pack to the client."""
    active = get_active_pack()
    rule_names = list(active.get("grammar_rules", {}).keys())
    payload = {
        "lesson": active.get("lesson"),
        "version": active.get("version"),
        "vocab_count": len(active.get("vocab", [])),
        "grammar_rules": rule_names,
        "metadata": active.get("metadata", {}),
    }
    emit('content_pack_info', payload)
|
| 1020 |
|
| 1021 |
|
|
|
|
| 1024 |
# ===========================================================================
|
| 1025 |
|
| 1026 |
if __name__ == '__main__':
    # Drop any learner sessions left over from a previous run before serving.
    purge_stale_sessions()
    logger.info("🚀 KLP AI Service starting on port 7860")
    # Bind to 0.0.0.0 so the service is reachable from outside the container.
    socketio.run(app, host='0.0.0.0', port=7860)
|