KoreAI-API

Sleeping

App Files Files Community

rairo committed on Mar 6

Commit

8cfbcbe

verified ·

1 Parent(s): c138057

Update app.py

Browse files

Files changed (1) hide show

app.py +164 -129

app.py CHANGED Viewed

@@ -8,6 +8,23 @@ import wave
 import struct
 import logging
 import uuid
 import cv2
 import numpy as np
 from flask import Flask
@@ -26,10 +43,9 @@ from content_pack import get_active_pack, replace_active_pack
 from learner_model import get_or_create_session, get_session, delete_session, purge_stale_sessions
 from question_generator import QuestionGenerator, QTYPE_TO_RULE
-import sys
 sys.path.append(os.path.dirname(__file__))
 # --- LOGGING SETUP ---
 logging.basicConfig(
     level=logging.INFO,
@@ -81,7 +97,7 @@ def sanitize_audio(input_path):
     """Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
     output_path = input_path + "_clean.wav"
-    # --- STEP: Log input file info before conversion ---
     try:
         input_size = os.path.getsize(input_path)
         logger.info(f"🔧 [FFmpeg] Input file: {input_path} | Size: {input_size} bytes")
@@ -100,22 +116,16 @@ def sanitize_audio(input_path):
         output_path
     ]
-    logger.info(f"🔧 [FFmpeg] Running command: {' '.join(command)}")
     try:
-        result = subprocess.run(
-            command,
-            check=True,
-            capture_output=True,
-            text=True
-        )
         logger.info(f"✅ [FFmpeg] Conversion successful → {output_path}")
         if result.stderr:
-            # ffmpeg writes progress/info to stderr even on success
-            logger.info(f"🔧 [FFmpeg] stderr output:\n{result.stderr[:2000]}")
         output_size = os.path.getsize(output_path)
-        logger.info(f"🔧 [FFmpeg] Output file size: {output_size} bytes")
         if output_size == 0:
             logger.error("❌ [FFmpeg] Output WAV is EMPTY — conversion produced no data")
             return None
@@ -123,7 +133,7 @@ def sanitize_audio(input_path):
         return output_path
     except subprocess.CalledProcessError as e:
-        logger.error(f"❌ [FFmpeg] Process failed with return code {e.returncode}")
         logger.error(f"❌ [FFmpeg] stdout: {e.stdout}")
         logger.error(f"❌ [FFmpeg] stderr: {e.stderr}")
         return None
@@ -136,14 +146,14 @@ def sanitize_audio(input_path):
 def analyze_audio_volume(file_path):
-    """Inspect a WAV file: log frame rate, channels, duration, and peak amplitude."""
     try:
         with wave.open(file_path, 'rb') as wf:
-            framerate   = wf.getframerate()
-            nframes     = wf.getnframes()
-            channels    = wf.getnchannels()
-            sampwidth   = wf.getsampwidth()
-            duration_s  = nframes / framerate if framerate else 0
             logger.info(
                 f"🔊 [WAV] framerate={framerate}Hz | channels={channels} | "
@@ -151,7 +161,7 @@ def analyze_audio_volume(file_path):
             )
             if duration_s < 0.2:
-                logger.warning(f"⚠️  [WAV] Audio is very short ({duration_s:.2f}s) — may not be enough for recognition")
             raw_data = wf.readframes(nframes)
             if len(raw_data) == 0:
@@ -167,7 +177,7 @@ def analyze_audio_volume(file_path):
             max_val = max(abs(x) for x in pcm_data)
             avg_val = sum(abs(x) for x in pcm_data) / len(pcm_data)
-            logger.info(f"🔊 [WAV] Peak amplitude: {max_val}/32767 | Avg amplitude: {avg_val:.1f}")
             if max_val < 100:
                 logger.warning("⚠️  [WAV] Audio appears SILENT (peak < 100) — microphone may not be working")
@@ -178,10 +188,10 @@ def analyze_audio_volume(file_path):
             return True
     except wave.Error as e:
-        logger.error(f"❌ [WAV] wave.Error reading file: {e} — file may not be a valid WAV")
         return False
     except Exception as e:
-        logger.warning(f"⚠️  [WAV] Could not analyze audio volume: {e}")
         return True  # Don't block on analysis failure
@@ -388,8 +398,8 @@ def handle_submit_answer(data):
             correct   = rule_engine.validate_token_order(submitted, expected)
         elif interaction_mode in ("choose_select", "fill_in"):
-            chosen     = str(data.get("answer", "")).strip()
-            answer_key = str(data.get("answer_key", "")).strip()
             word_tested   = data.get("word_tested")
             particle_type = data.get("particle_type")
@@ -415,17 +425,17 @@ def handle_submit_answer(data):
             else:
                 hint = data.get("hint_text", "Review the grammar rule and try again")
-        retry_allowed        = not correct and attempt < 3
         speech_stage_unlocked = correct
         response = {
-            "question_id":          data.get("question_id"),
-            "correct":              correct,
-            "score_delta":          10 if correct else 0,
-            "feedback":             _build_feedback(correct, q_type, grammar_rule),
-            "hint":                 hint,
-            "retry_allowed":        retry_allowed,
-            "attempt_number":       attempt,
             "speech_stage_unlocked": speech_stage_unlocked,
         }
@@ -481,14 +491,14 @@ def _build_feedback(correct: bool, q_type: str, grammar_rule: str) -> str:
         return random.choice(messages)
     else:
         rule_hints = {
-            "topic_marker":              "Remember: 은 for consonant endings, 는 for vowel endings",
-            "copula":                    "Remember: 이에요 for consonant endings, 예요 for vowel endings",
-            "negative_copula":           "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
-            "indirect_quote_dago":       "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
-            "indirect_quote_commands":   "Review: (으)라고 commands, 지 말라고 negatives",
-            "indirect_quote_questions":  "Review: V/Adj+냐고 (drop ㄹ from stem)",
-            "indirect_quote_suggestions":"Review: V+자고 for suggestions",
-            "regret_expression":         "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
         }
         base = "다시 해 보세요! Let's try again. "
         return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
@@ -508,13 +518,13 @@ def handle_pronunciation(data):
     lang         = data.get('lang', 'ko-KR')
     grammar_rule = data.get('grammar_rule', '')
-    # ── STEP 1: Validate incoming data ──────────────────────────────────────
     logger.info("=" * 60)
     logger.info("🎤 [PRON] ── Pronunciation Assessment Start ──")
     logger.info(f"🎤 [PRON] ref_text='{ref_text}' | lang='{lang}' | grammar_rule='{grammar_rule}'")
     if not ref_text:
-        logger.error("❌ [PRON] STEP 1 FAILED: No reference text provided in payload")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
             "recognized_text": "", "word_details": [],
@@ -532,15 +542,15 @@ def handle_pronunciation(data):
         })
         return
-    logger.info(f"🎤 [PRON] STEP 1 OK: ref_text present, audio_b64 length={len(audio_b64)} chars")
     # ── STEP 2: Validate Azure credentials ──────────────────────────────────
-    logger.info(f"🎤 [PRON] STEP 2: Checking Azure credentials...")
-    logger.info(f"🎤 [PRON] AZURE_SPEECH_KEY present: {bool(AZURE_SPEECH_KEY)} | length: {len(AZURE_SPEECH_KEY) if AZURE_SPEECH_KEY else 0}")
-    logger.info(f"🎤 [PRON] AZURE_SPEECH_REGION: '{AZURE_SPEECH_REGION}'")
     if not AZURE_SPEECH_KEY:
-        logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_KEY env var is not set")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
             "recognized_text": "", "word_details": [],
@@ -549,7 +559,7 @@ def handle_pronunciation(data):
         return
     if not AZURE_SPEECH_REGION:
-        logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_REGION env var is not set")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
             "recognized_text": "", "word_details": [],
@@ -557,80 +567,75 @@ def handle_pronunciation(data):
         })
         return
-    logger.info(f"🎤 [PRON] STEP 2 OK: Azure credentials present")
     raw_path   = None
     clean_path = None
     try:
-        # ── STEP 3: Decode base64 audio ──────────────────────────────────────
         logger.info("🎤 [PRON] STEP 3: Decoding base64 audio...")
         try:
             if "," in audio_b64:
                 header, audio_b64 = audio_b64.split(",", 1)
-                logger.info(f"🎤 [PRON] Stripped data URI header: '{header[:50]}'")
             audio_bytes = base64.b64decode(audio_b64)
-            logger.info(f"🎤 [PRON] STEP 3 OK: Decoded {len(audio_bytes)} raw bytes")
         except Exception as e:
             logger.error(f"❌ [PRON] STEP 3 FAILED: base64 decode error: {e}")
             raise
         if len(audio_bytes) < 100:
-            logger.error(f"❌ [PRON] STEP 3 FAILED: Decoded audio is suspiciously small ({len(audio_bytes)} bytes)")
             raise Exception(f"Audio payload too small: {len(audio_bytes)} bytes")
-        # ── STEP 4: Write to temp file ────────────────────────────────────────
-        logger.info("🎤 [PRON] STEP 4: Writing audio to temp file...")
-        try:
-            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
-                temp_raw.write(audio_bytes)
-                raw_path = temp_raw.name
-            logger.info(f"🎤 [PRON] STEP 4 OK: Wrote to {raw_path} ({os.path.getsize(raw_path)} bytes)")
-        except Exception as e:
-            logger.error(f"❌ [PRON] STEP 4 FAILED: Could not write temp file: {e}")
-            raise
-        # ── STEP 5: FFmpeg conversion ─────────────────────────────────────────
-        logger.info("🎤 [PRON] STEP 5: Running FFmpeg conversion to 16kHz mono PCM WAV...")
         clean_path = sanitize_audio(raw_path)
         if not clean_path:
-            logger.error("❌ [PRON] STEP 5 FAILED: sanitize_audio() returned None — see FFmpeg logs above")
-            raise Exception("Audio conversion failed (FFmpeg error)")
-        logger.info(f"🎤 [PRON] STEP 5 OK: Clean WAV at {clean_path}")
-        # ── STEP 6: Analyze WAV integrity ─────────────────────────────────────
-        logger.info("🎤 [PRON] STEP 6: Analyzing WAV file integrity and volume...")
         audio_ok = analyze_audio_volume(clean_path)
         if not audio_ok:
-            logger.warning("⚠️  [PRON] STEP 6 WARNING: Audio appears silent — Azure will likely return NoMatch")
-            # Don't raise — let Azure try anyway, it gives a cleaner error
         else:
-            logger.info("🎤 [PRON] STEP 6 OK: WAV has audible signal")
-        # ── STEP 7: Build Azure SpeechConfig ─────────────────────────────────
-        logger.info("🎤 [PRON] STEP 7: Building Azure SpeechConfig...")
         try:
             speech_config = speechsdk.SpeechConfig(
                 subscription=AZURE_SPEECH_KEY,
                 region=AZURE_SPEECH_REGION
             )
             speech_config.speech_recognition_language = lang
-            logger.info(f"🎤 [PRON] STEP 7 OK: SpeechConfig built — region={AZURE_SPEECH_REGION} lang={lang}")
         except Exception as e:
-            logger.error(f"❌ [PRON] STEP 7 FAILED: SpeechConfig construction error: {e}")
             raise
-        # ── STEP 8: Build AudioConfig ─────────────────────────────────────────
-        logger.info(f"🎤 [PRON] STEP 8: Building AudioConfig from file: {clean_path}")
         try:
             audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
             logger.info("🎤 [PRON] STEP 8 OK: AudioConfig built")
         except Exception as e:
-            logger.error(f"❌ [PRON] STEP 8 FAILED: AudioConfig construction error: {e}")
             raise
-        # ── STEP 9: Build PronunciationAssessmentConfig ───────────────────────
-        logger.info(f"🎤 [PRON] STEP 9: Building PronunciationAssessmentConfig for text: '{ref_text}'")
         try:
             pronunciation_config = speechsdk.PronunciationAssessmentConfig(
                 reference_text=ref_text,
@@ -643,7 +648,7 @@ def handle_pronunciation(data):
             logger.error(f"❌ [PRON] STEP 9 FAILED: PronunciationAssessmentConfig error: {e}")
             raise
-        # ── STEP 10: Build SpeechRecognizer ──────────────────────────────────
         logger.info("🎤 [PRON] STEP 10: Building SpeechRecognizer...")
         try:
             recognizer = speechsdk.SpeechRecognizer(
@@ -651,36 +656,47 @@ def handle_pronunciation(data):
                 audio_config=audio_config
             )
             pronunciation_config.apply_to(recognizer)
-            logger.info("🎤 [PRON] STEP 10 OK: SpeechRecognizer built, pronunciation config applied")
         except Exception as e:
-            logger.error(f"❌ [PRON] STEP 10 FAILED: SpeechRecognizer construction error: {e}")
             raise
-        # ── STEP 11: Call Azure (the actual network call) ─────────────────────
-        logger.info("🎤 [PRON] STEP 11: Calling Azure recognize_once_async()... (network call)")
         try:
-            result = recognizer.recognize_once_async().get()
-            logger.info(f"🎤 [PRON] STEP 11 OK: Azure returned result")
             logger.info(f"🎤 [PRON] result.reason = {result.reason}")
             logger.info(f"🎤 [PRON] result.text   = '{result.text}'")
         except Exception as e:
-            logger.error(f"❌ [PRON] STEP 11 FAILED: recognize_once_async() threw: {e}")
             raise
-        # ── STEP 12: Parse result ─────────────────────────────────────────────
-        logger.info("🎤 [PRON] STEP 12: Parsing Azure result...")
         response = {}
         if result.reason == speechsdk.ResultReason.RecognizedSpeech:
-            logger.info("🎤 [PRON] STEP 12: Result reason = RecognizedSpeech ✅")
             try:
-                pron_result = speechsdk.PronunciationAssessmentResult(result)
                 accuracy     = pron_result.accuracy_score
                 fluency      = pron_result.fluency_score
                 completeness = pron_result.completeness_score
                 logger.info(f"🎤 [PRON] Scores → accuracy={accuracy:.1f} fluency={fluency:.1f} completeness={completeness:.1f}")
             except Exception as e:
-                logger.error(f"❌ [PRON] STEP 12 FAILED: PronunciationAssessmentResult parsing error: {e}")
                 raise
             detailed_words = []
@@ -706,52 +722,71 @@ def handle_pronunciation(data):
                 learner.record_outcome(grammar_rule, True, "speak")
                 response["mastery_update"] = dict(learner.mastery)
-            logger.info(f"✅ [PRON] STEP 12 OK: Assessment complete — acc={accuracy:.1f}")
         elif result.reason == speechsdk.ResultReason.NoMatch:
-            no_match_detail = result.no_match_details if hasattr(result, 'no_match_details') else 'N/A'
-            logger.warning(f"⚠️  [PRON] STEP 12: Result reason = NoMatch — Azure heard nothing useful")
             logger.warning(f"⚠️  [PRON] NoMatch details: {no_match_detail}")
             response = {
                 "success": False, "score": 0, "fluency": 0, "completeness": 0,
-                "recognized_text": "",
-                "word_details": [],
-                "feedback": "I couldn't hear you clearly. Please check your microphone and try speaking again.",
             }
         elif result.reason == speechsdk.ResultReason.Canceled:
             try:
                 cancellation = speechsdk.CancellationDetails(result)
-                logger.error(f"❌ [PRON] STEP 12: Result reason = Canceled")
-                logger.error(f"❌ [PRON] Cancellation reason: {cancellation.reason}")
-                logger.error(f"❌ [PRON] Cancellation error code: {cancellation.error_code}")
-                logger.error(f"❌ [PRON] Cancellation error details: {cancellation.error_details}")
-                # Common cancellation codes
-                if cancellation.reason == speechsdk.CancellationReason.Error:
-                    if "401" in str(cancellation.error_details):
-                        logger.error("❌ [PRON] HTTP 401 — Azure key is invalid or expired")
-                    elif "403" in str(cancellation.error_details):
-                        logger.error("❌ [PRON] HTTP 403 — Azure key doesn't have access to this region or feature")
-                    elif "connection" in str(cancellation.error_details).lower():
-                        logger.error("❌ [PRON] Connection error — Hugging Face Space may not have network access to Azure endpoint")
                 response = {
                     "success": False, "score": 0, "fluency": 0, "completeness": 0,
-                    "recognized_text": "",
-                    "word_details": [],
-                    "feedback": f"Recognition canceled: {cancellation.error_details}",
                 }
             except Exception as parse_e:
-                logger.error(f"❌ [PRON] Could not parse CancellationDetails: {parse_e}")
                 response = {
                     "success": False, "score": 0, "fluency": 0, "completeness": 0,
                     "recognized_text": "", "word_details": [],
-                    "feedback": "Recognition was canceled by Azure.",
                 }
         else:
-            logger.error(f"❌ [PRON] STEP 12: Unexpected result reason: {result.reason}")
             response = {
                 "success": False, "score": 0, "fluency": 0, "completeness": 0,
                 "recognized_text": "", "word_details": [],
@@ -763,8 +798,8 @@ def handle_pronunciation(data):
         emit('pronunciation_result', response)
     except Exception as e:
-        logger.error(f"❌ [PRON] UNHANDLED EXCEPTION in handle_pronunciation: {type(e).__name__}: {e}")
         import traceback
         logger.error(f"❌ [PRON] Traceback:\n{traceback.format_exc()}")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
@@ -774,10 +809,10 @@ def handle_pronunciation(data):
     finally:
         if raw_path and os.path.exists(raw_path):
             os.remove(raw_path)
-            logger.info(f"🧹 [PRON] Cleaned up raw file: {raw_path}")
         if clean_path and os.path.exists(clean_path):
             os.remove(clean_path)
-            logger.info(f"🧹 [PRON] Cleaned up clean WAV: {clean_path}")
 def _build_pronunciation_feedback(accuracy: float, fluency: float,
@@ -836,10 +871,10 @@ def handle_restore_session(data):
         logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
         emit('session_restored', {
-            "success":       True,
-            "session_id":    learner_id,
-            "mastery":       learner.mastery,
-            "difficulty":    learner.difficulty,
             "question_count": learner.question_count,
         })

 import struct
 import logging
 import uuid
+import sys
+# ── CRITICAL: eventlet monkey_patch BEFORE all other imports ──────────────────
+# Azure Speech SDK uses native C++ threads internally. If eventlet patches
+# Python threading AFTER the SDK is already loaded, the SDK's recognize_once_async()
+# silently returns ResultReason.Canceled + CancellationReason.Error with no
+# useful error_details — even with valid credentials and good audio.
+#
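+# Fix: call monkey_patch() here at the very top, then run all Azure SDK calls
+# inside a real OS thread via concurrent.futures.ThreadPoolExecutor, which
+# is not subject to eventlet's cooperative scheduling.
+# NOTE(review): eventlet.monkey_patch() patches `threading` by default, so a
+# ThreadPoolExecutor created after it may be backed by green threads rather
+# than real OS threads — confirm, and consider eventlet.tpool if native
+# threads are required.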
+import eventlet
+eventlet.monkey_patch()
+import concurrent.futures
+_azure_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
 import cv2
 import numpy as np
 from flask import Flask
 from learner_model import get_or_create_session, get_session, delete_session, purge_stale_sessions
 from question_generator import QuestionGenerator, QTYPE_TO_RULE
 sys.path.append(os.path.dirname(__file__))
 # --- LOGGING SETUP ---
 logging.basicConfig(
     level=logging.INFO,
     """Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
     output_path = input_path + "_clean.wav"
+    # --- Log input file info before conversion ---
     try:
         input_size = os.path.getsize(input_path)
         logger.info(f"🔧 [FFmpeg] Input file: {input_path} | Size: {input_size} bytes")
         output_path
     ]
+    logger.info(f"🔧 [FFmpeg] Running: {' '.join(command)}")
     try:
+        result = subprocess.run(command, check=True, capture_output=True, text=True)
         logger.info(f"✅ [FFmpeg] Conversion successful → {output_path}")
         if result.stderr:
+            logger.info(f"🔧 [FFmpeg] stderr:\n{result.stderr[:2000]}")
         output_size = os.path.getsize(output_path)
+        logger.info(f"🔧 [FFmpeg] Output WAV size: {output_size} bytes")
         if output_size == 0:
             logger.error("❌ [FFmpeg] Output WAV is EMPTY — conversion produced no data")
             return None
         return output_path
     except subprocess.CalledProcessError as e:
+        logger.error(f"❌ [FFmpeg] Process failed (returncode={e.returncode})")
         logger.error(f"❌ [FFmpeg] stdout: {e.stdout}")
         logger.error(f"❌ [FFmpeg] stderr: {e.stderr}")
         return None
 def analyze_audio_volume(file_path):
+    """Inspect WAV: log framerate, channels, duration, peak amplitude."""
     try:
         with wave.open(file_path, 'rb') as wf:
+            framerate  = wf.getframerate()
+            nframes    = wf.getnframes()
+            channels   = wf.getnchannels()
+            sampwidth  = wf.getsampwidth()
+            duration_s = nframes / framerate if framerate else 0
             logger.info(
                 f"🔊 [WAV] framerate={framerate}Hz | channels={channels} | "
             )
             if duration_s < 0.2:
+                logger.warning(f"⚠️  [WAV] Very short audio ({duration_s:.2f}s) — may not be enough for recognition")
             raw_data = wf.readframes(nframes)
             if len(raw_data) == 0:
             max_val = max(abs(x) for x in pcm_data)
             avg_val = sum(abs(x) for x in pcm_data) / len(pcm_data)
+            logger.info(f"🔊 [WAV] Peak amplitude: {max_val}/32767 | Avg: {avg_val:.1f}")
             if max_val < 100:
                 logger.warning("⚠️  [WAV] Audio appears SILENT (peak < 100) — microphone may not be working")
             return True
     except wave.Error as e:
+        logger.error(f"❌ [WAV] wave.Error: {e} — file may not be a valid WAV")
         return False
     except Exception as e:
+        logger.warning(f"⚠️  [WAV] Could not analyze audio: {e}")
         return True  # Don't block on analysis failure
             correct   = rule_engine.validate_token_order(submitted, expected)
         elif interaction_mode in ("choose_select", "fill_in"):
+            chosen        = str(data.get("answer", "")).strip()
+            answer_key    = str(data.get("answer_key", "")).strip()
             word_tested   = data.get("word_tested")
             particle_type = data.get("particle_type")
             else:
                 hint = data.get("hint_text", "Review the grammar rule and try again")
+        retry_allowed         = not correct and attempt < 3
         speech_stage_unlocked = correct
         response = {
+            "question_id":           data.get("question_id"),
+            "correct":               correct,
+            "score_delta":           10 if correct else 0,
+            "feedback":              _build_feedback(correct, q_type, grammar_rule),
+            "hint":                  hint,
+            "retry_allowed":         retry_allowed,
+            "attempt_number":        attempt,
             "speech_stage_unlocked": speech_stage_unlocked,
         }
         return random.choice(messages)
     else:
         rule_hints = {
+            "topic_marker":               "Remember: 은 for consonant endings, 는 for vowel endings",
+            "copula":                     "Remember: 이에요 for consonant endings, 예요 for vowel endings",
+            "negative_copula":            "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
+            "indirect_quote_dago":        "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
+            "indirect_quote_commands":    "Review: (으)라고 commands, 지 말라고 negatives",
+            "indirect_quote_questions":   "Review: V/Adj+냐고 (drop ㄹ from stem)",
+            "indirect_quote_suggestions": "Review: V+자고 for suggestions",
+            "regret_expression":          "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
         }
         base = "다시 해 보세요! Let's try again. "
         return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
     lang         = data.get('lang', 'ko-KR')
     grammar_rule = data.get('grammar_rule', '')
+    # ── STEP 1: Validate incoming payload ───────────────────────────────────
     logger.info("=" * 60)
     logger.info("🎤 [PRON] ── Pronunciation Assessment Start ──")
     logger.info(f"🎤 [PRON] ref_text='{ref_text}' | lang='{lang}' | grammar_rule='{grammar_rule}'")
     if not ref_text:
+        logger.error("❌ [PRON] STEP 1 FAILED: No reference text in payload")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
             "recognized_text": "", "word_details": [],
         })
         return
+    logger.info(f"🎤 [PRON] STEP 1 OK: audio_b64 length={len(audio_b64)} chars")
     # ── STEP 2: Validate Azure credentials ──────────────────────────────────
+    logger.info("🎤 [PRON] STEP 2: Checking Azure credentials...")
+    logger.info(f"🎤 [PRON] AZURE_SPEECH_KEY present={bool(AZURE_SPEECH_KEY)} | length={len(AZURE_SPEECH_KEY) if AZURE_SPEECH_KEY else 0}")
+    logger.info(f"🎤 [PRON] AZURE_SPEECH_REGION='{AZURE_SPEECH_REGION}'")
     if not AZURE_SPEECH_KEY:
+        logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_KEY env var not set")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
             "recognized_text": "", "word_details": [],
         return
     if not AZURE_SPEECH_REGION:
+        logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_REGION env var not set")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
             "recognized_text": "", "word_details": [],
         })
         return
+    logger.info("🎤 [PRON] STEP 2 OK: Azure credentials present")
     raw_path   = None
     clean_path = None
     try:
+        # ── STEP 3: Decode base64 ────────────────────────────────────────────
         logger.info("🎤 [PRON] STEP 3: Decoding base64 audio...")
         try:
             if "," in audio_b64:
                 header, audio_b64 = audio_b64.split(",", 1)
+                logger.info(f"🎤 [PRON] Stripped data URI header: '{header[:60]}'")
             audio_bytes = base64.b64decode(audio_b64)
+            logger.info(f"🎤 [PRON] STEP 3 OK: Decoded {len(audio_bytes)} bytes")
         except Exception as e:
             logger.error(f"❌ [PRON] STEP 3 FAILED: base64 decode error: {e}")
             raise
         if len(audio_bytes) < 100:
+            logger.error(f"❌ [PRON] STEP 3: Audio too small ({len(audio_bytes)} bytes) — likely not real audio")
             raise Exception(f"Audio payload too small: {len(audio_bytes)} bytes")
+        # ── STEP 4: Write temp file ──────────────────────────────────────────
+        logger.info("🎤 [PRON] STEP 4: Writing to temp file...")
+        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
+            temp_raw.write(audio_bytes)
+            raw_path = temp_raw.name
+        logger.info(f"🎤 [PRON] STEP 4 OK: Wrote {os.path.getsize(raw_path)} bytes → {raw_path}")
+        # ── STEP 5: FFmpeg conversion ────────────────────────────────────────
+        logger.info("🎤 [PRON] STEP 5: FFmpeg → 16kHz mono PCM WAV...")
         clean_path = sanitize_audio(raw_path)
         if not clean_path:
+            logger.error("❌ [PRON] STEP 5 FAILED: sanitize_audio() returned None")
+            raise Exception("Audio conversion failed (FFmpeg error — see logs above)")
+        logger.info(f"🎤 [PRON] STEP 5 OK: clean WAV at {clean_path}")
+        # ── STEP 6: WAV integrity check ──────────────────────────────────────
+        logger.info("🎤 [PRON] STEP 6: WAV integrity + volume check...")
         audio_ok = analyze_audio_volume(clean_path)
         if not audio_ok:
+            logger.warning("⚠️  [PRON] STEP 6: Audio silent — Azure will likely return NoMatch")
         else:
+            logger.info("🎤 [PRON] STEP 6 OK: Audible signal confirmed")
+        # ── STEP 7: Build Azure SpeechConfig ────────────────────────────────
+        logger.info(f"🎤 [PRON] STEP 7: Building SpeechConfig (region={AZURE_SPEECH_REGION}, lang={lang})...")
         try:
             speech_config = speechsdk.SpeechConfig(
                 subscription=AZURE_SPEECH_KEY,
                 region=AZURE_SPEECH_REGION
             )
             speech_config.speech_recognition_language = lang
+            logger.info("🎤 [PRON] STEP 7 OK: SpeechConfig built")
         except Exception as e:
+            logger.error(f"❌ [PRON] STEP 7 FAILED: SpeechConfig error: {e}")
             raise
+        # ── STEP 8: Build AudioConfig ────────────────────────────────────────
+        logger.info(f"🎤 [PRON] STEP 8: Building AudioConfig from {clean_path}...")
         try:
             audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
             logger.info("🎤 [PRON] STEP 8 OK: AudioConfig built")
         except Exception as e:
+            logger.error(f"❌ [PRON] STEP 8 FAILED: AudioConfig error: {e}")
             raise
+        # ── STEP 9: Build PronunciationAssessmentConfig ──────────────────────
+        logger.info(f"🎤 [PRON] STEP 9: PronunciationAssessmentConfig for '{ref_text}'...")
         try:
             pronunciation_config = speechsdk.PronunciationAssessmentConfig(
                 reference_text=ref_text,
             logger.error(f"❌ [PRON] STEP 9 FAILED: PronunciationAssessmentConfig error: {e}")
             raise
+        # ── STEP 10: Build SpeechRecognizer ─────────────────────────────────
         logger.info("🎤 [PRON] STEP 10: Building SpeechRecognizer...")
         try:
             recognizer = speechsdk.SpeechRecognizer(
                 audio_config=audio_config
             )
             pronunciation_config.apply_to(recognizer)
+            logger.info("🎤 [PRON] STEP 10 OK: SpeechRecognizer ready")
         except Exception as e:
+            logger.error(f"❌ [PRON] STEP 10 FAILED: SpeechRecognizer error: {e}")
             raise
+        # ── STEP 11: Call Azure in a REAL OS thread ──────────────────────────
+        # recognize_once_async() uses native C++ threads internally.
+        # Calling it directly in an eventlet greenthread causes silent Canceled
+        # errors because eventlet has patched socket/ssl/threading under the SDK.
+        # _azure_executor is a real ThreadPoolExecutor — unpatched OS threads.
+        logger.info("🎤 [PRON] STEP 11: Submitting to real OS thread (bypassing eventlet)...")
         try:
+            def _run_azure():
+                return recognizer.recognize_once_async().get()
+            future = _azure_executor.submit(_run_azure)
+            result = future.result(timeout=30)
+            logger.info("🎤 [PRON] STEP 11 OK: Azure responded from real OS thread")
             logger.info(f"🎤 [PRON] result.reason = {result.reason}")
             logger.info(f"🎤 [PRON] result.text   = '{result.text}'")
+        except concurrent.futures.TimeoutError:
+            logger.error("❌ [PRON] STEP 11 FAILED: Azure timed out after 30s")
+            raise Exception("Azure Speech timed out — check HF Space outbound network access")
         except Exception as e:
+            logger.error(f"❌ [PRON] STEP 11 FAILED: Azure OS thread raised: {e}")
             raise
+        # ── STEP 12: Parse Azure result ──────────────────────────────────────
+        logger.info("🎤 [PRON] STEP 12: Parsing result...")
         response = {}
         if result.reason == speechsdk.ResultReason.RecognizedSpeech:
+            logger.info("🎤 [PRON] STEP 12: RecognizedSpeech ✅")
             try:
+                pron_result  = speechsdk.PronunciationAssessmentResult(result)
                 accuracy     = pron_result.accuracy_score
                 fluency      = pron_result.fluency_score
                 completeness = pron_result.completeness_score
                 logger.info(f"🎤 [PRON] Scores → accuracy={accuracy:.1f} fluency={fluency:.1f} completeness={completeness:.1f}")
             except Exception as e:
+                logger.error(f"❌ [PRON] STEP 12 FAILED: PronunciationAssessmentResult parse error: {e}")
                 raise
             detailed_words = []
                 learner.record_outcome(grammar_rule, True, "speak")
                 response["mastery_update"] = dict(learner.mastery)
+            logger.info(f"✅ [PRON] Complete — accuracy={accuracy:.1f}")
         elif result.reason == speechsdk.ResultReason.NoMatch:
+            no_match_detail = getattr(result, 'no_match_details', 'N/A')
+            logger.warning(f"⚠️  [PRON] STEP 12: NoMatch — Azure heard nothing useful")
             logger.warning(f"⚠️  [PRON] NoMatch details: {no_match_detail}")
             response = {
                 "success": False, "score": 0, "fluency": 0, "completeness": 0,
+                "recognized_text": "", "word_details": [],
+                "feedback": "I couldn't hear you clearly. Check your microphone and try again.",
             }
         elif result.reason == speechsdk.ResultReason.Canceled:
+            logger.error("❌ [PRON] STEP 12: Result reason = Canceled")
             try:
                 cancellation = speechsdk.CancellationDetails(result)
+                # Use getattr for all fields — older SDK versions may be missing some
+                c_reason  = getattr(cancellation, 'reason',        'UNKNOWN')
+                c_code    = getattr(cancellation, 'error_code',    'UNKNOWN')
+                c_details = getattr(cancellation, 'error_details', 'UNKNOWN')
+                logger.error(f"❌ [PRON] Cancellation reason:        {c_reason}")
+                logger.error(f"❌ [PRON] Cancellation error_code:    {c_code}")
+                logger.error(f"❌ [PRON] Cancellation error_details: {c_details}")
+                # Dump every attribute on the object so nothing is hidden
+                logger.error(f"❌ [PRON] CancellationDetails dir(): {[a for a in dir(cancellation) if not a.startswith('_')]}")
+                # Also dump the raw result properties dict if available
+                raw_props = getattr(result, 'properties', None)
+                if raw_props:
+                    logger.error(f"❌ [PRON] result.properties: {dict(raw_props)}")
+                details_str = str(c_details or '') + str(c_code or '')
+                if "401" in details_str:
+                    logger.error("❌ [PRON] → HTTP 401: Azure key is INVALID or EXPIRED — check HF Space secrets")
+                elif "403" in details_str:
+                    logger.error("❌ [PRON] → HTTP 403: Key does not have access — wrong region or feature not enabled")
+                elif "ServiceUnavailable" in details_str or "503" in details_str:
+                    logger.error("❌ [PRON] → 503: Azure Speech service is temporarily unavailable")
+                elif "connection" in details_str.lower() or "network" in details_str.lower():
+                    logger.error("❌ [PRON] → Network error: HF Space cannot reach Azure endpoint — check outbound access")
+                elif "AuthenticationFailure" in details_str:
+                    logger.error("❌ [PRON] → AuthenticationFailure: Key/region mismatch — your key was provisioned in a different region than AZURE_SPEECH_REGION")
+                elif details_str in ('', 'UNKNOWN'):
+                    logger.error("❌ [PRON] → Empty error details: likely a key/region mismatch. Verify AZURE_SPEECH_REGION matches the region shown in Azure portal for this key")
                 response = {
                     "success": False, "score": 0, "fluency": 0, "completeness": 0,
+                    "recognized_text": "", "word_details": [],
+                    "feedback": f"Azure canceled: reason={c_reason} code={c_code} details={c_details}",
                 }
             except Exception as parse_e:
+                logger.error(f"❌ [PRON] Could not parse CancellationDetails at all: {parse_e}")
+                # Last resort — try to get anything from the raw result
+                logger.error(f"❌ [PRON] Raw result dir(): {[a for a in dir(result) if not a.startswith('_')]}")
                 response = {
                     "success": False, "score": 0, "fluency": 0, "completeness": 0,
                     "recognized_text": "", "word_details": [],
+                    "feedback": "Recognition was canceled by Azure — check server logs for details.",
                 }
         else:
+            logger.error(f"❌ [PRON] STEP 12: Unexpected result.reason = {result.reason}")
             response = {
                 "success": False, "score": 0, "fluency": 0, "completeness": 0,
                 "recognized_text": "", "word_details": [],
         emit('pronunciation_result', response)
     except Exception as e:
         import traceback
+        logger.error(f"❌ [PRON] UNHANDLED EXCEPTION: {type(e).__name__}: {e}")
         logger.error(f"❌ [PRON] Traceback:\n{traceback.format_exc()}")
         emit('pronunciation_result', {
             "success": False, "score": 0, "fluency": 0, "completeness": 0,
     finally:
         if raw_path and os.path.exists(raw_path):
             os.remove(raw_path)
+            logger.info(f"🧹 [PRON] Cleaned up: {raw_path}")
         if clean_path and os.path.exists(clean_path):
             os.remove(clean_path)
+            logger.info(f"🧹 [PRON] Cleaned up: {clean_path}")
 def _build_pronunciation_feedback(accuracy: float, fluency: float,
         logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
         emit('session_restored', {
+            "success":        True,
+            "session_id":     learner_id,
+            "mastery":        learner.mastery,
+            "difficulty":     learner.difficulty,
             "question_count": learner.question_count,
         })