rairo committed on
Commit
8cfbcbe
·
verified ·
1 Parent(s): c138057

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -129
app.py CHANGED
@@ -8,6 +8,23 @@ import wave
8
  import struct
9
  import logging
10
  import uuid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  import cv2
12
  import numpy as np
13
  from flask import Flask
@@ -26,10 +43,9 @@ from content_pack import get_active_pack, replace_active_pack
26
  from learner_model import get_or_create_session, get_session, delete_session, purge_stale_sessions
27
  from question_generator import QuestionGenerator, QTYPE_TO_RULE
28
 
29
- import sys
30
-
31
  sys.path.append(os.path.dirname(__file__))
32
 
 
33
  # --- LOGGING SETUP ---
34
  logging.basicConfig(
35
  level=logging.INFO,
@@ -81,7 +97,7 @@ def sanitize_audio(input_path):
81
  """Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
82
  output_path = input_path + "_clean.wav"
83
 
84
- # --- STEP: Log input file info before conversion ---
85
  try:
86
  input_size = os.path.getsize(input_path)
87
  logger.info(f"🔧 [FFmpeg] Input file: {input_path} | Size: {input_size} bytes")
@@ -100,22 +116,16 @@ def sanitize_audio(input_path):
100
  output_path
101
  ]
102
 
103
- logger.info(f"🔧 [FFmpeg] Running command: {' '.join(command)}")
104
 
105
  try:
106
- result = subprocess.run(
107
- command,
108
- check=True,
109
- capture_output=True,
110
- text=True
111
- )
112
  logger.info(f"✅ [FFmpeg] Conversion successful → {output_path}")
113
  if result.stderr:
114
- # ffmpeg writes progress/info to stderr even on success
115
- logger.info(f"🔧 [FFmpeg] stderr output:\n{result.stderr[:2000]}")
116
 
117
  output_size = os.path.getsize(output_path)
118
- logger.info(f"🔧 [FFmpeg] Output file size: {output_size} bytes")
119
  if output_size == 0:
120
  logger.error("❌ [FFmpeg] Output WAV is EMPTY — conversion produced no data")
121
  return None
@@ -123,7 +133,7 @@ def sanitize_audio(input_path):
123
  return output_path
124
 
125
  except subprocess.CalledProcessError as e:
126
- logger.error(f"❌ [FFmpeg] Process failed with return code {e.returncode}")
127
  logger.error(f"❌ [FFmpeg] stdout: {e.stdout}")
128
  logger.error(f"❌ [FFmpeg] stderr: {e.stderr}")
129
  return None
@@ -136,14 +146,14 @@ def sanitize_audio(input_path):
136
 
137
 
138
  def analyze_audio_volume(file_path):
139
- """Inspect a WAV file: log frame rate, channels, duration, and peak amplitude."""
140
  try:
141
  with wave.open(file_path, 'rb') as wf:
142
- framerate = wf.getframerate()
143
- nframes = wf.getnframes()
144
- channels = wf.getnchannels()
145
- sampwidth = wf.getsampwidth()
146
- duration_s = nframes / framerate if framerate else 0
147
 
148
  logger.info(
149
  f"🔊 [WAV] framerate={framerate}Hz | channels={channels} | "
@@ -151,7 +161,7 @@ def analyze_audio_volume(file_path):
151
  )
152
 
153
  if duration_s < 0.2:
154
- logger.warning(f"⚠️ [WAV] Audio is very short ({duration_s:.2f}s) — may not be enough for recognition")
155
 
156
  raw_data = wf.readframes(nframes)
157
  if len(raw_data) == 0:
@@ -167,7 +177,7 @@ def analyze_audio_volume(file_path):
167
 
168
  max_val = max(abs(x) for x in pcm_data)
169
  avg_val = sum(abs(x) for x in pcm_data) / len(pcm_data)
170
- logger.info(f"🔊 [WAV] Peak amplitude: {max_val}/32767 | Avg amplitude: {avg_val:.1f}")
171
 
172
  if max_val < 100:
173
  logger.warning("⚠️ [WAV] Audio appears SILENT (peak < 100) — microphone may not be working")
@@ -178,10 +188,10 @@ def analyze_audio_volume(file_path):
178
  return True
179
 
180
  except wave.Error as e:
181
- logger.error(f"❌ [WAV] wave.Error reading file: {e} — file may not be a valid WAV")
182
  return False
183
  except Exception as e:
184
- logger.warning(f"⚠️ [WAV] Could not analyze audio volume: {e}")
185
  return True # Don't block on analysis failure
186
 
187
 
@@ -388,8 +398,8 @@ def handle_submit_answer(data):
388
  correct = rule_engine.validate_token_order(submitted, expected)
389
 
390
  elif interaction_mode in ("choose_select", "fill_in"):
391
- chosen = str(data.get("answer", "")).strip()
392
- answer_key = str(data.get("answer_key", "")).strip()
393
  word_tested = data.get("word_tested")
394
  particle_type = data.get("particle_type")
395
 
@@ -415,17 +425,17 @@ def handle_submit_answer(data):
415
  else:
416
  hint = data.get("hint_text", "Review the grammar rule and try again")
417
 
418
- retry_allowed = not correct and attempt < 3
419
  speech_stage_unlocked = correct
420
 
421
  response = {
422
- "question_id": data.get("question_id"),
423
- "correct": correct,
424
- "score_delta": 10 if correct else 0,
425
- "feedback": _build_feedback(correct, q_type, grammar_rule),
426
- "hint": hint,
427
- "retry_allowed": retry_allowed,
428
- "attempt_number": attempt,
429
  "speech_stage_unlocked": speech_stage_unlocked,
430
  }
431
 
@@ -481,14 +491,14 @@ def _build_feedback(correct: bool, q_type: str, grammar_rule: str) -> str:
481
  return random.choice(messages)
482
  else:
483
  rule_hints = {
484
- "topic_marker": "Remember: 은 for consonant endings, 는 for vowel endings",
485
- "copula": "Remember: 이에요 for consonant endings, 예요 for vowel endings",
486
- "negative_copula": "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
487
- "indirect_quote_dago": "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
488
- "indirect_quote_commands": "Review: (으)라고 commands, 지 말라고 negatives",
489
- "indirect_quote_questions": "Review: V/Adj+냐고 (drop ㄹ from stem)",
490
- "indirect_quote_suggestions":"Review: V+자고 for suggestions",
491
- "regret_expression": "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
492
  }
493
  base = "다시 해 보세요! Let's try again. "
494
  return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
@@ -508,13 +518,13 @@ def handle_pronunciation(data):
508
  lang = data.get('lang', 'ko-KR')
509
  grammar_rule = data.get('grammar_rule', '')
510
 
511
- # ── STEP 1: Validate incoming data ──────────────────────────────────────
512
  logger.info("=" * 60)
513
  logger.info("🎤 [PRON] ── Pronunciation Assessment Start ──")
514
  logger.info(f"🎤 [PRON] ref_text='{ref_text}' | lang='{lang}' | grammar_rule='{grammar_rule}'")
515
 
516
  if not ref_text:
517
- logger.error("❌ [PRON] STEP 1 FAILED: No reference text provided in payload")
518
  emit('pronunciation_result', {
519
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
520
  "recognized_text": "", "word_details": [],
@@ -532,15 +542,15 @@ def handle_pronunciation(data):
532
  })
533
  return
534
 
535
- logger.info(f"🎤 [PRON] STEP 1 OK: ref_text present, audio_b64 length={len(audio_b64)} chars")
536
 
537
  # ── STEP 2: Validate Azure credentials ──────────────────────────────────
538
- logger.info(f"🎤 [PRON] STEP 2: Checking Azure credentials...")
539
- logger.info(f"🎤 [PRON] AZURE_SPEECH_KEY present: {bool(AZURE_SPEECH_KEY)} | length: {len(AZURE_SPEECH_KEY) if AZURE_SPEECH_KEY else 0}")
540
- logger.info(f"🎤 [PRON] AZURE_SPEECH_REGION: '{AZURE_SPEECH_REGION}'")
541
 
542
  if not AZURE_SPEECH_KEY:
543
- logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_KEY env var is not set")
544
  emit('pronunciation_result', {
545
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
546
  "recognized_text": "", "word_details": [],
@@ -549,7 +559,7 @@ def handle_pronunciation(data):
549
  return
550
 
551
  if not AZURE_SPEECH_REGION:
552
- logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_REGION env var is not set")
553
  emit('pronunciation_result', {
554
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
555
  "recognized_text": "", "word_details": [],
@@ -557,80 +567,75 @@ def handle_pronunciation(data):
557
  })
558
  return
559
 
560
- logger.info(f"🎤 [PRON] STEP 2 OK: Azure credentials present")
561
 
562
  raw_path = None
563
  clean_path = None
564
 
565
  try:
566
- # ── STEP 3: Decode base64 audio ──────────────────────────────────────
567
  logger.info("🎤 [PRON] STEP 3: Decoding base64 audio...")
568
  try:
569
  if "," in audio_b64:
570
  header, audio_b64 = audio_b64.split(",", 1)
571
- logger.info(f"🎤 [PRON] Stripped data URI header: '{header[:50]}'")
572
  audio_bytes = base64.b64decode(audio_b64)
573
- logger.info(f"🎤 [PRON] STEP 3 OK: Decoded {len(audio_bytes)} raw bytes")
574
  except Exception as e:
575
  logger.error(f"❌ [PRON] STEP 3 FAILED: base64 decode error: {e}")
576
  raise
577
 
578
  if len(audio_bytes) < 100:
579
- logger.error(f"❌ [PRON] STEP 3 FAILED: Decoded audio is suspiciously small ({len(audio_bytes)} bytes)")
580
  raise Exception(f"Audio payload too small: {len(audio_bytes)} bytes")
581
 
582
- # ── STEP 4: Write to temp file ────────────────────────────────────────
583
- logger.info("🎤 [PRON] STEP 4: Writing audio to temp file...")
584
- try:
585
- with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
586
- temp_raw.write(audio_bytes)
587
- raw_path = temp_raw.name
588
- logger.info(f"🎤 [PRON] STEP 4 OK: Wrote to {raw_path} ({os.path.getsize(raw_path)} bytes)")
589
- except Exception as e:
590
- logger.error(f"❌ [PRON] STEP 4 FAILED: Could not write temp file: {e}")
591
- raise
592
 
593
- # ── STEP 5: FFmpeg conversion ────────────────────────────────────────
594
- logger.info("🎤 [PRON] STEP 5: Running FFmpeg conversion to 16kHz mono PCM WAV...")
595
  clean_path = sanitize_audio(raw_path)
596
  if not clean_path:
597
- logger.error("❌ [PRON] STEP 5 FAILED: sanitize_audio() returned None — see FFmpeg logs above")
598
- raise Exception("Audio conversion failed (FFmpeg error)")
599
- logger.info(f"🎤 [PRON] STEP 5 OK: Clean WAV at {clean_path}")
600
 
601
- # ── STEP 6: Analyze WAV integrity ─────────────────────────────────────
602
- logger.info("🎤 [PRON] STEP 6: Analyzing WAV file integrity and volume...")
603
  audio_ok = analyze_audio_volume(clean_path)
604
  if not audio_ok:
605
- logger.warning("⚠️ [PRON] STEP 6 WARNING: Audio appears silent — Azure will likely return NoMatch")
606
- # Don't raise — let Azure try anyway, it gives a cleaner error
607
  else:
608
- logger.info("🎤 [PRON] STEP 6 OK: WAV has audible signal")
609
 
610
- # ── STEP 7: Build Azure SpeechConfig ────────────────────────────────
611
- logger.info("🎤 [PRON] STEP 7: Building Azure SpeechConfig...")
612
  try:
613
  speech_config = speechsdk.SpeechConfig(
614
  subscription=AZURE_SPEECH_KEY,
615
  region=AZURE_SPEECH_REGION
616
  )
617
  speech_config.speech_recognition_language = lang
618
- logger.info(f"🎤 [PRON] STEP 7 OK: SpeechConfig built — region={AZURE_SPEECH_REGION} lang={lang}")
619
  except Exception as e:
620
- logger.error(f"❌ [PRON] STEP 7 FAILED: SpeechConfig construction error: {e}")
621
  raise
622
 
623
- # ── STEP 8: Build AudioConfig ────────────────────────────────────────
624
- logger.info(f"🎤 [PRON] STEP 8: Building AudioConfig from file: {clean_path}")
625
  try:
626
  audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
627
  logger.info("🎤 [PRON] STEP 8 OK: AudioConfig built")
628
  except Exception as e:
629
- logger.error(f"❌ [PRON] STEP 8 FAILED: AudioConfig construction error: {e}")
630
  raise
631
 
632
- # ── STEP 9: Build PronunciationAssessmentConfig ──────────────────────
633
- logger.info(f"🎤 [PRON] STEP 9: Building PronunciationAssessmentConfig for text: '{ref_text}'")
634
  try:
635
  pronunciation_config = speechsdk.PronunciationAssessmentConfig(
636
  reference_text=ref_text,
@@ -643,7 +648,7 @@ def handle_pronunciation(data):
643
  logger.error(f"❌ [PRON] STEP 9 FAILED: PronunciationAssessmentConfig error: {e}")
644
  raise
645
 
646
- # ── STEP 10: Build SpeechRecognizer ──────────────────────────────────
647
  logger.info("🎤 [PRON] STEP 10: Building SpeechRecognizer...")
648
  try:
649
  recognizer = speechsdk.SpeechRecognizer(
@@ -651,36 +656,47 @@ def handle_pronunciation(data):
651
  audio_config=audio_config
652
  )
653
  pronunciation_config.apply_to(recognizer)
654
- logger.info("🎤 [PRON] STEP 10 OK: SpeechRecognizer built, pronunciation config applied")
655
  except Exception as e:
656
- logger.error(f"❌ [PRON] STEP 10 FAILED: SpeechRecognizer construction error: {e}")
657
  raise
658
 
659
- # ── STEP 11: Call Azure (the actual network call) ─────────────────────
660
- logger.info("🎤 [PRON] STEP 11: Calling Azure recognize_once_async()... (network call)")
 
 
 
 
661
  try:
662
- result = recognizer.recognize_once_async().get()
663
- logger.info(f"🎤 [PRON] STEP 11 OK: Azure returned result")
 
 
 
 
664
  logger.info(f"🎤 [PRON] result.reason = {result.reason}")
665
  logger.info(f"🎤 [PRON] result.text = '{result.text}'")
 
 
 
666
  except Exception as e:
667
- logger.error(f"❌ [PRON] STEP 11 FAILED: recognize_once_async() threw: {e}")
668
  raise
669
 
670
- # ── STEP 12: Parse result ─────────────────────────────────────────────
671
- logger.info("🎤 [PRON] STEP 12: Parsing Azure result...")
672
  response = {}
673
 
674
  if result.reason == speechsdk.ResultReason.RecognizedSpeech:
675
- logger.info("🎤 [PRON] STEP 12: Result reason = RecognizedSpeech ✅")
676
  try:
677
- pron_result = speechsdk.PronunciationAssessmentResult(result)
678
  accuracy = pron_result.accuracy_score
679
  fluency = pron_result.fluency_score
680
  completeness = pron_result.completeness_score
681
  logger.info(f"🎤 [PRON] Scores → accuracy={accuracy:.1f} fluency={fluency:.1f} completeness={completeness:.1f}")
682
  except Exception as e:
683
- logger.error(f"❌ [PRON] STEP 12 FAILED: PronunciationAssessmentResult parsing error: {e}")
684
  raise
685
 
686
  detailed_words = []
@@ -706,52 +722,71 @@ def handle_pronunciation(data):
706
  learner.record_outcome(grammar_rule, True, "speak")
707
  response["mastery_update"] = dict(learner.mastery)
708
 
709
- logger.info(f"✅ [PRON] STEP 12 OK: Assessment complete acc={accuracy:.1f}")
710
 
711
  elif result.reason == speechsdk.ResultReason.NoMatch:
712
- no_match_detail = result.no_match_details if hasattr(result, 'no_match_details') else 'N/A'
713
- logger.warning(f"⚠️ [PRON] STEP 12: Result reason = NoMatch — Azure heard nothing useful")
714
  logger.warning(f"⚠️ [PRON] NoMatch details: {no_match_detail}")
715
  response = {
716
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
717
- "recognized_text": "",
718
- "word_details": [],
719
- "feedback": "I couldn't hear you clearly. Please check your microphone and try speaking again.",
720
  }
721
 
722
  elif result.reason == speechsdk.ResultReason.Canceled:
 
723
  try:
724
  cancellation = speechsdk.CancellationDetails(result)
725
- logger.error(f"❌ [PRON] STEP 12: Result reason = Canceled")
726
- logger.error(f"❌ [PRON] Cancellation reason: {cancellation.reason}")
727
- logger.error(f"❌ [PRON] Cancellation error code: {cancellation.error_code}")
728
- logger.error(f"❌ [PRON] Cancellation error details: {cancellation.error_details}")
729
-
730
- # Common cancellation codes
731
- if cancellation.reason == speechsdk.CancellationReason.Error:
732
- if "401" in str(cancellation.error_details):
733
- logger.error("❌ [PRON] HTTP 401 — Azure key is invalid or expired")
734
- elif "403" in str(cancellation.error_details):
735
- logger.error("❌ [PRON] HTTP 403 Azure key doesn't have access to this region or feature")
736
- elif "connection" in str(cancellation.error_details).lower():
737
- logger.error("❌ [PRON] Connection error — Hugging Face Space may not have network access to Azure endpoint")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
 
739
  response = {
740
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
741
- "recognized_text": "",
742
- "word_details": [],
743
- "feedback": f"Recognition canceled: {cancellation.error_details}",
744
  }
745
  except Exception as parse_e:
746
- logger.error(f"❌ [PRON] Could not parse CancellationDetails: {parse_e}")
 
 
747
  response = {
748
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
749
  "recognized_text": "", "word_details": [],
750
- "feedback": "Recognition was canceled by Azure.",
751
  }
752
 
753
  else:
754
- logger.error(f"❌ [PRON] STEP 12: Unexpected result reason: {result.reason}")
755
  response = {
756
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
757
  "recognized_text": "", "word_details": [],
@@ -763,8 +798,8 @@ def handle_pronunciation(data):
763
  emit('pronunciation_result', response)
764
 
765
  except Exception as e:
766
- logger.error(f"❌ [PRON] UNHANDLED EXCEPTION in handle_pronunciation: {type(e).__name__}: {e}")
767
  import traceback
 
768
  logger.error(f"❌ [PRON] Traceback:\n{traceback.format_exc()}")
769
  emit('pronunciation_result', {
770
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
@@ -774,10 +809,10 @@ def handle_pronunciation(data):
774
  finally:
775
  if raw_path and os.path.exists(raw_path):
776
  os.remove(raw_path)
777
- logger.info(f"🧹 [PRON] Cleaned up raw file: {raw_path}")
778
  if clean_path and os.path.exists(clean_path):
779
  os.remove(clean_path)
780
- logger.info(f"🧹 [PRON] Cleaned up clean WAV: {clean_path}")
781
 
782
 
783
  def _build_pronunciation_feedback(accuracy: float, fluency: float,
@@ -836,10 +871,10 @@ def handle_restore_session(data):
836
  logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
837
 
838
  emit('session_restored', {
839
- "success": True,
840
- "session_id": learner_id,
841
- "mastery": learner.mastery,
842
- "difficulty": learner.difficulty,
843
  "question_count": learner.question_count,
844
  })
845
 
 
8
  import struct
9
  import logging
10
  import uuid
11
+ import sys
12
+
13
+ # ── CRITICAL: eventlet monkey_patch BEFORE all other imports ──────────────────
14
+ # Azure Speech SDK uses native C++ threads internally. If eventlet patches
15
+ # Python threading AFTER the SDK is already loaded, the SDK's recognize_once_async()
16
+ # silently returns ResultReason.Canceled + CancellationReason.Error with no
17
+ # useful error_details — even with valid credentials and good audio.
18
+ #
19
+ # Fix: call monkey_patch() here at the very top, then run all Azure SDK calls
20
+ # inside a real OS thread via concurrent.futures.ThreadPoolExecutor, which
21
+ # is not subject to eventlet's cooperative scheduling.
22
+ import eventlet
23
+ eventlet.monkey_patch()
24
+
25
+ import concurrent.futures
26
+ _azure_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
27
+
28
  import cv2
29
  import numpy as np
30
  from flask import Flask
 
43
  from learner_model import get_or_create_session, get_session, delete_session, purge_stale_sessions
44
  from question_generator import QuestionGenerator, QTYPE_TO_RULE
45
 
 
 
46
  sys.path.append(os.path.dirname(__file__))
47
 
48
+
49
  # --- LOGGING SETUP ---
50
  logging.basicConfig(
51
  level=logging.INFO,
 
97
  """Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
98
  output_path = input_path + "_clean.wav"
99
 
100
+ # --- Log input file info before conversion ---
101
  try:
102
  input_size = os.path.getsize(input_path)
103
  logger.info(f"🔧 [FFmpeg] Input file: {input_path} | Size: {input_size} bytes")
 
116
  output_path
117
  ]
118
 
119
+ logger.info(f"🔧 [FFmpeg] Running: {' '.join(command)}")
120
 
121
  try:
122
+ result = subprocess.run(command, check=True, capture_output=True, text=True)
 
 
 
 
 
123
  logger.info(f"✅ [FFmpeg] Conversion successful → {output_path}")
124
  if result.stderr:
125
+ logger.info(f"🔧 [FFmpeg] stderr:\n{result.stderr[:2000]}")
 
126
 
127
  output_size = os.path.getsize(output_path)
128
+ logger.info(f"🔧 [FFmpeg] Output WAV size: {output_size} bytes")
129
  if output_size == 0:
130
  logger.error("❌ [FFmpeg] Output WAV is EMPTY — conversion produced no data")
131
  return None
 
133
  return output_path
134
 
135
  except subprocess.CalledProcessError as e:
136
+ logger.error(f"❌ [FFmpeg] Process failed (returncode={e.returncode})")
137
  logger.error(f"❌ [FFmpeg] stdout: {e.stdout}")
138
  logger.error(f"❌ [FFmpeg] stderr: {e.stderr}")
139
  return None
 
146
 
147
 
148
  def analyze_audio_volume(file_path):
149
+ """Inspect WAV: log framerate, channels, duration, peak amplitude."""
150
  try:
151
  with wave.open(file_path, 'rb') as wf:
152
+ framerate = wf.getframerate()
153
+ nframes = wf.getnframes()
154
+ channels = wf.getnchannels()
155
+ sampwidth = wf.getsampwidth()
156
+ duration_s = nframes / framerate if framerate else 0
157
 
158
  logger.info(
159
  f"🔊 [WAV] framerate={framerate}Hz | channels={channels} | "
 
161
  )
162
 
163
  if duration_s < 0.2:
164
+ logger.warning(f"⚠️ [WAV] Very short audio ({duration_s:.2f}s) — may not be enough for recognition")
165
 
166
  raw_data = wf.readframes(nframes)
167
  if len(raw_data) == 0:
 
177
 
178
  max_val = max(abs(x) for x in pcm_data)
179
  avg_val = sum(abs(x) for x in pcm_data) / len(pcm_data)
180
+ logger.info(f"🔊 [WAV] Peak amplitude: {max_val}/32767 | Avg: {avg_val:.1f}")
181
 
182
  if max_val < 100:
183
  logger.warning("⚠️ [WAV] Audio appears SILENT (peak < 100) — microphone may not be working")
 
188
  return True
189
 
190
  except wave.Error as e:
191
+ logger.error(f"❌ [WAV] wave.Error: {e} — file may not be a valid WAV")
192
  return False
193
  except Exception as e:
194
+ logger.warning(f"⚠️ [WAV] Could not analyze audio: {e}")
195
  return True # Don't block on analysis failure
196
 
197
 
 
398
  correct = rule_engine.validate_token_order(submitted, expected)
399
 
400
  elif interaction_mode in ("choose_select", "fill_in"):
401
+ chosen = str(data.get("answer", "")).strip()
402
+ answer_key = str(data.get("answer_key", "")).strip()
403
  word_tested = data.get("word_tested")
404
  particle_type = data.get("particle_type")
405
 
 
425
  else:
426
  hint = data.get("hint_text", "Review the grammar rule and try again")
427
 
428
+ retry_allowed = not correct and attempt < 3
429
  speech_stage_unlocked = correct
430
 
431
  response = {
432
+ "question_id": data.get("question_id"),
433
+ "correct": correct,
434
+ "score_delta": 10 if correct else 0,
435
+ "feedback": _build_feedback(correct, q_type, grammar_rule),
436
+ "hint": hint,
437
+ "retry_allowed": retry_allowed,
438
+ "attempt_number": attempt,
439
  "speech_stage_unlocked": speech_stage_unlocked,
440
  }
441
 
 
491
  return random.choice(messages)
492
  else:
493
  rule_hints = {
494
+ "topic_marker": "Remember: 은 for consonant endings, 는 for vowel endings",
495
+ "copula": "Remember: 이에요 for consonant endings, 예요 for vowel endings",
496
+ "negative_copula": "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
497
+ "indirect_quote_dago": "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
498
+ "indirect_quote_commands": "Review: (으)라고 commands, 지 말라고 negatives",
499
+ "indirect_quote_questions": "Review: V/Adj+냐고 (drop ㄹ from stem)",
500
+ "indirect_quote_suggestions": "Review: V+자고 for suggestions",
501
+ "regret_expression": "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
502
  }
503
  base = "다시 해 보세요! Let's try again. "
504
  return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
 
518
  lang = data.get('lang', 'ko-KR')
519
  grammar_rule = data.get('grammar_rule', '')
520
 
521
+ # ── STEP 1: Validate incoming payload ───────────────────────────────────
522
  logger.info("=" * 60)
523
  logger.info("🎤 [PRON] ── Pronunciation Assessment Start ──")
524
  logger.info(f"🎤 [PRON] ref_text='{ref_text}' | lang='{lang}' | grammar_rule='{grammar_rule}'")
525
 
526
  if not ref_text:
527
+ logger.error("❌ [PRON] STEP 1 FAILED: No reference text in payload")
528
  emit('pronunciation_result', {
529
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
530
  "recognized_text": "", "word_details": [],
 
542
  })
543
  return
544
 
545
+ logger.info(f"🎤 [PRON] STEP 1 OK: audio_b64 length={len(audio_b64)} chars")
546
 
547
  # ── STEP 2: Validate Azure credentials ──────────────────────────────────
548
+ logger.info("🎤 [PRON] STEP 2: Checking Azure credentials...")
549
+ logger.info(f"🎤 [PRON] AZURE_SPEECH_KEY present={bool(AZURE_SPEECH_KEY)} | length={len(AZURE_SPEECH_KEY) if AZURE_SPEECH_KEY else 0}")
550
+ logger.info(f"🎤 [PRON] AZURE_SPEECH_REGION='{AZURE_SPEECH_REGION}'")
551
 
552
  if not AZURE_SPEECH_KEY:
553
+ logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_KEY env var not set")
554
  emit('pronunciation_result', {
555
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
556
  "recognized_text": "", "word_details": [],
 
559
  return
560
 
561
  if not AZURE_SPEECH_REGION:
562
+ logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_REGION env var not set")
563
  emit('pronunciation_result', {
564
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
565
  "recognized_text": "", "word_details": [],
 
567
  })
568
  return
569
 
570
+ logger.info("🎤 [PRON] STEP 2 OK: Azure credentials present")
571
 
572
  raw_path = None
573
  clean_path = None
574
 
575
  try:
576
+ # ── STEP 3: Decode base64 ────────────────────────────────────────────
577
  logger.info("🎤 [PRON] STEP 3: Decoding base64 audio...")
578
  try:
579
  if "," in audio_b64:
580
  header, audio_b64 = audio_b64.split(",", 1)
581
+ logger.info(f"🎤 [PRON] Stripped data URI header: '{header[:60]}'")
582
  audio_bytes = base64.b64decode(audio_b64)
583
+ logger.info(f"🎤 [PRON] STEP 3 OK: Decoded {len(audio_bytes)} bytes")
584
  except Exception as e:
585
  logger.error(f"❌ [PRON] STEP 3 FAILED: base64 decode error: {e}")
586
  raise
587
 
588
  if len(audio_bytes) < 100:
589
+ logger.error(f"❌ [PRON] STEP 3: Audio too small ({len(audio_bytes)} bytes) — likely not real audio")
590
  raise Exception(f"Audio payload too small: {len(audio_bytes)} bytes")
591
 
592
+ # ── STEP 4: Write temp file ──────────────────────────────────────────
593
+ logger.info("🎤 [PRON] STEP 4: Writing to temp file...")
594
+ with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
595
+ temp_raw.write(audio_bytes)
596
+ raw_path = temp_raw.name
597
+ logger.info(f"🎤 [PRON] STEP 4 OK: Wrote {os.path.getsize(raw_path)} bytes → {raw_path}")
 
 
 
 
598
 
599
+ # ── STEP 5: FFmpeg conversion ────────────────────────────────────────
600
+ logger.info("🎤 [PRON] STEP 5: FFmpeg 16kHz mono PCM WAV...")
601
  clean_path = sanitize_audio(raw_path)
602
  if not clean_path:
603
+ logger.error("❌ [PRON] STEP 5 FAILED: sanitize_audio() returned None")
604
+ raise Exception("Audio conversion failed (FFmpeg error — see logs above)")
605
+ logger.info(f"🎤 [PRON] STEP 5 OK: clean WAV at {clean_path}")
606
 
607
+ # ── STEP 6: WAV integrity check ─────────────────────────────────────
608
+ logger.info("🎤 [PRON] STEP 6: WAV integrity + volume check...")
609
  audio_ok = analyze_audio_volume(clean_path)
610
  if not audio_ok:
611
+ logger.warning("⚠️ [PRON] STEP 6: Audio silent — Azure will likely return NoMatch")
 
612
  else:
613
+ logger.info("🎤 [PRON] STEP 6 OK: Audible signal confirmed")
614
 
615
+ # ── STEP 7: Build Azure SpeechConfig ────────────────────────────────
616
+ logger.info(f"🎤 [PRON] STEP 7: Building SpeechConfig (region={AZURE_SPEECH_REGION}, lang={lang})...")
617
  try:
618
  speech_config = speechsdk.SpeechConfig(
619
  subscription=AZURE_SPEECH_KEY,
620
  region=AZURE_SPEECH_REGION
621
  )
622
  speech_config.speech_recognition_language = lang
623
+ logger.info("🎤 [PRON] STEP 7 OK: SpeechConfig built")
624
  except Exception as e:
625
+ logger.error(f"❌ [PRON] STEP 7 FAILED: SpeechConfig error: {e}")
626
  raise
627
 
628
+ # ── STEP 8: Build AudioConfig ────────────────────────────────────────
629
+ logger.info(f"🎤 [PRON] STEP 8: Building AudioConfig from {clean_path}...")
630
  try:
631
  audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
632
  logger.info("🎤 [PRON] STEP 8 OK: AudioConfig built")
633
  except Exception as e:
634
+ logger.error(f"❌ [PRON] STEP 8 FAILED: AudioConfig error: {e}")
635
  raise
636
 
637
+ # ── STEP 9: Build PronunciationAssessmentConfig ──────────────────────
638
+ logger.info(f"🎤 [PRON] STEP 9: PronunciationAssessmentConfig for '{ref_text}'...")
639
  try:
640
  pronunciation_config = speechsdk.PronunciationAssessmentConfig(
641
  reference_text=ref_text,
 
648
  logger.error(f"❌ [PRON] STEP 9 FAILED: PronunciationAssessmentConfig error: {e}")
649
  raise
650
 
651
+ # ── STEP 10: Build SpeechRecognizer ──────────────────────────────────
652
  logger.info("🎤 [PRON] STEP 10: Building SpeechRecognizer...")
653
  try:
654
  recognizer = speechsdk.SpeechRecognizer(
 
656
  audio_config=audio_config
657
  )
658
  pronunciation_config.apply_to(recognizer)
659
+ logger.info("🎤 [PRON] STEP 10 OK: SpeechRecognizer ready")
660
  except Exception as e:
661
+ logger.error(f"❌ [PRON] STEP 10 FAILED: SpeechRecognizer error: {e}")
662
  raise
663
 
664
+ # ── STEP 11: Call Azure in a REAL OS thread ──────────────────────────
665
+ # recognize_once_async() uses native C++ threads internally.
666
+ # Calling it directly in an eventlet greenthread causes silent Canceled
667
+ # errors because eventlet has patched socket/ssl/threading under the SDK.
668
+ # _azure_executor is a real ThreadPoolExecutor — unpatched OS threads.
669
+ logger.info("🎤 [PRON] STEP 11: Submitting to real OS thread (bypassing eventlet)...")
670
  try:
671
+ def _run_azure():
672
+ return recognizer.recognize_once_async().get()
673
+
674
+ future = _azure_executor.submit(_run_azure)
675
+ result = future.result(timeout=30)
676
+ logger.info("🎤 [PRON] STEP 11 OK: Azure responded from real OS thread")
677
  logger.info(f"🎤 [PRON] result.reason = {result.reason}")
678
  logger.info(f"🎤 [PRON] result.text = '{result.text}'")
679
+ except concurrent.futures.TimeoutError:
680
+ logger.error("❌ [PRON] STEP 11 FAILED: Azure timed out after 30s")
681
+ raise Exception("Azure Speech timed out — check HF Space outbound network access")
682
  except Exception as e:
683
+ logger.error(f"❌ [PRON] STEP 11 FAILED: Azure OS thread raised: {e}")
684
  raise
685
 
686
+ # ── STEP 12: Parse Azure result ──────────────────────────────────────
687
+ logger.info("🎤 [PRON] STEP 12: Parsing result...")
688
  response = {}
689
 
690
  if result.reason == speechsdk.ResultReason.RecognizedSpeech:
691
+ logger.info("🎤 [PRON] STEP 12: RecognizedSpeech ✅")
692
  try:
693
+ pron_result = speechsdk.PronunciationAssessmentResult(result)
694
  accuracy = pron_result.accuracy_score
695
  fluency = pron_result.fluency_score
696
  completeness = pron_result.completeness_score
697
  logger.info(f"🎤 [PRON] Scores → accuracy={accuracy:.1f} fluency={fluency:.1f} completeness={completeness:.1f}")
698
  except Exception as e:
699
+ logger.error(f"❌ [PRON] STEP 12 FAILED: PronunciationAssessmentResult parse error: {e}")
700
  raise
701
 
702
  detailed_words = []
 
722
  learner.record_outcome(grammar_rule, True, "speak")
723
  response["mastery_update"] = dict(learner.mastery)
724
 
725
+ logger.info(f"✅ [PRON] STEP 12 OK: Assessment complete — accuracy={accuracy:.1f}")
726
 
727
  elif result.reason == speechsdk.ResultReason.NoMatch:
728
+ no_match_detail = getattr(result, 'no_match_details', 'N/A')
729
+ logger.warning(f"⚠️ [PRON] STEP 12: NoMatch — Azure heard nothing useful")
730
  logger.warning(f"⚠️ [PRON] NoMatch details: {no_match_detail}")
731
  response = {
732
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
733
+ "recognized_text": "", "word_details": [],
734
+ "feedback": "I couldn't hear you clearly. Check your microphone and try again.",
 
735
  }
736
 
737
  elif result.reason == speechsdk.ResultReason.Canceled:
738
+ logger.error("❌ [PRON] STEP 12: Result reason = Canceled")
739
  try:
740
  cancellation = speechsdk.CancellationDetails(result)
741
+
742
+ # Use getattr for all fields — older SDK versions may be missing some
743
+ c_reason = getattr(cancellation, 'reason', 'UNKNOWN')
744
+ c_code = getattr(cancellation, 'error_code', 'UNKNOWN')
745
+ c_details = getattr(cancellation, 'error_details', 'UNKNOWN')
746
+
747
+ logger.error(f"❌ [PRON] Cancellation reason: {c_reason}")
748
+ logger.error(f"❌ [PRON] Cancellation error_code: {c_code}")
749
+ logger.error(f"❌ [PRON] Cancellation error_details: {c_details}")
750
+
751
+ # Dump every attribute on the object so nothing is hidden
752
+ logger.error(f"❌ [PRON] CancellationDetails dir(): {[a for a in dir(cancellation) if not a.startswith('_')]}")
753
+
754
+ # Also dump the raw result properties dict if available
755
+ raw_props = getattr(result, 'properties', None)
756
+ if raw_props:
757
+ logger.error(f"❌ [PRON] result.properties: {dict(raw_props)}")
758
+
759
+ details_str = str(c_details or '') + str(c_code or '')
760
+ if "401" in details_str:
761
+ logger.error("❌ [PRON] → HTTP 401: Azure key is INVALID or EXPIRED — check HF Space secrets")
762
+ elif "403" in details_str:
763
+ logger.error("❌ [PRON] → HTTP 403: Key does not have access — wrong region or feature not enabled")
764
+ elif "ServiceUnavailable" in details_str or "503" in details_str:
765
+ logger.error("❌ [PRON] → 503: Azure Speech service is temporarily unavailable")
766
+ elif "connection" in details_str.lower() or "network" in details_str.lower():
767
+ logger.error("❌ [PRON] → Network error: HF Space cannot reach Azure endpoint — check outbound access")
768
+ elif "AuthenticationFailure" in details_str:
769
+ logger.error("❌ [PRON] → AuthenticationFailure: Key/region mismatch — your key was provisioned in a different region than AZURE_SPEECH_REGION")
770
+ elif details_str in ('', 'UNKNOWN'):
771
+ logger.error("❌ [PRON] → Empty error details: likely a key/region mismatch. Verify AZURE_SPEECH_REGION matches the region shown in Azure portal for this key")
772
 
773
  response = {
774
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
775
+ "recognized_text": "", "word_details": [],
776
+ "feedback": f"Azure canceled: reason={c_reason} code={c_code} details={c_details}",
 
777
  }
778
  except Exception as parse_e:
779
+ logger.error(f"❌ [PRON] Could not parse CancellationDetails at all: {parse_e}")
780
+ # Last resort — try to get anything from the raw result
781
+ logger.error(f"❌ [PRON] Raw result dir(): {[a for a in dir(result) if not a.startswith('_')]}")
782
  response = {
783
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
784
  "recognized_text": "", "word_details": [],
785
+ "feedback": "Recognition was canceled by Azure — check server logs for details.",
786
  }
787
 
788
  else:
789
+ logger.error(f"❌ [PRON] STEP 12: Unexpected result.reason = {result.reason}")
790
  response = {
791
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
792
  "recognized_text": "", "word_details": [],
 
798
  emit('pronunciation_result', response)
799
 
800
  except Exception as e:
 
801
  import traceback
802
+ logger.error(f"❌ [PRON] UNHANDLED EXCEPTION: {type(e).__name__}: {e}")
803
  logger.error(f"❌ [PRON] Traceback:\n{traceback.format_exc()}")
804
  emit('pronunciation_result', {
805
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
 
809
  finally:
810
  if raw_path and os.path.exists(raw_path):
811
  os.remove(raw_path)
812
+ logger.info(f"🧹 [PRON] Cleaned up: {raw_path}")
813
  if clean_path and os.path.exists(clean_path):
814
  os.remove(clean_path)
815
+ logger.info(f"🧹 [PRON] Cleaned up: {clean_path}")
816
 
817
 
818
  def _build_pronunciation_feedback(accuracy: float, fluency: float,
 
871
  logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
872
 
873
  emit('session_restored', {
874
+ "success": True,
875
+ "session_id": learner_id,
876
+ "mastery": learner.mastery,
877
+ "difficulty": learner.difficulty,
878
  "question_count": learner.question_count,
879
  })
880