rairo committed on
Commit
fe5a9a9
·
verified ·
1 Parent(s): 8cfbcbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -661
app.py CHANGED
@@ -9,35 +9,26 @@ import struct
9
  import logging
10
  import uuid
11
  import sys
 
12
 
13
  # ── CRITICAL: eventlet monkey_patch BEFORE all other imports ──────────────────
14
- # Azure Speech SDK uses native C++ threads internally. If eventlet patches
15
- # Python threading AFTER the SDK is already loaded, the SDK's recognize_once_async()
16
- # silently returns ResultReason.Canceled + CancellationReason.Error with no
17
- # useful error_details — even with valid credentials and good audio.
18
- #
19
- # Fix: call monkey_patch() here at the very top, then run all Azure SDK calls
20
- # inside a real OS thread via concurrent.futures.ThreadPoolExecutor, which
21
- # is not subject to eventlet's cooperative scheduling.
22
  import eventlet
23
  eventlet.monkey_patch()
24
 
25
  import concurrent.futures
26
- _azure_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
 
 
27
 
28
  import cv2
29
  import numpy as np
30
  from flask import Flask
31
  from flask_socketio import SocketIO, emit
32
-
33
  from PIL import Image
34
-
35
- # --- 2025 AI STANDARDS ---
36
  from google import genai
37
  from google.genai import types
38
  import azure.cognitiveservices.speech as speechsdk
39
 
40
- # --- KLP Modules ---
41
  from korean_rules import rule_engine
42
  from content_pack import get_active_pack, replace_active_pack
43
  from learner_model import get_or_create_session, get_session, delete_session, purge_stale_sessions
@@ -45,23 +36,17 @@ from question_generator import QuestionGenerator, QTYPE_TO_RULE
45
 
46
  sys.path.append(os.path.dirname(__file__))
47
 
48
-
49
- # --- LOGGING SETUP ---
50
- logging.basicConfig(
51
- level=logging.INFO,
52
- format='%(asctime)s - %(levelname)s - %(message)s'
53
- )
54
  logger = logging.getLogger(__name__)
55
 
56
- app = Flask(__name__)
57
  socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')
58
 
59
- # --- SECRETS ---
60
- GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
61
- AZURE_SPEECH_KEY = os.environ.get("AZURE_SPEECH_KEY")
62
  AZURE_SPEECH_REGION = os.environ.get("AZURE_SPEECH_REGION")
 
63
 
64
- # --- Initialize Gemini Client ---
65
  client = None
66
  try:
67
  client = genai.Client(api_key=GEMINI_API_KEY)
@@ -69,24 +54,48 @@ try:
69
  except Exception as e:
70
  logger.error(f"❌ Failed to init Gemini: {e}")
71
 
72
- # --- Initialize Question Generator ---
73
- question_gen = QuestionGenerator(gemini_client=client)
 
 
 
 
 
 
74
 
75
- # --- Session ID → socket SID mapping ---
76
- _socket_to_learner: dict[str, str] = {}
 
 
 
 
 
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- # ===========================================================================
80
- # HELPERS
81
- # ===========================================================================
82
 
83
  def decode_image(base64_string):
84
  try:
85
  if "," in base64_string:
86
  base64_string = base64_string.split(",")[1]
87
  img_bytes = base64.b64decode(base64_string)
88
- np_arr = np.frombuffer(img_bytes, np.uint8)
89
- frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
90
  return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
91
  except Exception as e:
92
  logger.error(f"Image Decode Error: {e}")
@@ -94,136 +103,85 @@ def decode_image(base64_string):
94
 
95
 
96
  def sanitize_audio(input_path):
97
- """Force audio into Azure-compliant format: 16kHz, Mono, 16-bit PCM WAV."""
98
- output_path = input_path + "_clean.wav"
99
-
100
- # --- Log input file info before conversion ---
101
  try:
102
  input_size = os.path.getsize(input_path)
103
- logger.info(f"🔧 [FFmpeg] Input file: {input_path} | Size: {input_size} bytes")
104
- if input_size == 0:
105
- logger.error("❌ [FFmpeg] Input file is EMPTY (0 bytes) — audio was not captured correctly")
106
- return None
107
- except Exception as e:
108
- logger.error(f"❌ [FFmpeg] Could not stat input file: {e}")
109
-
110
- command = [
111
- "ffmpeg", "-y", "-v", "verbose",
112
- "-i", input_path,
113
- "-ac", "1",
114
- "-ar", "16000",
115
- "-acodec", "pcm_s16le",
116
- output_path
117
- ]
118
-
119
- logger.info(f"🔧 [FFmpeg] Running: {' '.join(command)}")
120
 
 
 
 
121
  try:
122
  result = subprocess.run(command, check=True, capture_output=True, text=True)
123
- logger.info(f"✅ [FFmpeg] Conversion successful → {output_path}")
124
- if result.stderr:
125
- logger.info(f"🔧 [FFmpeg] stderr:\n{result.stderr[:2000]}")
126
-
127
  output_size = os.path.getsize(output_path)
128
- logger.info(f"🔧 [FFmpeg] Output WAV size: {output_size} bytes")
129
  if output_size == 0:
130
- logger.error("❌ [FFmpeg] Output WAV is EMPTY — conversion produced no data")
131
  return None
132
-
 
 
133
  return output_path
134
-
135
  except subprocess.CalledProcessError as e:
136
- logger.error(f"❌ [FFmpeg] Process failed (returncode={e.returncode})")
137
- logger.error(f"❌ [FFmpeg] stdout: {e.stdout}")
138
- logger.error(f"❌ [FFmpeg] stderr: {e.stderr}")
139
  return None
140
  except FileNotFoundError:
141
- logger.error("❌ [FFmpeg] ffmpeg binary not found — is it installed in the Docker image?")
142
  return None
143
  except Exception as e:
144
- logger.error(f"❌ [FFmpeg] Unexpected error: {e}")
145
  return None
146
 
147
 
148
- def analyze_audio_volume(file_path):
149
- """Inspect WAV: log framerate, channels, duration, peak amplitude."""
150
  try:
151
  with wave.open(file_path, 'rb') as wf:
152
  framerate = wf.getframerate()
153
  nframes = wf.getnframes()
154
- channels = wf.getnchannels()
155
- sampwidth = wf.getsampwidth()
156
  duration_s = nframes / framerate if framerate else 0
157
-
158
- logger.info(
159
- f"🔊 [WAV] framerate={framerate}Hz | channels={channels} | "
160
- f"sampwidth={sampwidth}B | nframes={nframes} | duration={duration_s:.2f}s"
161
- )
162
-
163
  if duration_s < 0.2:
164
- logger.warning(f"⚠️ [WAV] Very short audio ({duration_s:.2f}s) — may not be enough for recognition")
165
-
166
- raw_data = wf.readframes(nframes)
167
- if len(raw_data) == 0:
168
- logger.error("❌ [WAV] No PCM data in file")
169
- return False
170
-
171
- fmt = "%dh" % (len(raw_data) // 2)
172
- pcm_data = struct.unpack(fmt, raw_data)
173
-
174
- if not pcm_data:
175
- logger.error("❌ [WAV] PCM unpack produced no samples")
176
  return False
177
-
178
- max_val = max(abs(x) for x in pcm_data)
179
- avg_val = sum(abs(x) for x in pcm_data) / len(pcm_data)
180
- logger.info(f"🔊 [WAV] Peak amplitude: {max_val}/32767 | Avg: {avg_val:.1f}")
181
-
182
- if max_val < 100:
183
- logger.warning("⚠️ [WAV] Audio appears SILENT (peak < 100) — microphone may not be working")
184
  return False
185
- if max_val < 500:
186
- logger.warning(f"⚠️ [WAV] Audio is very quiet (peak={max_val}) — may affect recognition accuracy")
187
-
188
  return True
189
-
190
- except wave.Error as e:
191
- logger.error(f"❌ [WAV] wave.Error: {e} — file may not be a valid WAV")
192
- return False
193
  except Exception as e:
194
- logger.warning(f"⚠️ [WAV] Could not analyze audio: {e}")
195
- return True # Don't block on analysis failure
196
 
197
 
198
- def get_learner(socket_sid: str):
199
- """Get learner model for the current socket connection."""
200
  learner_id = _socket_to_learner.get(socket_sid)
201
  if learner_id:
202
  return get_session(learner_id)
203
  return None
204
 
205
 
206
- # ===========================================================================
207
- # CONNECTION HANDLERS
208
- # ===========================================================================
209
-
210
  @socketio.on('connect')
211
  def handle_connect():
212
  from flask import request
213
- sid = request.sid
214
  learner_id = str(uuid.uuid4())
215
  _socket_to_learner[sid] = learner_id
216
  model = get_or_create_session(learner_id)
217
  logger.info(f"✅ Client connected: socket={sid} learner={learner_id}")
218
-
219
  emit('session_ready', {
220
- "session_id": learner_id,
221
- "message": "Connected to KLP AI Service",
222
- "mastery": model.mastery,
223
- "difficulty": model.difficulty,
224
  "content_pack": {
225
- "lesson": get_active_pack().get("lesson"),
226
- "version": get_active_pack().get("version"),
227
  "vocab_count": len(get_active_pack().get("vocab", [])),
228
  }
229
  })
@@ -232,120 +190,73 @@ def handle_connect():
232
  @socketio.on('disconnect')
233
  def handle_disconnect():
234
  from flask import request
235
- sid = request.sid
236
  learner_id = _socket_to_learner.pop(sid, None)
237
  if learner_id:
 
238
  logger.info(f"Client disconnected: socket={sid} learner={learner_id}")
239
  else:
240
  logger.info(f"Client disconnected: socket={sid}")
241
 
242
 
243
- # ===========================================================================
244
- # 1. CONTENT PACK LOADER
245
- # ===========================================================================
246
-
247
  @socketio.on('load_content_pack')
248
  def handle_load_content_pack(data):
249
  logger.info("📦 Content pack upload received")
250
-
251
  try:
252
  file_type = data.get("file_type", "json").lower()
253
  file_b64 = data.get("file_bytes", "")
254
  lesson = data.get("lesson", "custom")
255
  description = data.get("description", "Custom content pack")
256
-
257
  if "," in file_b64:
258
  file_b64 = file_b64.split(",")[1]
259
  file_bytes = base64.b64decode(file_b64)
260
 
261
  if file_type == "json":
262
- raw = json.loads(file_bytes.decode("utf-8"))
263
- new_pack = replace_active_pack({
264
- **raw,
265
- "lesson": lesson,
266
- "description": description,
267
- })
268
  emit('content_pack_loaded', {
269
- "success": True,
270
- "lesson": new_pack["lesson"],
271
  "vocab_count": len(new_pack["vocab"]),
272
- "grammar_rules": list(new_pack["grammar_rules"].keys()),
273
- "source": "json_upload",
274
  })
275
-
276
  elif file_type in ("docx", "pdf"):
277
  if not client:
278
  emit('content_pack_loaded', {"success": False, "error": "Gemini not available"})
279
  return
280
-
281
- mime = "application/pdf" if file_type == "pdf" else \
282
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
283
-
284
- parse_prompt = """You are parsing a Korean language teaching document.
285
- Extract all vocabulary items and grammar rules.
286
-
287
- Return ONLY valid JSON in this exact structure:
288
- {
289
- "vocab": [
290
- {"korean": "학생", "english": "student", "type": "noun"}
291
- ],
292
- "grammar_rules": {
293
- "rule_id": {
294
- "id": "rule_id",
295
- "name": "Rule Name",
296
- "description": "What the rule does",
297
- "examples": [{"sentence": "...", "translation": "..."}],
298
- "difficulty": 1
299
- }
300
- },
301
- "lesson": "lesson name/number",
302
- "description": "brief description"
303
- }
304
-
305
- Types for vocab: noun, verb, adjective, pronoun, adverb, expression
306
- Grammar rule IDs should be snake_case."""
307
-
308
  response = client.models.generate_content(
309
- model="gemini-2.5-flash",
310
- contents=[
311
- parse_prompt,
312
- types.Part.from_bytes(data=file_bytes, mime_type=mime)
313
- ],
314
  )
315
-
316
  text = response.text.strip()
317
  if "```" in text:
318
  text = text.split("```")[1]
319
  if text.startswith("json"):
320
  text = text[4:]
321
-
322
- parsed = json.loads(text.strip())
323
- new_pack = replace_active_pack(parsed)
324
-
325
  emit('content_pack_loaded', {
326
- "success": True,
327
- "lesson": new_pack["lesson"],
328
  "vocab_count": len(new_pack["vocab"]),
329
- "grammar_rules": list(new_pack["grammar_rules"].keys()),
330
- "source": "gemini_parsed",
331
  })
332
-
333
  else:
334
- emit('content_pack_loaded', {"success": False, "error": f"Unsupported file type: {file_type}"})
335
-
336
  except Exception as e:
337
  logger.error(f"Content pack load error: {e}")
338
  emit('content_pack_loaded', {"success": False, "error": str(e)})
339
 
340
 
341
- # ===========================================================================
342
- # 2. QUESTION GENERATION
343
- # ===========================================================================
344
-
345
  @socketio.on('request_question')
346
  def handle_request_question(data):
347
  from flask import request as req
348
- sid = req.sid
 
349
  learner = get_learner(sid)
350
 
351
  if not learner:
@@ -354,19 +265,30 @@ def handle_request_question(data):
354
 
355
  try:
356
  forced_rule = data.get("grammar_rule") if data else None
357
- override_difficulty = data.get("difficulty") if data else None
358
  difficulty = override_difficulty or learner.difficulty
359
  grammar_rule = forced_rule or learner.get_recommended_rule()
 
360
 
361
- logger.info(f"🎯 Generating question: rule={grammar_rule} difficulty={difficulty} session={learner.session_id}")
 
 
 
 
 
 
 
 
 
 
 
362
 
 
363
  payload = question_gen.generate(
364
- difficulty=difficulty,
365
- grammar_rule=grammar_rule,
366
- history=learner.history,
367
- session_id=learner.session_id,
368
  )
369
-
370
  emit('question_payload', payload)
371
 
372
  except Exception as e:
@@ -374,10 +296,6 @@ def handle_request_question(data):
374
  emit('question_payload', {"error": "Could not generate question. Please try again."})
375
 
376
 
377
- # ===========================================================================
378
- # 3. ANSWER VALIDATION
379
- # ===========================================================================
380
-
381
  @socketio.on('submit_answer')
382
  def handle_submit_answer(data):
383
  from flask import request as req
@@ -391,18 +309,13 @@ def handle_submit_answer(data):
391
 
392
  try:
393
  correct = False
394
-
395
  if interaction_mode == "assemble":
396
- submitted = data.get("token_order", [])
397
- expected = data.get("correct_order", [])
398
- correct = rule_engine.validate_token_order(submitted, expected)
399
-
400
  elif interaction_mode in ("choose_select", "fill_in"):
401
  chosen = str(data.get("answer", "")).strip()
402
  answer_key = str(data.get("answer_key", "")).strip()
403
  word_tested = data.get("word_tested")
404
  particle_type = data.get("particle_type")
405
-
406
  if word_tested and particle_type:
407
  correct = rule_engine.validate_particle_choice(word_tested, chosen, particle_type)
408
  else:
@@ -420,13 +333,8 @@ def handle_submit_answer(data):
420
  if not correct:
421
  word = data.get("word_tested")
422
  ptype = data.get("particle_type")
423
- if word and ptype:
424
- hint = rule_engine.get_hint(word, ptype)
425
- else:
426
- hint = data.get("hint_text", "Review the grammar rule and try again")
427
-
428
- retry_allowed = not correct and attempt < 3
429
- speech_stage_unlocked = correct
430
 
431
  response = {
432
  "question_id": data.get("question_id"),
@@ -434,83 +342,70 @@ def handle_submit_answer(data):
434
  "score_delta": 10 if correct else 0,
435
  "feedback": _build_feedback(correct, q_type, grammar_rule),
436
  "hint": hint,
437
- "retry_allowed": retry_allowed,
438
  "attempt_number": attempt,
439
- "speech_stage_unlocked": speech_stage_unlocked,
440
  }
441
-
442
  if learner:
443
  response["mastery_update"] = dict(learner.mastery)
444
  response["streak"] = learner.streak
445
 
446
  emit('answer_result', response)
447
 
 
 
 
 
 
 
 
 
 
 
 
448
  except Exception as e:
449
  logger.error(f"Answer validation error: {e}")
450
- emit('answer_result', {
451
- "correct": False,
452
- "score_delta": 0,
453
- "feedback": "Server error during validation",
454
- "retry_allowed": True,
455
- })
456
 
457
 
458
- def _gemini_recheck(data: dict) -> bool:
459
  try:
460
- prompt = f"""You are a Korean language grammar validator.
461
-
462
- Direct speech: {data.get('direct_speech', '')}
463
- Student's indirect speech: {data.get('answer', '')}
464
- Expected indirect speech: {data.get('answer_key', '')}
465
-
466
- Is the student's answer grammatically correct as an indirect quotation?
467
- Consider: minor spacing differences are OK, but wrong particles or wrong verb endings are not.
468
-
469
- Reply with ONLY valid JSON: {{"correct": true}} or {{"correct": false, "reason": "explanation"}}"""
470
-
471
- response = client.models.generate_content(
472
- model="gemini-2.5-flash",
473
- contents=prompt,
474
- )
475
- result = json.loads(response.text.strip())
476
- return result.get("correct", False)
477
  except Exception as e:
478
  logger.warning(f"Gemini recheck failed: {e}")
479
  return False
480
 
481
 
482
- def _build_feedback(correct: bool, q_type: str, grammar_rule: str) -> str:
483
  if correct:
484
- messages = [
485
- "정확해요! Great job! 🎉",
486
- "맞아요! That's correct! ⭐",
487
- "완벽해요! Perfect! 🌟",
488
- "잘했어요! Well done! 👏",
489
- ]
490
  import random
491
- return random.choice(messages)
492
- else:
493
- rule_hints = {
494
- "topic_marker": "Remember: 은 for consonant endings, 는 for vowel endings",
495
- "copula": "Remember: 이에요 for consonant endings, 예요 for vowel endings",
496
- "negative_copula": "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
497
- "indirect_quote_dago": "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
498
- "indirect_quote_commands": "Review: (으)라고 commands, 지 말라고 negatives",
499
- "indirect_quote_questions": "Review: V/Adj+냐고 (drop ㄹ from stem)",
500
- "indirect_quote_suggestions": "Review: V+자고 for suggestions",
501
- "regret_expression": "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
502
- }
503
- base = "다시 해 보세요! Let's try again. "
504
- return base + rule_hints.get(grammar_rule, "Review the grammar rule.")
505
-
506
 
507
- # ===========================================================================
508
- # 4. PRONUNCIATION ASSESSMENT
509
- # ===========================================================================
510
 
511
  @socketio.on('assess_pronunciation')
512
  def handle_pronunciation(data):
513
  from flask import request as req
 
514
  sid = req.sid
515
  learner = get_learner(sid)
516
 
@@ -518,340 +413,159 @@ def handle_pronunciation(data):
518
  lang = data.get('lang', 'ko-KR')
519
  grammar_rule = data.get('grammar_rule', '')
520
 
521
- # ── STEP 1: Validate incoming payload ───────────────────────────────────
522
- logger.info("=" * 60)
523
- logger.info("🎤 [PRON] ── Pronunciation Assessment Start ──")
524
- logger.info(f"🎤 [PRON] ref_text='{ref_text}' | lang='{lang}' | grammar_rule='{grammar_rule}'")
525
 
526
- if not ref_text:
527
- logger.error("❌ [PRON] STEP 1 FAILED: No reference text in payload")
528
  emit('pronunciation_result', {
529
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
530
- "recognized_text": "", "word_details": [],
531
- "feedback": "No reference text provided.",
532
  })
533
- return
 
 
534
 
535
  audio_b64 = data.get('audio', '')
536
  if not audio_b64:
537
- logger.error("❌ [PRON] STEP 1 FAILED: No audio data in payload")
538
- emit('pronunciation_result', {
539
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
540
- "recognized_text": "", "word_details": [],
541
- "feedback": "No audio data received.",
542
- })
543
- return
544
 
545
- logger.info(f"🎤 [PRON] STEP 1 OK: audio_b64 length={len(audio_b64)} chars")
546
-
547
- # ── STEP 2: Validate Azure credentials ──────────────────────────────────
548
- logger.info("🎤 [PRON] STEP 2: Checking Azure credentials...")
549
- logger.info(f"🎤 [PRON] AZURE_SPEECH_KEY present={bool(AZURE_SPEECH_KEY)} | length={len(AZURE_SPEECH_KEY) if AZURE_SPEECH_KEY else 0}")
550
- logger.info(f"🎤 [PRON] AZURE_SPEECH_REGION='{AZURE_SPEECH_REGION}'")
551
-
552
- if not AZURE_SPEECH_KEY:
553
- logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_KEY env var not set")
554
- emit('pronunciation_result', {
555
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
556
- "recognized_text": "", "word_details": [],
557
- "feedback": "Azure Speech key not configured on server.",
558
- })
559
- return
560
-
561
- if not AZURE_SPEECH_REGION:
562
- logger.error("❌ [PRON] STEP 2 FAILED: AZURE_SPEECH_REGION env var not set")
563
- emit('pronunciation_result', {
564
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
565
- "recognized_text": "", "word_details": [],
566
- "feedback": "Azure Speech region not configured on server.",
567
- })
568
- return
569
-
570
- logger.info("🎤 [PRON] STEP 2 OK: Azure credentials present")
571
-
572
- raw_path = None
573
- clean_path = None
574
 
 
575
  try:
576
- # ── STEP 3: Decode base64 ────────────────────────────────────────────
577
- logger.info("🎤 [PRON] STEP 3: Decoding base64 audio...")
578
- try:
579
- if "," in audio_b64:
580
- header, audio_b64 = audio_b64.split(",", 1)
581
- logger.info(f"🎤 [PRON] Stripped data URI header: '{header[:60]}'")
582
- audio_bytes = base64.b64decode(audio_b64)
583
- logger.info(f"🎤 [PRON] STEP 3 OK: Decoded {len(audio_bytes)} bytes")
584
- except Exception as e:
585
- logger.error(f"❌ [PRON] STEP 3 FAILED: base64 decode error: {e}")
586
- raise
587
-
588
  if len(audio_bytes) < 100:
589
- logger.error(f"❌ [PRON] STEP 3: Audio too small ({len(audio_bytes)} bytes) — likely not real audio")
590
- raise Exception(f"Audio payload too small: {len(audio_bytes)} bytes")
591
-
592
- # ── STEP 4: Write temp file ──────────────────────────────────────────
593
- logger.info("🎤 [PRON] STEP 4: Writing to temp file...")
594
- with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
595
- temp_raw.write(audio_bytes)
596
- raw_path = temp_raw.name
597
- logger.info(f"🎤 [PRON] STEP 4 OK: Wrote {os.path.getsize(raw_path)} bytes → {raw_path}")
598
-
599
- # ── STEP 5: FFmpeg conversion ────────────────────────────────────────
600
- logger.info("🎤 [PRON] STEP 5: FFmpeg → 16kHz mono PCM WAV...")
601
  clean_path = sanitize_audio(raw_path)
 
602
  if not clean_path:
603
- logger.error(" [PRON] STEP 5 FAILED: sanitize_audio() returned None")
604
- raise Exception("Audio conversion failed (FFmpeg error — see logs above)")
605
- logger.info(f"🎤 [PRON] STEP 5 OK: clean WAV at {clean_path}")
606
-
607
- # ── STEP 6: WAV integrity check ──────────────────────────────────────
608
- logger.info("🎤 [PRON] STEP 6: WAV integrity + volume check...")
609
- audio_ok = analyze_audio_volume(clean_path)
610
- if not audio_ok:
611
- logger.warning("⚠️ [PRON] STEP 6: Audio silent — Azure will likely return NoMatch")
612
- else:
613
- logger.info("🎤 [PRON] STEP 6 OK: Audible signal confirmed")
614
-
615
- # ── STEP 7: Build Azure SpeechConfig ────────────────────────────────
616
- logger.info(f"🎤 [PRON] STEP 7: Building SpeechConfig (region={AZURE_SPEECH_REGION}, lang={lang})...")
617
- try:
618
- speech_config = speechsdk.SpeechConfig(
619
- subscription=AZURE_SPEECH_KEY,
620
- region=AZURE_SPEECH_REGION
621
- )
622
- speech_config.speech_recognition_language = lang
623
- logger.info("🎤 [PRON] STEP 7 OK: SpeechConfig built")
624
- except Exception as e:
625
- logger.error(f"❌ [PRON] STEP 7 FAILED: SpeechConfig error: {e}")
626
- raise
627
 
628
- # ── STEP 8: Build AudioConfig ────────────────────────────────────────
629
- logger.info(f"🎤 [PRON] STEP 8: Building AudioConfig from {clean_path}...")
630
- try:
631
- audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
632
- logger.info("🎤 [PRON] STEP 8 OK: AudioConfig built")
633
- except Exception as e:
634
- logger.error(f"❌ [PRON] STEP 8 FAILED: AudioConfig error: {e}")
635
- raise
636
 
637
- # ── STEP 9: Build PronunciationAssessmentConfig ──────────────────────
638
- logger.info(f"🎤 [PRON] STEP 9: PronunciationAssessmentConfig for '{ref_text}'...")
639
- try:
640
- pronunciation_config = speechsdk.PronunciationAssessmentConfig(
641
- reference_text=ref_text,
642
- grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
643
- granularity=speechsdk.PronunciationAssessmentGranularity.Word,
644
- enable_miscue=True
645
- )
646
- logger.info("🎤 [PRON] STEP 9 OK: PronunciationAssessmentConfig built")
647
- except Exception as e:
648
- logger.error(f"❌ [PRON] STEP 9 FAILED: PronunciationAssessmentConfig error: {e}")
649
- raise
650
 
651
- # ── STEP 10: Build SpeechRecognizer ─────────────────────────────────
652
- logger.info("🎤 [PRON] STEP 10: Building SpeechRecognizer...")
653
  try:
654
- recognizer = speechsdk.SpeechRecognizer(
655
- speech_config=speech_config,
656
- audio_config=audio_config
657
- )
658
- pronunciation_config.apply_to(recognizer)
659
- logger.info("🎤 [PRON] STEP 10 OK: SpeechRecognizer ready")
660
- except Exception as e:
661
- logger.error(f"❌ [PRON] STEP 10 FAILED: SpeechRecognizer error: {e}")
662
- raise
663
-
664
- # ── STEP 11: Call Azure in a REAL OS thread ──────────────────────────
665
- # recognize_once_async() uses native C++ threads internally.
666
- # Calling it directly in an eventlet greenthread causes silent Canceled
667
- # errors because eventlet has patched socket/ssl/threading under the SDK.
668
- # _azure_executor is a real ThreadPoolExecutor — unpatched OS threads.
669
- logger.info("🎤 [PRON] STEP 11: Submitting to real OS thread (bypassing eventlet)...")
670
- try:
671
- def _run_azure():
672
- return recognizer.recognize_once_async().get()
673
-
674
- future = _azure_executor.submit(_run_azure)
675
- result = future.result(timeout=30)
676
- logger.info("🎤 [PRON] STEP 11 OK: Azure responded from real OS thread")
677
- logger.info(f"🎤 [PRON] result.reason = {result.reason}")
678
- logger.info(f"🎤 [PRON] result.text = '{result.text}'")
679
  except concurrent.futures.TimeoutError:
680
- logger.error("❌ [PRON] STEP 11 FAILED: Azure timed out after 30s")
681
- raise Exception("Azure Speech timed out — check HF Space outbound network access")
682
- except Exception as e:
683
- logger.error(f"❌ [PRON] STEP 11 FAILED: Azure OS thread raised: {e}")
684
- raise
685
 
686
- # ── STEP 12: Parse Azure result ──────────────────────────────────────
687
- logger.info("🎤 [PRON] STEP 12: Parsing result...")
688
- response = {}
689
 
 
690
  if result.reason == speechsdk.ResultReason.RecognizedSpeech:
691
- logger.info("🎤 [PRON] STEP 12: RecognizedSpeech ✅")
692
- try:
693
- pron_result = speechsdk.PronunciationAssessmentResult(result)
694
- accuracy = pron_result.accuracy_score
695
- fluency = pron_result.fluency_score
696
- completeness = pron_result.completeness_score
697
- logger.info(f"🎤 [PRON] Scores → accuracy={accuracy:.1f} fluency={fluency:.1f} completeness={completeness:.1f}")
698
- except Exception as e:
699
- logger.error(f"❌ [PRON] STEP 12 FAILED: PronunciationAssessmentResult parse error: {e}")
700
- raise
701
-
702
- detailed_words = []
703
- for word in pron_result.words:
704
- w = {"word": word.word, "score": word.accuracy_score, "error": word.error_type}
705
- detailed_words.append(w)
706
- logger.info(f"🎤 [PRON] Word: '{word.word}' score={word.accuracy_score:.1f} error='{word.error_type}'")
707
-
708
- feedback = _build_pronunciation_feedback(accuracy, fluency, completeness, detailed_words, ref_text)
709
-
710
  response = {
711
- "success": True,
712
- "score": accuracy,
713
- "fluency": fluency,
714
- "completeness": completeness,
715
- "recognized_text": result.text,
716
- "word_details": detailed_words,
717
- "feedback": feedback,
718
- "question_id": data.get("question_id"),
719
  }
720
-
721
  if learner and grammar_rule and accuracy >= 70:
722
  learner.record_outcome(grammar_rule, True, "speak")
723
  response["mastery_update"] = dict(learner.mastery)
724
-
725
- logger.info(f"✅ [PRON] Complete — accuracy={accuracy:.1f}")
726
 
727
  elif result.reason == speechsdk.ResultReason.NoMatch:
728
- no_match_detail = getattr(result, 'no_match_details', 'N/A')
729
- logger.warning(f"⚠️ [PRON] STEP 12: NoMatch Azure heard nothing useful")
730
- logger.warning(f"⚠️ [PRON] NoMatch details: {no_match_detail}")
731
- response = {
732
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
733
- "recognized_text": "", "word_details": [],
734
- "feedback": "I couldn't hear you clearly. Check your microphone and try again.",
735
- }
736
 
737
  elif result.reason == speechsdk.ResultReason.Canceled:
738
- logger.error("❌ [PRON] STEP 12: Result reason = Canceled")
739
  try:
740
- cancellation = speechsdk.CancellationDetails(result)
741
-
742
- # Use getattr for all fields — older SDK versions may be missing some
743
- c_reason = getattr(cancellation, 'reason', 'UNKNOWN')
744
- c_code = getattr(cancellation, 'error_code', 'UNKNOWN')
745
- c_details = getattr(cancellation, 'error_details', 'UNKNOWN')
746
-
747
- logger.error(f"❌ [PRON] Cancellation reason: {c_reason}")
748
- logger.error(f" [PRON] Cancellation error_code: {c_code}")
749
- logger.error(f" [PRON] Cancellation error_details: {c_details}")
750
-
751
- # Dump every attribute on the object so nothing is hidden
752
- logger.error(f"❌ [PRON] CancellationDetails dir(): {[a for a in dir(cancellation) if not a.startswith('_')]}")
753
-
754
- # Also dump the raw result properties dict if available
755
- raw_props = getattr(result, 'properties', None)
756
- if raw_props:
757
- logger.error(f"❌ [PRON] result.properties: {dict(raw_props)}")
758
-
759
- details_str = str(c_details or '') + str(c_code or '')
760
- if "401" in details_str:
761
- logger.error("❌ [PRON] → HTTP 401: Azure key is INVALID or EXPIRED — check HF Space secrets")
762
- elif "403" in details_str:
763
- logger.error("❌ [PRON] → HTTP 403: Key does not have access — wrong region or feature not enabled")
764
- elif "ServiceUnavailable" in details_str or "503" in details_str:
765
- logger.error("❌ [PRON] → 503: Azure Speech service is temporarily unavailable")
766
- elif "connection" in details_str.lower() or "network" in details_str.lower():
767
- logger.error("❌ [PRON] → Network error: HF Space cannot reach Azure endpoint — check outbound access")
768
- elif "AuthenticationFailure" in details_str:
769
- logger.error("❌ [PRON] → AuthenticationFailure: Key/region mismatch — your key was provisioned in a different region than AZURE_SPEECH_REGION")
770
- elif details_str in ('', 'UNKNOWN'):
771
- logger.error("❌ [PRON] → Empty error details: likely a key/region mismatch. Verify AZURE_SPEECH_REGION matches the region shown in Azure portal for this key")
772
-
773
- response = {
774
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
775
- "recognized_text": "", "word_details": [],
776
- "feedback": f"Azure canceled: reason={c_reason} code={c_code} details={c_details}",
777
- }
778
- except Exception as parse_e:
779
- logger.error(f"❌ [PRON] Could not parse CancellationDetails at all: {parse_e}")
780
- # Last resort — try to get anything from the raw result
781
- logger.error(f"❌ [PRON] Raw result dir(): {[a for a in dir(result) if not a.startswith('_')]}")
782
- response = {
783
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
784
- "recognized_text": "", "word_details": [],
785
- "feedback": "Recognition was canceled by Azure — check server logs for details.",
786
- }
787
-
788
  else:
789
- logger.error(f"❌ [PRON] STEP 12: Unexpected result.reason = {result.reason}")
790
- response = {
791
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
792
- "recognized_text": "", "word_details": [],
793
- "feedback": f"Unexpected recognition result: {result.reason}",
794
- }
795
 
796
- logger.info("🎤 [PRON] ── Assessment End ──")
797
- logger.info("=" * 60)
798
  emit('pronunciation_result', response)
799
 
800
  except Exception as e:
801
  import traceback
802
- logger.error(f"❌ [PRON] UNHANDLED EXCEPTION: {type(e).__name__}: {e}")
803
- logger.error(f" [PRON] Traceback:\n{traceback.format_exc()}")
804
- emit('pronunciation_result', {
805
- "success": False, "score": 0, "fluency": 0, "completeness": 0,
806
- "recognized_text": "", "word_details": [],
807
- "feedback": "Server error during assessment.",
808
- })
809
  finally:
810
- if raw_path and os.path.exists(raw_path):
811
- os.remove(raw_path)
812
- logger.info(f"🧹 [PRON] Cleaned up: {raw_path}")
813
- if clean_path and os.path.exists(clean_path):
814
- os.remove(clean_path)
815
- logger.info(f"🧹 [PRON] Cleaned up: {clean_path}")
816
-
817
-
818
- def _build_pronunciation_feedback(accuracy: float, fluency: float,
819
- completeness: float, words: list,
820
- ref_text: str) -> str:
821
- issues = [w for w in words if w.get("error") not in (None, "None", "") or w.get("score", 100) < 60]
822
-
823
- if accuracy >= 85:
824
- base = "훌륭해요! Excellent pronunciation! 🌟"
825
- elif accuracy >= 70:
826
- base = "잘했어요! Good pronunciation! Keep practicing."
827
- elif accuracy >= 50:
828
- base = "괜찮아요! Not bad, but let's work on a few sounds."
829
- else:
830
- base = "다시 해 보세요! Let's practice this together."
831
-
832
  if issues:
833
- problem_words = [w["word"] for w in issues[:3]]
834
- base += f" Pay attention to: {', '.join(problem_words)}"
835
-
836
  if fluency < 60:
837
- base += " Try to speak more smoothly without pausing between words."
838
-
839
  return base
840
 
841
 
842
- # ===========================================================================
843
- # 5. MASTERY & SESSION MANAGEMENT
844
- # ===========================================================================
845
-
846
  @socketio.on('get_mastery')
847
  def handle_get_mastery(data):
848
  from flask import request as req
849
  learner = get_learner(req.sid)
850
-
851
  if not learner:
852
  emit('mastery_state', {"error": "No active session"})
853
  return
854
-
855
  emit('mastery_state', learner.get_state())
856
 
857
 
@@ -859,25 +573,17 @@ def handle_get_mastery(data):
859
  def handle_restore_session(data):
860
  from flask import request as req
861
  sid = req.sid
862
-
863
  try:
864
  learner_id = _socket_to_learner.get(sid)
865
  if not learner_id:
866
  emit('session_restored', {"success": False, "error": "No active socket session"})
867
  return
868
-
869
  learner = get_or_create_session(learner_id)
870
  learner.set_state(data)
871
- logger.info(f"♻️ Session restored for {learner_id}: difficulty={learner.difficulty}")
872
-
873
- emit('session_restored', {
874
- "success": True,
875
- "session_id": learner_id,
876
- "mastery": learner.mastery,
877
- "difficulty": learner.difficulty,
878
- "question_count": learner.question_count,
879
- })
880
-
881
  except Exception as e:
882
  logger.error(f"Session restore error: {e}")
883
  emit('session_restored', {"success": False, "error": str(e)})
@@ -888,15 +594,11 @@ def handle_reset_session(data):
888
  from flask import request as req
889
  sid = req.sid
890
  learner = get_learner(sid)
891
-
892
  if learner:
893
  learner.reset()
 
894
  logger.info(f"🔄 Session reset: {learner.session_id}")
895
- emit('session_reset', {
896
- "success": True,
897
- "mastery": learner.mastery,
898
- "difficulty": learner.difficulty,
899
- })
900
  else:
901
  emit('session_reset', {"success": False, "error": "No active session"})
902
 
@@ -905,159 +607,87 @@ def handle_reset_session(data):
905
  def handle_update_mastery(data):
906
  from flask import request as req
907
  learner = get_learner(req.sid)
908
-
909
  if not learner:
910
  emit('mastery_updated', {"error": "No active session"})
911
  return
912
-
913
  grammar_rule = data.get("grammar_rule", "")
914
  correct = data.get("correct", False)
915
  mode = data.get("interaction_mode", "")
916
-
917
  if grammar_rule:
918
  learner.record_outcome(grammar_rule, correct, mode)
 
919
 
920
- emit('mastery_updated', {
921
- "mastery": learner.mastery,
922
- "difficulty": learner.difficulty,
923
- "streak": learner.streak,
924
- })
925
-
926
-
927
- # ===========================================================================
928
- # 6. VISUAL RECOGNITION
929
- # ===========================================================================
930
 
931
  @socketio.on('verify_object')
932
  def handle_object_verification(data):
933
  target = data.get('target', 'magic wand')
934
- logger.info(f"👁️ Vision Request: Checking for '{target}'")
935
-
936
  try:
937
  pil_image = decode_image(data.get('image'))
938
  if not pil_image:
939
  emit('vision_result', {"verified": False, "feedback": "Could not decode image"})
940
  return
941
-
942
  img_byte_arr = io.BytesIO()
943
  pil_image.save(img_byte_arr, format='JPEG', quality=80)
944
- img_bytes = img_byte_arr.getvalue()
945
-
946
- schema = {
947
- "type": "OBJECT",
948
- "properties": {
949
- "verified": {"type": "BOOLEAN"},
950
- "confidence": {"type": "NUMBER"},
951
- "feedback": {"type": "STRING"}
952
- },
953
- "required": ["verified", "feedback"]
954
- }
955
-
956
- prompt = f"""You are the 'Eye of the Spellbook'.
957
- Look at this image. Is the user holding a '{target}'?
958
- IMPORTANT: Be lenient. If target is 'wand', accept a pen, pencil, chopstick, or stick.
959
- Return JSON matching the schema."""
960
-
961
  response = client.models.generate_content(
962
- model="gemini-2.5-flash",
963
- contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
964
- config=types.GenerateContentConfig(
965
- response_mime_type="application/json",
966
- response_schema=schema,
967
- temperature=0.1
968
- )
969
  )
970
-
971
- result = json.loads(response.text)
972
- logger.info(f"👁️ Vision Result: {result}")
973
- emit('vision_result', result)
974
-
975
  except Exception as e:
976
  logger.error(f"Vision Error: {e}")
977
  emit('vision_result', {"verified": False, "feedback": "The magic eye is clouded (Server Error)."})
978
 
979
 
980
- # ===========================================================================
981
- # 7. HANDWRITING / OCR
982
- # ===========================================================================
983
-
984
  @socketio.on('verify_writing')
985
  def handle_writing_verification(data):
986
  expected = data.get('expected_word', '')
987
- logger.info(f"📖 Handwriting Check: Expecting '{expected}'")
988
-
989
  try:
990
  pil_image = decode_image(data.get('image'))
991
  if not pil_image:
992
  emit('writing_result', {"correct": False, "detected_text": "Could not decode image"})
993
  return
994
-
995
  img_byte_arr = io.BytesIO()
996
  pil_image.save(img_byte_arr, format='JPEG', quality=80)
997
- img_bytes = img_byte_arr.getvalue()
998
-
999
- schema = {
1000
- "type": "OBJECT",
1001
- "properties": {
1002
- "correct": {"type": "BOOLEAN"},
1003
- "detected_text": {"type": "STRING"},
1004
- "feedback": {"type": "STRING"}
1005
- },
1006
- "required": ["correct", "detected_text"]
1007
- }
1008
-
1009
- prompt = f"""Read the handwriting in this image.
1010
- Does it spell '{expected}'?
1011
- Be lenient with stroke variation but strict about the actual characters.
1012
- Return JSON with: correct (bool), detected_text (what you read), feedback (brief comment)."""
1013
-
1014
  response = client.models.generate_content(
1015
- model="gemini-2.5-flash",
1016
- contents=[prompt, types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")],
1017
- config=types.GenerateContentConfig(
1018
- response_mime_type="application/json",
1019
- response_schema=schema,
1020
- )
1021
  )
1022
-
1023
- result = json.loads(response.text)
1024
- logger.info(f"📖 Writing Result: {result}")
1025
- emit('writing_result', result)
1026
-
1027
  except Exception as e:
1028
  logger.error(f"OCR Error: {e}")
1029
  emit('writing_result', {"correct": False, "detected_text": "Error", "feedback": "Server error"})
1030
 
1031
 
1032
- # ===========================================================================
1033
- # 8. GRAMMAR RULE INFO
1034
- # ===========================================================================
1035
-
1036
  @socketio.on('get_grammar_rules')
1037
  def handle_get_grammar_rules(data):
1038
  pack = get_active_pack()
1039
- emit('grammar_rules', {
1040
- "rules": pack.get("grammar_rules", {}),
1041
- "lesson": pack.get("lesson"),
1042
- })
1043
 
1044
 
1045
  @socketio.on('get_content_pack_info')
1046
  def handle_get_content_pack_info(data):
1047
  pack = get_active_pack()
1048
  emit('content_pack_info', {
1049
- "lesson": pack.get("lesson"),
1050
- "version": pack.get("version"),
1051
- "vocab_count": len(pack.get("vocab", [])),
1052
  "grammar_rules": list(pack.get("grammar_rules", {}).keys()),
1053
- "metadata": pack.get("metadata", {}),
1054
  })
1055
 
1056
 
1057
- # ===========================================================================
1058
- # ENTRY POINT
1059
- # ===========================================================================
1060
-
1061
  if __name__ == '__main__':
1062
  purge_stale_sessions()
1063
  logger.info("🚀 KLP AI Service starting on port 7860")
 
9
  import logging
10
  import uuid
11
  import sys
12
+ import time
13
 
14
  # ── CRITICAL: eventlet monkey_patch BEFORE all other imports ──────────────────
 
 
 
 
 
 
 
 
15
  import eventlet
16
  eventlet.monkey_patch()
17
 
18
  import concurrent.futures
19
+
20
+ _azure_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
21
+ _gemini_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
22
 
23
  import cv2
24
  import numpy as np
25
  from flask import Flask
26
  from flask_socketio import SocketIO, emit
 
27
  from PIL import Image
 
 
28
  from google import genai
29
  from google.genai import types
30
  import azure.cognitiveservices.speech as speechsdk
31
 
 
32
  from korean_rules import rule_engine
33
  from content_pack import get_active_pack, replace_active_pack
34
  from learner_model import get_or_create_session, get_session, delete_session, purge_stale_sessions
 
36
 
37
  sys.path.append(os.path.dirname(__file__))
38
 
39
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
40
  logger = logging.getLogger(__name__)
41
 
42
+ app = Flask(__name__)
43
  socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')
44
 
45
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
46
+ AZURE_SPEECH_KEY = os.environ.get("AZURE_SPEECH_KEY")
 
47
  AZURE_SPEECH_REGION = os.environ.get("AZURE_SPEECH_REGION")
48
+ GEMINI_MODEL = "gemini-2.5-flash-lite"
49
 
 
50
  client = None
51
  try:
52
  client = genai.Client(api_key=GEMINI_API_KEY)
 
54
  except Exception as e:
55
  logger.error(f"❌ Failed to init Gemini: {e}")
56
 
57
+ _speech_config = None
58
+
59
def _build_speech_config():
    """Create the Azure SpeechConfig used for pronunciation assessment.

    Returns:
        A ``speechsdk.SpeechConfig`` with the recognition language set to
        ``ko-KR``, or ``None`` when the key or region is not configured.
    """
    credentials_present = bool(AZURE_SPEECH_KEY) and bool(AZURE_SPEECH_REGION)
    if not credentials_present:
        return None

    speech_cfg = speechsdk.SpeechConfig(
        subscription=AZURE_SPEECH_KEY,
        region=AZURE_SPEECH_REGION,
    )
    speech_cfg.speech_recognition_language = "ko-KR"
    return speech_cfg
65
 
66
+ try:
67
+ _speech_config = _build_speech_config()
68
+ if _speech_config:
69
+ logger.info(f"✅ Azure SpeechConfig pre-built (region={AZURE_SPEECH_REGION})")
70
+ else:
71
+ logger.warning("⚠️ Azure SpeechConfig not built — credentials missing")
72
+ except Exception as e:
73
+ logger.error(f"❌ Azure SpeechConfig init failed: {e}")
74
 
75
+ question_gen = QuestionGenerator(gemini_client=client)
76
+ _socket_to_learner = {}
77
+ _question_cache = {}
78
+
79
+
80
def _prefetch_question(learner_id, grammar_rule, difficulty, history, session_id):
    """Queue background generation of the learner's next question.

    The resulting future is stored in ``_question_cache`` under ``learner_id``
    so a later question request can pick it up instead of generating live.

    Args:
        learner_id: Key under which the future is cached.
        grammar_rule: Grammar rule passed to the question generator.
        difficulty: Difficulty passed to the question generator.
        history: Learner history passed to the question generator.
        session_id: Session id passed to the question generator.
    """
    def _gen():
        try:
            return question_gen.generate(difficulty=difficulty, grammar_rule=grammar_rule,
                                         history=history, session_id=session_id)
        except Exception as e:
            # A failed prefetch is not fatal: the future resolves to None.
            logger.warning(f"⚡ Prefetch failed: {e}")
            return None

    # A previous prefetch for this learner becomes unreachable once it is
    # overwritten; cancel it so a still-queued job does not occupy a worker.
    # cancel() is a no-op (returns False) if the job is running or finished.
    superseded = _question_cache.get(learner_id)
    if superseded is not None:
        superseded.cancel()

    _question_cache[learner_id] = _gemini_executor.submit(_gen)
    logger.info(f"⚡ Prefetch queued: rule={grammar_rule} diff={difficulty}")
90
 
 
 
 
91
 
92
  def decode_image(base64_string):
93
  try:
94
  if "," in base64_string:
95
  base64_string = base64_string.split(",")[1]
96
  img_bytes = base64.b64decode(base64_string)
97
+ np_arr = np.frombuffer(img_bytes, np.uint8)
98
+ frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
99
  return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
100
  except Exception as e:
101
  logger.error(f"Image Decode Error: {e}")
 
103
 
104
 
105
def sanitize_audio(input_path):
    """Convert a recording to mono 16 kHz 16-bit PCM WAV using ffmpeg.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        The path of the converted file (``input_path + "_clean.wav"``) on
        success, or ``None`` if the input is missing/empty, ffmpeg fails or is
        not installed, or the converted file is empty.
    """
    try:
        input_size = os.path.getsize(input_path)
    except Exception:
        logger.error("❌ [FFmpeg] Cannot stat input file")
        return None
    if input_size == 0:
        logger.error("❌ [FFmpeg] Input file is empty")
        return None

    output_path = input_path + "_clean.wav"
    command = ["ffmpeg", "-y", "-v", "error", "-i", input_path,
               "-ac", "1", "-ar", "16000", "-acodec", "pcm_s16le", output_path]

    def _discard_partial_output():
        # The caller only learns output_path on success, so a failed or empty
        # conversion has to be removed here or the file is left behind.
        try:
            os.remove(output_path)
        except OSError:
            pass  # best effort: nothing was written, or it is already gone

    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        output_size = os.path.getsize(output_path)
        if output_size == 0:
            logger.error("❌ [FFmpeg] Output WAV is empty")
            _discard_partial_output()
            return None
        logger.info(f"✅ [FFmpeg] {input_size}B → {output_size}B WAV")
        if result.stderr:
            logger.warning(f"[FFmpeg] stderr: {result.stderr[:500]}")
        return output_path
    except subprocess.CalledProcessError as e:
        # stderr may be None if output was not captured; guard the slice.
        logger.error(f"❌ [FFmpeg] Failed (rc={e.returncode}): {(e.stderr or '')[:500]}")
    except FileNotFoundError:
        logger.error("❌ [FFmpeg] ffmpeg not found")
    except Exception as e:
        logger.error(f"❌ [FFmpeg] Unexpected: {e}")

    _discard_partial_output()
    return None
137
 
138
 
139
def quick_audio_check(file_path):
    """Log basic WAV statistics and report whether the opening audio is audible.

    Reads up to the first 4096 frames and unpacks them as native 16-bit
    samples to find the peak amplitude.

    Returns:
        ``False`` when no frames could be read or the sampled peak is below
        100; ``True`` otherwise, including when the check itself raises (the
        check is then skipped with a warning).
    """
    try:
        with wave.open(file_path, 'rb') as wf:
            framerate = wf.getframerate()
            nframes = wf.getnframes()
            if framerate:
                duration_s = nframes / framerate
            else:
                duration_s = 0
            logger.info(f"🔊 [WAV] {framerate}Hz | {duration_s:.2f}s | {nframes} frames")
            if duration_s < 0.2:
                # Only a warning: a very short clip is still inspected below.
                logger.warning(f"⚠️ [WAV] Very short ({duration_s:.2f}s)")

            raw_sample = wf.readframes(min(4096, nframes))
            if not raw_sample:
                return False

            sample_count = len(raw_sample) // 2
            samples = struct.unpack("%dh" % sample_count, raw_sample)
            peak = max(abs(value) for value in samples)
            logger.info(f"🔊 [WAV] Sample peak: {peak}/32767")
            if peak >= 100:
                return True
            logger.warning("⚠️ [WAV] Audio appears silent")
            return False
    except Exception as e:
        logger.warning(f"⚠️ [WAV] Check skipped: {e}")
        return True
162
 
163
 
164
def get_learner(socket_sid):
    """Return the learner session mapped to a socket id, or None if unmapped."""
    learner_id = _socket_to_learner.get(socket_sid)
    return get_session(learner_id) if learner_id else None
169
 
170
 
 
 
 
 
171
@socketio.on('connect')
def handle_connect():
    """Register a new socket, create its learner session and announce it.

    Generates a fresh learner id, maps the socket id to it, and emits
    ``session_ready`` with the learner's mastery/difficulty and a summary of
    the active content pack.
    """
    from flask import request
    sid = request.sid
    learner_id = str(uuid.uuid4())
    _socket_to_learner[sid] = learner_id
    model = get_or_create_session(learner_id)
    logger.info(f"✅ Client connected: socket={sid} learner={learner_id}")

    # Snapshot the pack once so lesson, version and vocab_count all describe
    # the same pack even if it is replaced while this handler runs.
    pack = get_active_pack()
    emit('session_ready', {
        "session_id": learner_id, "message": "Connected to KLP AI Service",
        "mastery": model.mastery, "difficulty": model.difficulty,
        "content_pack": {
            "lesson": pack.get("lesson"),
            "version": pack.get("version"),
            "vocab_count": len(pack.get("vocab", [])),
        }
    })
 
190
@socketio.on('disconnect')
def handle_disconnect():
    """Drop the socket-to-learner mapping and any prefetched question."""
    from flask import request
    sid = request.sid
    learner_id = _socket_to_learner.pop(sid, None)
    if not learner_id:
        logger.info(f"Client disconnected: socket={sid}")
        return
    # Discard a pending prefetch so it is not served to nobody.
    _question_cache.pop(learner_id, None)
    logger.info(f"Client disconnected: socket={sid} learner={learner_id}")
200
 
201
 
 
 
 
 
202
@socketio.on('load_content_pack')
def handle_load_content_pack(data):
    """Replace the active content pack from an uploaded JSON, DOCX or PDF file.

    JSON uploads are decoded directly; DOCX/PDF uploads are sent to Gemini,
    whose reply is parsed as JSON. Emits ``content_pack_loaded`` with either a
    summary of the new pack or an error message.
    """
    logger.info("📦 Content pack upload received")

    def _announce(pack, source):
        emit('content_pack_loaded', {
            "success": True, "lesson": pack["lesson"],
            "vocab_count": len(pack["vocab"]),
            "grammar_rules": list(pack["grammar_rules"].keys()), "source": source,
        })

    try:
        file_type = data.get("file_type", "json").lower()
        file_b64 = data.get("file_bytes", "")
        lesson = data.get("lesson", "custom")
        description = data.get("description", "Custom content pack")
        # Drop a data-URL header ("data:...;base64,") if one is present.
        if "," in file_b64:
            file_b64 = file_b64.split(",")[1]
        file_bytes = base64.b64decode(file_b64)

        if file_type == "json":
            raw = json.loads(file_bytes.decode("utf-8"))
            merged = {**raw, "lesson": lesson, "description": description}
            _announce(replace_active_pack(merged), "json_upload")
            return

        if file_type not in ("docx", "pdf"):
            emit('content_pack_loaded', {"success": False, "error": f"Unsupported: {file_type}"})
            return

        if not client:
            emit('content_pack_loaded', {"success": False, "error": "Gemini not available"})
            return

        if file_type == "pdf":
            mime = "application/pdf"
        else:
            mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        parse_prompt = ('You are parsing a Korean language teaching document. '
                        'Return ONLY valid JSON: {"vocab": [{"korean":"...","english":"...","type":"noun"}], '
                        '"grammar_rules": {"rule_id": {"id":"...","name":"...","description":"...",'
                        '"examples":[{"sentence":"...","translation":"..."}],"difficulty":1}}, '
                        '"lesson": "...", "description": "..."}')
        document_part = types.Part.from_bytes(data=file_bytes, mime_type=mime)
        response = client.models.generate_content(
            model=GEMINI_MODEL,
            contents=[parse_prompt, document_part],
        )
        text = response.text.strip()
        # Unwrap a Markdown code fence (optionally tagged "json") around the reply.
        if "```" in text:
            text = text.split("```")[1]
            if text.startswith("json"):
                text = text[4:]
        _announce(replace_active_pack(json.loads(text.strip())), "gemini_parsed")
    except Exception as e:
        logger.error(f"Content pack load error: {e}")
        emit('content_pack_loaded', {"success": False, "error": str(e)})
253
 
254
 
 
 
 
 
255
  @socketio.on('request_question')
256
  def handle_request_question(data):
257
  from flask import request as req
258
+ t0 = time.monotonic()
259
+ sid = req.sid
260
  learner = get_learner(sid)
261
 
262
  if not learner:
 
265
 
266
  try:
267
  forced_rule = data.get("grammar_rule") if data else None
268
+ override_difficulty = data.get("difficulty") if data else None
269
  difficulty = override_difficulty or learner.difficulty
270
  grammar_rule = forced_rule or learner.get_recommended_rule()
271
+ learner_id = _socket_to_learner.get(sid)
272
 
273
+ cached_future = _question_cache.pop(learner_id, None) if learner_id else None
274
+ if cached_future is not None:
275
+ try:
276
+ payload = cached_future.result(timeout=6)
277
+ if payload:
278
+ logger.info(f"⚡ Cache HIT — {(time.monotonic()-t0)*1000:.0f}ms")
279
+ emit('question_payload', payload)
280
+ return
281
+ except concurrent.futures.TimeoutError:
282
+ logger.warning("⚡ Cache future timed out — live generation")
283
+ except Exception as e:
284
+ logger.warning(f"⚡ Cache future errored: {e} — live generation")
285
 
286
+ logger.info(f"🎯 Generating: rule={grammar_rule} diff={difficulty}")
287
  payload = question_gen.generate(
288
+ difficulty=difficulty, grammar_rule=grammar_rule,
289
+ history=learner.history, session_id=learner.session_id,
 
 
290
  )
291
+ logger.info(f"🎯 Generated in {(time.monotonic()-t0)*1000:.0f}ms")
292
  emit('question_payload', payload)
293
 
294
  except Exception as e:
 
296
  emit('question_payload', {"error": "Could not generate question. Please try again."})
297
 
298
 
 
 
 
 
299
  @socketio.on('submit_answer')
300
  def handle_submit_answer(data):
301
  from flask import request as req
 
309
 
310
  try:
311
  correct = False
 
312
  if interaction_mode == "assemble":
313
+ correct = rule_engine.validate_token_order(data.get("token_order", []), data.get("correct_order", []))
 
 
 
314
  elif interaction_mode in ("choose_select", "fill_in"):
315
  chosen = str(data.get("answer", "")).strip()
316
  answer_key = str(data.get("answer_key", "")).strip()
317
  word_tested = data.get("word_tested")
318
  particle_type = data.get("particle_type")
 
319
  if word_tested and particle_type:
320
  correct = rule_engine.validate_particle_choice(word_tested, chosen, particle_type)
321
  else:
 
333
  if not correct:
334
  word = data.get("word_tested")
335
  ptype = data.get("particle_type")
336
+ hint = (rule_engine.get_hint(word, ptype) if (word and ptype)
337
+ else data.get("hint_text", "Review the grammar rule and try again"))
 
 
 
 
 
338
 
339
  response = {
340
  "question_id": data.get("question_id"),
 
342
  "score_delta": 10 if correct else 0,
343
  "feedback": _build_feedback(correct, q_type, grammar_rule),
344
  "hint": hint,
345
+ "retry_allowed": not correct and attempt < 3,
346
  "attempt_number": attempt,
347
+ "speech_stage_unlocked": correct,
348
  }
 
349
  if learner:
350
  response["mastery_update"] = dict(learner.mastery)
351
  response["streak"] = learner.streak
352
 
353
  emit('answer_result', response)
354
 
355
+ if correct and learner:
356
+ learner_id = _socket_to_learner.get(sid)
357
+ if learner_id:
358
+ _prefetch_question(
359
+ learner_id=learner_id,
360
+ grammar_rule=learner.get_recommended_rule(),
361
+ difficulty=learner.difficulty,
362
+ history=list(learner.history),
363
+ session_id=learner.session_id,
364
+ )
365
+
366
  except Exception as e:
367
  logger.error(f"Answer validation error: {e}")
368
+ emit('answer_result', {"correct": False, "score_delta": 0,
369
+ "feedback": "Server error during validation", "retry_allowed": True})
 
 
 
 
370
 
371
 
372
def _gemini_recheck(data):
    """Ask Gemini whether a student's indirect-quotation answer is acceptable.

    Args:
        data: Mapping read for ``direct_speech``, ``answer`` and ``answer_key``.

    Returns:
        ``True`` only when the model's JSON reply has ``"correct": true``;
        ``False`` otherwise, including when Gemini is unavailable or the call
        or parsing fails.
    """
    if not client:
        logger.warning("Gemini recheck failed: client not initialised")
        return False
    try:
        prompt = (f"You are a Korean grammar validator.\n"
                  f"Direct speech: {data.get('direct_speech','')}\n"
                  f"Student answer: {data.get('answer','')}\n"
                  f"Expected: {data.get('answer_key','')}\n"
                  f"Is the student correct as indirect quotation? Minor spacing OK, wrong particles/endings not.\n"
                  f'Reply ONLY: {{"correct": true}} or {{"correct": false, "reason": "..."}}')
        # Request JSON output explicitly: a free-text reply wrapped in a code
        # fence would fail json.loads and mark a correct answer as wrong.
        response = client.models.generate_content(
            model=GEMINI_MODEL,
            contents=prompt,
            config=types.GenerateContentConfig(response_mime_type="application/json"),
        )
        verdict = json.loads(response.text.strip())
        # Accept only a real JSON true, not a truthy string such as "false".
        return isinstance(verdict, dict) and verdict.get("correct") is True
    except Exception as e:
        logger.warning(f"Gemini recheck failed: {e}")
        return False
385
 
386
 
387
+ def _build_feedback(correct, q_type, grammar_rule):
388
  if correct:
 
 
 
 
 
 
389
  import random
390
+ return random.choice(["정확해요! Great job! 🎉","맞아요! That's correct! ⭐",
391
+ "완벽해요! Perfect! 🌟","잘했어요! Well done! 👏"])
392
+ rule_hints = {
393
+ "topic_marker": "Remember: 은 for consonant endings, 는 for vowel endings",
394
+ "copula": "Remember: 이에요 for consonant endings, 예요 for vowel endings",
395
+ "negative_copula": "Remember: 이 아니에요 for consonant, 가 아니에요 for vowel/ㄹ",
396
+ "indirect_quote_dago": "Review: V+는다고/ㄴ다고, Adj+다고, Past+었다고",
397
+ "indirect_quote_commands": "Review: (으)라고 commands, 지 말라고 negatives",
398
+ "indirect_quote_questions": "Review: V/Adj+냐고 (drop ㄹ from stem)",
399
+ "indirect_quote_suggestions": "Review: V+자고 for suggestions",
400
+ "regret_expression": "Review: (으)ㄹ 걸 그랬다 = should have; 지 말 걸 = shouldn't have",
401
+ }
402
+ return "다시 해 보세요! Let's try again. " + rule_hints.get(grammar_rule, "Review the grammar rule.")
 
 
403
 
 
 
 
404
 
405
  @socketio.on('assess_pronunciation')
406
  def handle_pronunciation(data):
407
  from flask import request as req
408
+ t0 = time.monotonic()
409
  sid = req.sid
410
  learner = get_learner(sid)
411
 
 
413
  lang = data.get('lang', 'ko-KR')
414
  grammar_rule = data.get('grammar_rule', '')
415
 
416
+ logger.info("=" * 56)
417
+ logger.info(f"🎤 [PRON] Start | text='{ref_text}' lang='{lang}'")
 
 
418
 
419
+ def _fail(msg):
 
420
  emit('pronunciation_result', {
421
  "success": False, "score": 0, "fluency": 0, "completeness": 0,
422
+ "recognized_text": "", "word_details": [], "feedback": msg,
 
423
  })
424
+
425
+ if not ref_text:
426
+ return _fail("No reference text provided.")
427
 
428
  audio_b64 = data.get('audio', '')
429
  if not audio_b64:
430
+ return _fail("No audio data received.")
 
 
 
 
 
 
431
 
432
+ if not AZURE_SPEECH_KEY or not AZURE_SPEECH_REGION:
433
+ logger.error("❌ [PRON] Azure credentials missing")
434
+ return _fail("Azure Speech not configured on server.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
 
436
+ raw_path = clean_path = None
437
  try:
438
+ if "," in audio_b64:
439
+ _, audio_b64 = audio_b64.split(",", 1)
440
+ audio_bytes = base64.b64decode(audio_b64)
441
+ logger.info(f"🎤 [PRON] Payload: {len(audio_bytes)} bytes")
 
 
 
 
 
 
 
 
442
  if len(audio_bytes) < 100:
443
+ raise Exception(f"Audio too small: {len(audio_bytes)} bytes")
444
+
445
+ with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
446
+ f.write(audio_bytes)
447
+ raw_path = f.name
448
+
449
+ t_ffmpeg = time.monotonic()
 
 
 
 
 
450
  clean_path = sanitize_audio(raw_path)
451
+ logger.info(f"🎤 [PRON] FFmpeg: {(time.monotonic()-t_ffmpeg)*1000:.0f}ms")
452
  if not clean_path:
453
+ raise Exception("Audio conversion failed")
454
+
455
+ quick_audio_check(clean_path)
456
+
457
+ speech_cfg = _speech_config or _build_speech_config()
458
+ if not speech_cfg:
459
+ raise Exception("Azure SpeechConfig unavailable — check HF Space secrets")
460
+ if lang != "ko-KR":
461
+ speech_cfg.speech_recognition_language = lang
462
+
463
+ audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
464
+ pron_config = speechsdk.PronunciationAssessmentConfig(
465
+ reference_text=ref_text,
466
+ grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
467
+ granularity=speechsdk.PronunciationAssessmentGranularity.Word,
468
+ enable_miscue=True
469
+ )
470
+ recognizer = speechsdk.SpeechRecognizer(speech_config=speech_cfg, audio_config=audio_config)
471
+ pron_config.apply_to(recognizer)
 
 
 
 
 
472
 
473
+ t_azure = time.monotonic()
474
+ logger.info("🎤 [PRON] Submitting to Azure OS thread...")
 
 
 
 
 
 
475
 
476
+ def _run_azure():
477
+ return recognizer.recognize_once_async().get()
 
 
 
 
 
 
 
 
 
 
 
478
 
 
 
479
  try:
480
+ result = _azure_executor.submit(_run_azure).result(timeout=30)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  except concurrent.futures.TimeoutError:
482
+ raise Exception("Azure Speech timed out after 30s")
 
 
 
 
483
 
484
+ logger.info(f"🎤 [PRON] Azure: {(time.monotonic()-t_azure)*1000:.0f}ms | reason={result.reason}")
 
 
485
 
486
+ response = {}
487
  if result.reason == speechsdk.ResultReason.RecognizedSpeech:
488
+ pr = speechsdk.PronunciationAssessmentResult(result)
489
+ accuracy = pr.accuracy_score
490
+ fluency = pr.fluency_score
491
+ completeness = pr.completeness_score
492
+ words = [{"word": w.word, "score": w.accuracy_score, "error": w.error_type}
493
+ for w in pr.words]
494
+ for w in words:
495
+ logger.info(f"🎤 [PRON] Word: '{w['word']}' score={w['score']:.1f} error='{w['error']}'")
 
 
 
 
 
 
 
 
 
 
 
496
  response = {
497
+ "success": True, "score": accuracy, "fluency": fluency,
498
+ "completeness": completeness, "recognized_text": result.text,
499
+ "word_details": words,
500
+ "feedback": _build_pronunciation_feedback(accuracy, fluency, completeness, words),
501
+ "question_id": data.get("question_id"),
 
 
 
502
  }
 
503
  if learner and grammar_rule and accuracy >= 70:
504
  learner.record_outcome(grammar_rule, True, "speak")
505
  response["mastery_update"] = dict(learner.mastery)
506
+ logger.info(f"✅ [PRON] acc={accuracy:.1f} flu={fluency:.1f} comp={completeness:.1f}")
 
507
 
508
  elif result.reason == speechsdk.ResultReason.NoMatch:
509
+ logger.warning("⚠️ [PRON] NoMatch")
510
+ response = {"success": False, "score": 0, "fluency": 0, "completeness": 0,
511
+ "recognized_text": "", "word_details": [],
512
+ "feedback": "I couldn't hear you clearly. Check your microphone and try again."}
 
 
 
 
513
 
514
  elif result.reason == speechsdk.ResultReason.Canceled:
 
515
  try:
516
+ c = speechsdk.CancellationDetails(result)
517
+ ds = str(getattr(c,'error_details','')) + str(getattr(c,'error_code',''))
518
+ logger.error(f"❌ [PRON] Canceled: {ds}")
519
+ if "401" in ds: logger.error("❌ [PRON] → Key INVALID or EXPIRED")
520
+ elif "403" in ds: logger.error("❌ [PRON] → Key lacks access")
521
+ elif "AuthenticationFailure" in ds: logger.error("❌ [PRON] → Key/region mismatch")
522
+ except Exception:
523
+ pass
524
+ response = {"success": False, "score": 0, "fluency": 0, "completeness": 0,
525
+ "recognized_text": "", "word_details": [],
526
+ "feedback": "Recognition canceled — check server logs."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  else:
528
+ response = {"success": False, "score": 0, "fluency": 0, "completeness": 0,
529
+ "recognized_text": "", "word_details": [],
530
+ "feedback": f"Unexpected result: {result.reason}"}
 
 
 
531
 
532
+ logger.info(f"🎤 [PRON] Total: {(time.monotonic()-t0)*1000:.0f}ms")
533
+ logger.info("=" * 56)
534
  emit('pronunciation_result', response)
535
 
536
  except Exception as e:
537
  import traceback
538
+ logger.error(f"❌ [PRON] Unhandled: {type(e).__name__}: {e}\n{traceback.format_exc()}")
539
+ emit('pronunciation_result', {"success": False, "score": 0, "fluency": 0, "completeness": 0,
540
+ "recognized_text": "", "word_details": [],
541
+ "feedback": "Server error during assessment."})
 
 
 
542
  finally:
543
+ for p in [raw_path, clean_path]:
544
+ if p and os.path.exists(p):
545
+ try: os.remove(p)
546
+ except Exception: pass
547
+
548
+
549
+ def _build_pronunciation_feedback(accuracy, fluency, completeness, words):
550
+ if accuracy >= 85: base = "훌륭해요! Excellent pronunciation! 🌟"
551
+ elif accuracy >= 70: base = "잘했어요! Good pronunciation! Keep practicing."
552
+ elif accuracy >= 50: base = "괜찮아요! Not bad, let's work on a few sounds."
553
+ else: base = "다시 해 보세���! Let's practice this together."
554
+ issues = [w for w in words if w.get("error") not in (None,"None","") or w.get("score",100) < 60]
 
 
 
 
 
 
 
 
 
 
555
  if issues:
556
+ base += f" Pay attention to: {', '.join(w['word'] for w in issues[:3])}"
 
 
557
  if fluency < 60:
558
+ base += " Try to speak more smoothly without pausing."
 
559
  return base
560
 
561
 
 
 
 
 
562
@socketio.on('get_mastery')
def handle_get_mastery(data):
    """Emit the requesting socket's learner state as ``mastery_state``."""
    from flask import request as req
    learner = get_learner(req.sid)
    if learner:
        payload = learner.get_state()
    else:
        payload = {"error": "No active session"}
    emit('mastery_state', payload)
570
 
571
 
 
573
  def handle_restore_session(data):
574
  from flask import request as req
575
  sid = req.sid
 
576
  try:
577
  learner_id = _socket_to_learner.get(sid)
578
  if not learner_id:
579
  emit('session_restored', {"success": False, "error": "No active socket session"})
580
  return
 
581
  learner = get_or_create_session(learner_id)
582
  learner.set_state(data)
583
+ logger.info(f"♻️ Session restored: {learner_id}")
584
+ emit('session_restored', {"success": True, "session_id": learner_id,
585
+ "mastery": learner.mastery, "difficulty": learner.difficulty,
586
+ "question_count": learner.question_count})
 
 
 
 
 
 
587
  except Exception as e:
588
  logger.error(f"Session restore error: {e}")
589
  emit('session_restored', {"success": False, "error": str(e)})
 
594
  from flask import request as req
595
  sid = req.sid
596
  learner = get_learner(sid)
 
597
  if learner:
598
  learner.reset()
599
+ _question_cache.pop(_socket_to_learner.get(sid), None)
600
  logger.info(f"🔄 Session reset: {learner.session_id}")
601
+ emit('session_reset', {"success": True, "mastery": learner.mastery, "difficulty": learner.difficulty})
 
 
 
 
602
  else:
603
  emit('session_reset', {"success": False, "error": "No active session"})
604
 
 
607
  def handle_update_mastery(data):
608
  from flask import request as req
609
  learner = get_learner(req.sid)
 
610
  if not learner:
611
  emit('mastery_updated', {"error": "No active session"})
612
  return
 
613
  grammar_rule = data.get("grammar_rule", "")
614
  correct = data.get("correct", False)
615
  mode = data.get("interaction_mode", "")
 
616
  if grammar_rule:
617
  learner.record_outcome(grammar_rule, correct, mode)
618
+ emit('mastery_updated', {"mastery": learner.mastery, "difficulty": learner.difficulty, "streak": learner.streak})
619
 
 
 
 
 
 
 
 
 
 
 
620
 
621
@socketio.on('verify_object')
def handle_object_verification(data):
    """Ask Gemini whether the submitted image shows the target object.

    Emits ``vision_result`` with the model's JSON verdict, or a negative
    result when the image cannot be decoded or the request fails.
    """
    target = data.get('target', 'magic wand')
    logger.info(f"👁️ Vision: '{target}'")
    try:
        pil_image = decode_image(data.get('image'))
        if not pil_image:
            emit('vision_result', {"verified": False, "feedback": "Could not decode image"})
            return

        # Re-encode as JPEG so the model always receives the same format.
        jpeg_buffer = io.BytesIO()
        pil_image.save(jpeg_buffer, format='JPEG', quality=80)

        schema = {
            "type": "OBJECT",
            "properties": {
                "verified": {"type": "BOOLEAN"},
                "confidence": {"type": "NUMBER"},
                "feedback": {"type": "STRING"},
            },
            "required": ["verified", "feedback"],
        }
        instruction = f"Eye of the Spellbook: is user holding '{target}'? Lenient — pen/pencil/stick OK for wand. Return JSON."
        image_part = types.Part.from_bytes(data=jpeg_buffer.getvalue(), mime_type="image/jpeg")
        generation_config = types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=schema,
            temperature=0.1,
        )
        response = client.models.generate_content(
            model=GEMINI_MODEL,
            contents=[instruction, image_part],
            config=generation_config,
        )
        emit('vision_result', json.loads(response.text))
    except Exception as e:
        logger.error(f"Vision Error: {e}")
        emit('vision_result', {"verified": False, "feedback": "The magic eye is clouded (Server Error)."})
646
 
647
 
 
 
 
 
648
  @socketio.on('verify_writing')
649
  def handle_writing_verification(data):
650
  expected = data.get('expected_word', '')
651
+ logger.info(f"📖 Handwriting: '{expected}'")
 
652
  try:
653
  pil_image = decode_image(data.get('image'))
654
  if not pil_image:
655
  emit('writing_result', {"correct": False, "detected_text": "Could not decode image"})
656
  return
 
657
  img_byte_arr = io.BytesIO()
658
  pil_image.save(img_byte_arr, format='JPEG', quality=80)
659
+ schema = {"type":"OBJECT","properties":{"correct":{"type":"BOOLEAN"},
660
+ "detected_text":{"type":"STRING"},"feedback":{"type":"STRING"}},
661
+ "required":["correct","detected_text"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
662
  response = client.models.generate_content(
663
+ model=GEMINI_MODEL,
664
+ contents=[f"Read handwriting. Does it spell '{expected}'? Lenient on strokes, strict on characters. Return JSON.",
665
+ types.Part.from_bytes(data=img_byte_arr.getvalue(), mime_type="image/jpeg")],
666
+ config=types.GenerateContentConfig(response_mime_type="application/json", response_schema=schema)
 
 
667
  )
668
+ emit('writing_result', json.loads(response.text))
 
 
 
 
669
  except Exception as e:
670
  logger.error(f"OCR Error: {e}")
671
  emit('writing_result', {"correct": False, "detected_text": "Error", "feedback": "Server error"})
672
 
673
 
 
 
 
 
674
  @socketio.on('get_grammar_rules')
675
  def handle_get_grammar_rules(data):
676
  pack = get_active_pack()
677
+ emit('grammar_rules', {"rules": pack.get("grammar_rules", {}), "lesson": pack.get("lesson")})
 
 
 
678
 
679
 
680
  @socketio.on('get_content_pack_info')
681
  def handle_get_content_pack_info(data):
682
  pack = get_active_pack()
683
  emit('content_pack_info', {
684
+ "lesson": pack.get("lesson"), "version": pack.get("version"),
685
+ "vocab_count": len(pack.get("vocab", [])),
 
686
  "grammar_rules": list(pack.get("grammar_rules", {}).keys()),
687
+ "metadata": pack.get("metadata", {}),
688
  })
689
 
690
 
 
 
 
 
691
  if __name__ == '__main__':
692
  purge_stale_sessions()
693
  logger.info("🚀 KLP AI Service starting on port 7860")