Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -184,81 +184,78 @@ def handle_pronunciation(data):
|
|
| 184 |
clean_path = None
|
| 185 |
|
| 186 |
try:
|
| 187 |
-
# 1. Decode
|
| 188 |
audio_b64 = data.get('audio')
|
| 189 |
if "," in audio_b64:
|
| 190 |
audio_b64 = audio_b64.split(",")[1]
|
| 191 |
-
|
| 192 |
audio_bytes = base64.b64decode(audio_b64)
|
| 193 |
|
| 194 |
-
# Save as .webm initially because browsers usually send WebM/Opus inside the blob
|
| 195 |
-
# even if they claim it's wav. FFmpeg will handle the detection.
|
| 196 |
with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
|
| 197 |
temp_raw.write(audio_bytes)
|
| 198 |
raw_path = temp_raw.name
|
| 199 |
-
|
| 200 |
-
logger.info(f"💾 Saved raw audio: {len(audio_bytes)} bytes")
|
| 201 |
|
| 202 |
-
# 2. Sanitize
|
| 203 |
clean_path = sanitize_audio(raw_path)
|
| 204 |
-
|
| 205 |
-
if not clean_path:
|
| 206 |
-
raise Exception("Audio conversion failed")
|
| 207 |
|
| 208 |
-
# 3.
|
| 209 |
-
analyze_audio_volume(clean_path)
|
| 210 |
-
|
| 211 |
-
# 4. Azure Speech Config
|
| 212 |
speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
|
| 213 |
speech_config.speech_recognition_language = lang
|
| 214 |
audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
|
| 215 |
|
|
|
|
| 216 |
pronunciation_config = speechsdk.PronunciationAssessmentConfig(
|
| 217 |
reference_text=ref_text,
|
| 218 |
grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
|
| 219 |
-
granularity=speechsdk.PronunciationAssessmentGranularity.
|
| 220 |
enable_miscue=True
|
| 221 |
)
|
| 222 |
|
| 223 |
recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
|
| 224 |
pronunciation_config.apply_to(recognizer)
|
| 225 |
|
| 226 |
-
#
|
| 227 |
-
logger.info("☁️ Sending to Azure...")
|
| 228 |
result = recognizer.recognize_once_async().get()
|
| 229 |
|
| 230 |
response = {}
|
| 231 |
if result.reason == speechsdk.ResultReason.RecognizedSpeech:
|
| 232 |
pron_result = speechsdk.PronunciationAssessmentResult(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
response = {
|
| 234 |
"success": True,
|
| 235 |
"score": pron_result.accuracy_score,
|
| 236 |
"fluency": pron_result.fluency_score,
|
| 237 |
-
"
|
|
|
|
|
|
|
| 238 |
}
|
| 239 |
-
logger.info(f"✅ Score: {pron_result.accuracy_score}
|
| 240 |
|
| 241 |
elif result.reason == speechsdk.ResultReason.NoMatch:
|
| 242 |
-
logger.warning("❌ Azure: No Match (Silence/Noise)")
|
| 243 |
response = {"success": False, "score": 0, "recognized_text": "I couldn't hear you clearly."}
|
| 244 |
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
logger.error(f"❌ Azure Canceled: {cancellation.reason} | {cancellation.error_details}")
|
| 248 |
-
response = {"success": False, "score": 0, "recognized_text": "The spell fizzled (API Error)."}
|
| 249 |
|
| 250 |
emit('pronunciation_result', response)
|
| 251 |
|
| 252 |
except Exception as e:
|
| 253 |
logger.error(f"Audio Exception: {e}")
|
| 254 |
-
emit('pronunciation_result', {"success": False, "score": 0, "recognized_text": "
|
| 255 |
|
| 256 |
finally:
|
| 257 |
-
|
| 258 |
-
if
|
| 259 |
-
os.remove(raw_path)
|
| 260 |
-
if clean_path and os.path.exists(clean_path):
|
| 261 |
-
os.remove(clean_path)
|
| 262 |
|
| 263 |
|
| 264 |
# ==========================================
|
|
|
|
| 184 |
# Temp-file paths created below. Both are initialised up front so the
# `finally` cleanup can safely test them: the original left `raw_path`
# unbound when an exception fired before the temp file was created
# (e.g. data.get('audio') is None), turning the real error into a
# NameError raised from inside `finally`.
raw_path = None
clean_path = None

try:
    # 1. Decode and Save — the client sends base64 audio, possibly as a
    # data-URL; strip everything up to the first comma before decoding.
    audio_b64 = data.get('audio')
    if "," in audio_b64:
        audio_b64 = audio_b64.split(",")[1]
    audio_bytes = base64.b64decode(audio_b64)

    # Browsers typically record WebM/Opus, hence the .webm suffix.
    # delete=False because the file must outlive this `with` block so
    # sanitize_audio can read it; cleanup happens in `finally`.
    with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
        temp_raw.write(audio_bytes)
        raw_path = temp_raw.name

    # 2. Sanitize — convert to a format Azure accepts; a falsy return
    # means the conversion failed.
    clean_path = sanitize_audio(raw_path)
    if not clean_path:
        raise Exception("Audio conversion failed")

    # 3. Configure Azure speech recognition for the requested language.
    speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
    speech_config.speech_recognition_language = lang
    audio_config = speechsdk.audio.AudioConfig(filename=clean_path)

    # Enable granular details
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=ref_text,
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Word,  # Get Word-level details
        enable_miscue=True,
    )

    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    pronunciation_config.apply_to(recognizer)

    # 4. Recognize — single utterance; .get() blocks until Azure replies.
    result = recognizer.recognize_once_async().get()

    response = {}
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        pron_result = speechsdk.PronunciationAssessmentResult(result)

        # --- EXTRACT WORD DETAILS ---
        detailed_words = []
        for word in pron_result.words:
            detailed_words.append({
                "word": word.word,
                "score": word.accuracy_score,
                "error": word.error_type  # 'None', 'Omission', 'Insertion', 'Mispronunciation'
            })
        # ---------------------------

        response = {
            "success": True,
            "score": pron_result.accuracy_score,
            "fluency": pron_result.fluency_score,
            "completeness": pron_result.completeness_score,
            "recognized_text": result.text,
            "word_details": detailed_words  # Send this array to UI
        }
        logger.info(f"✅ Score: {pron_result.accuracy_score}")

    elif result.reason == speechsdk.ResultReason.NoMatch:
        response = {"success": False, "score": 0, "recognized_text": "I couldn't hear you clearly."}

    else:
        # Covers Canceled and any other terminal reason.
        response = {"success": False, "score": 0, "recognized_text": "Error during recognition."}

    emit('pronunciation_result', response)

except Exception as e:
    logger.error(f"Audio Exception: {e}")
    emit('pronunciation_result', {"success": False, "score": 0, "recognized_text": "Server Error"})

finally:
    # Best-effort removal of both temp files; guarded so a failure
    # earlier in the pipeline cannot raise here.
    if raw_path and os.path.exists(raw_path):
        os.remove(raw_path)
    if clean_path and os.path.exists(clean_path):
        os.remove(clean_path)

# ==========================================
|