rairo committed on
Commit
a77dd77
·
verified ·
1 Parent(s): a251e69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -30
app.py CHANGED
@@ -184,81 +184,78 @@ def handle_pronunciation(data):
184
  clean_path = None
185
 
186
  try:
187
- # 1. Decode Base64
188
  audio_b64 = data.get('audio')
189
  if "," in audio_b64:
190
  audio_b64 = audio_b64.split(",")[1]
191
-
192
  audio_bytes = base64.b64decode(audio_b64)
193
 
194
- # Save as .webm initially because browsers usually send WebM/Opus inside the blob
195
- # even if they claim it's wav. FFmpeg will handle the detection.
196
  with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
197
  temp_raw.write(audio_bytes)
198
  raw_path = temp_raw.name
199
-
200
- logger.info(f"💾 Saved raw audio: {len(audio_bytes)} bytes")
201
 
202
- # 2. Sanitize (FFmpeg Conversion)
203
  clean_path = sanitize_audio(raw_path)
204
-
205
- if not clean_path:
206
- raise Exception("Audio conversion failed")
207
 
208
- # 3. Check Volume
209
- analyze_audio_volume(clean_path)
210
-
211
- # 4. Azure Speech Config
212
  speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
213
  speech_config.speech_recognition_language = lang
214
  audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
215
 
 
216
  pronunciation_config = speechsdk.PronunciationAssessmentConfig(
217
  reference_text=ref_text,
218
  grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
219
- granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
220
  enable_miscue=True
221
  )
222
 
223
  recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
224
  pronunciation_config.apply_to(recognizer)
225
 
226
- # 5. Recognize
227
- logger.info("☁️ Sending to Azure...")
228
  result = recognizer.recognize_once_async().get()
229
 
230
  response = {}
231
  if result.reason == speechsdk.ResultReason.RecognizedSpeech:
232
  pron_result = speechsdk.PronunciationAssessmentResult(result)
 
 
 
 
 
 
 
 
 
 
 
233
  response = {
234
  "success": True,
235
  "score": pron_result.accuracy_score,
236
  "fluency": pron_result.fluency_score,
237
- "recognized_text": result.text
 
 
238
  }
239
- logger.info(f"✅ Score: {pron_result.accuracy_score} | Text: {result.text}")
240
 
241
  elif result.reason == speechsdk.ResultReason.NoMatch:
242
- logger.warning("❌ Azure: No Match (Silence/Noise)")
243
  response = {"success": False, "score": 0, "recognized_text": "I couldn't hear you clearly."}
244
 
245
- elif result.reason == speechsdk.ResultReason.Canceled:
246
- cancellation = result.cancellation_details
247
- logger.error(f"❌ Azure Canceled: {cancellation.reason} | {cancellation.error_details}")
248
- response = {"success": False, "score": 0, "recognized_text": "The spell fizzled (API Error)."}
249
 
250
  emit('pronunciation_result', response)
251
 
252
  except Exception as e:
253
  logger.error(f"Audio Exception: {e}")
254
- emit('pronunciation_result', {"success": False, "score": 0, "recognized_text": "Magical interference (Server Error)."})
255
 
256
  finally:
257
- # Cleanup files
258
- if raw_path and os.path.exists(raw_path):
259
- os.remove(raw_path)
260
- if clean_path and os.path.exists(clean_path):
261
- os.remove(clean_path)
262
 
263
 
264
  # ==========================================
 
184
  clean_path = None
185
 
186
  try:
187
+ # 1. Decode and Save
188
  audio_b64 = data.get('audio')
189
  if "," in audio_b64:
190
  audio_b64 = audio_b64.split(",")[1]
 
191
  audio_bytes = base64.b64decode(audio_b64)
192
 
 
 
193
  with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_raw:
194
  temp_raw.write(audio_bytes)
195
  raw_path = temp_raw.name
 
 
196
 
197
+ # 2. Sanitize
198
  clean_path = sanitize_audio(raw_path)
199
+ if not clean_path: raise Exception("Audio conversion failed")
 
 
200
 
201
+ # 3. Configure Azure
 
 
 
202
  speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
203
  speech_config.speech_recognition_language = lang
204
  audio_config = speechsdk.audio.AudioConfig(filename=clean_path)
205
 
206
+ # Enable granular details
207
  pronunciation_config = speechsdk.PronunciationAssessmentConfig(
208
  reference_text=ref_text,
209
  grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
210
+ granularity=speechsdk.PronunciationAssessmentGranularity.Word, # Get Word-level details
211
  enable_miscue=True
212
  )
213
 
214
  recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
215
  pronunciation_config.apply_to(recognizer)
216
 
217
+ # 4. Recognize
 
218
  result = recognizer.recognize_once_async().get()
219
 
220
  response = {}
221
  if result.reason == speechsdk.ResultReason.RecognizedSpeech:
222
  pron_result = speechsdk.PronunciationAssessmentResult(result)
223
+
224
+ # --- EXTRACT WORD DETAILS ---
225
+ detailed_words = []
226
+ for word in pron_result.words:
227
+ detailed_words.append({
228
+ "word": word.word,
229
+ "score": word.accuracy_score,
230
+ "error": word.error_type # 'None', 'Omission', 'Insertion', 'Mispronunciation'
231
+ })
232
+ # ---------------------------
233
+
234
  response = {
235
  "success": True,
236
  "score": pron_result.accuracy_score,
237
  "fluency": pron_result.fluency_score,
238
+ "completeness": pron_result.completeness_score,
239
+ "recognized_text": result.text,
240
+ "word_details": detailed_words # Send this array to UI
241
  }
242
+ logger.info(f"✅ Score: {pron_result.accuracy_score}")
243
 
244
  elif result.reason == speechsdk.ResultReason.NoMatch:
 
245
  response = {"success": False, "score": 0, "recognized_text": "I couldn't hear you clearly."}
246
 
247
+ else:
248
+ response = {"success": False, "score": 0, "recognized_text": "Error during recognition."}
 
 
249
 
250
  emit('pronunciation_result', response)
251
 
252
  except Exception as e:
253
  logger.error(f"Audio Exception: {e}")
254
+ emit('pronunciation_result', {"success": False, "score": 0, "recognized_text": "Server Error"})
255
 
256
  finally:
257
+ if raw_path and os.path.exists(raw_path): os.remove(raw_path)
258
+ if clean_path and os.path.exists(clean_path): os.remove(clean_path)
 
 
 
259
 
260
 
261
  # ==========================================