Oviya committed on
Commit
2673ee9
·
1 Parent(s): 7383c72

add pronunciation module

Browse files
Files changed (3) hide show
  1. pron.py +729 -0
  2. static/references/voice1.wav +3 -0
  3. verification.py +2 -0
pron.py ADDED
@@ -0,0 +1,729 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pronunciation Trainer – Final Version
3
+ Real IPA • Whisper small.en • Phoneme Substitution Detection
4
+ Dynamic Feedback System for Children & Adults
5
+ """
6
+
7
+ import os
8
+ import io
9
+ import re
10
+ import uuid
11
+ import tempfile
12
+ import numpy as np
13
+ import librosa
14
+
15
+ from flask import Blueprint, request, jsonify, send_file
16
+ from difflib import SequenceMatcher
17
+ from werkzeug.utils import secure_filename
18
+ from pydub import AudioSegment
19
+ from pathlib import Path
20
+
21
+ # -------------------------------------------------------------------------
22
+ # IMPORTANT: Patch torch.load so XTTS can load on PyTorch 2.6 (HF Space)
23
+ # -------------------------------------------------------------------------
24
+ import torch
25
+
26
+ _original_torch_load = torch.load
27
+
28
+
29
+ def _torch_load_allow_weights(*args, **kwargs):
30
+ """
31
+ Global patch: force weights_only=False for all torch.load calls.
32
+ This follows option (1) from the PyTorch warning and is safe here
33
+ because we trust the XTTS checkpoint.
34
+ """
35
+ # Always override to False, regardless of what is passed
36
+ kwargs["weights_only"] = False
37
+ return _original_torch_load(*args, **kwargs)
38
+
39
+
40
+ torch.load = _torch_load_allow_weights
41
+ print(">>> [PRON] Patched torch.load to use weights_only=False for XTTS.", flush=True)
42
+
43
+ # Use the same XTTS helper that already works in ragg
44
+ from ragg.tts import xtts_speak_to_file
45
+
46
+ # -------------------------------------------------------------------------
47
+ # OPTIONAL MODULES
48
+ # -------------------------------------------------------------------------
49
try:
    import whisper

    WHISPER_AVAILABLE = True
    # Lazily-loaded singleton; populated on the first get_whisper() call so
    # module import stays fast and memory is only used when ASR is needed.
    WHISPER_MODEL = None

    def get_whisper():
        """Return the shared Whisper ASR model, loading it on first use."""
        global WHISPER_MODEL
        if WHISPER_MODEL is None:
            # Use small.en as requested
            WHISPER_MODEL = whisper.load_model("small.en")
        return WHISPER_MODEL
except Exception:
    # Whisper (or one of its native deps) is not installed; ASR-based
    # pronunciation checks degrade gracefully via this flag.
    WHISPER_AVAILABLE = False
62
+
63
try:
    from phonemizer import phonemize
    PHONEMIZER_AVAILABLE = True
except Exception:
    # phonemizer (or its espeak backend) is missing; ipa_phonemes() will
    # fall back to returning the raw text when this flag is False.
    PHONEMIZER_AVAILABLE = False
68
+
69
# -------------------------------------------------------------------------
# PATHS
# -------------------------------------------------------------------------
# All artefacts live under this blueprint's static/ folder:
#   static/audio       - generated teacher WAVs
#   static/references  - uploaded/default reference voices
BASE = os.path.dirname(os.path.abspath(__file__))
STATIC_DIR = os.path.join(BASE, "static")
AUDIO_DIR = os.path.join(STATIC_DIR, "audio")
REF_DIR = os.path.join(STATIC_DIR, "references")

os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(REF_DIR, exist_ok=True)

# Use the same base/trim logic as in ragg/tts.py
BASE_DIR = Path(__file__).resolve().parent.parent
XTTS_REF_DIR = Path(os.getenv("XTTS_REF_DIR", str(BASE_DIR / "trim")))

# Optional local default reference under this blueprint
# (shipped via git-LFS as static/references/voice1.wav in this commit).
DEFAULT_REFERENCE = Path(REF_DIR) / "voice1.wav"

pron_bp = Blueprint("pron", __name__)
88
+
89
+ # -------------------------------------------------------------------------
90
+ # HELPERS
91
+ # -------------------------------------------------------------------------
92
def normalize(text):
    """Lower-case *text* and strip everything except ASCII letters and spaces.

    Returns "" for falsy input (None, empty string).
    """
    if not text:
        return ""
    cleaned = re.sub(r"[^a-z ]", "", text.lower().strip())
    return cleaned.strip()
98
+
99
+
100
def read_numpy(file, sr=16000):
    """Decode an uploaded audio file into a mono float32 waveform in [-1, 1].

    *file* is an uploaded file object (exposes .stream and .filename).
    Returns (samples, sr) after downmixing to one channel and resampling.
    """
    file.stream.seek(0)
    buffer = io.BytesIO(file.stream.read())
    fmt = os.path.splitext(file.filename)[1].lstrip(".") or "wav"

    try:
        segment = AudioSegment.from_file(buffer, format=fmt)
    except Exception:
        # Extension hint failed; let pydub/ffmpeg sniff the container.
        buffer.seek(0)
        segment = AudioSegment.from_file(buffer)

    segment = segment.set_channels(1).set_frame_rate(sr)
    samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
    # Scale integer PCM to [-1, 1] using the sample width's full-scale value.
    full_scale = float(1 << (segment.sample_width * 8 - 1))
    return samples / full_scale, sr
116
+
117
+
118
def detect_silence(y, sr):
    """Classify a recording as unusable.

    Returns (True, reason) when the clip is missing ('no_audio'), shorter
    than 0.3 s ('too_short'), or peaks below 0.015 ('too_quiet');
    otherwise (False, None).
    """
    if y is None or len(y) == 0:
        return True, "no_audio"

    seconds = len(y) / sr
    peak = np.max(np.abs(y))

    if seconds < 0.3:
        return True, "too_short"
    if peak < 0.015:
        return True, "too_quiet"
    return False, None
132
+
133
+
134
+ def _make_suggestion_payload(message):
135
+ """
136
+ Small helper to create suggestion/feedback arrays so frontend always receives
137
+ structured feedback even on error paths.
138
+ """
139
+ return [{"title": "Notice", "message": message}]
140
+
141
+
142
def error_response(error_key, message, status=400, extra=None):
    """Build a (json, status) error reply with structured feedback attached.

    *extra*, when provided, is merged on top of the standard fields.
    """
    body = {
        "error": error_key,
        "message": message,
        "suggestion": _make_suggestion_payload(message),
        "feedback": _make_suggestion_payload(message),
    }
    if extra:
        body.update(extra)
    return jsonify(body), status
152
+
153
+
154
def structured_feedback_error(error_key, message, extra=None, status=200):
    """
    Build a fully-populated JSON reply for user-facing ASR/validation issues
    (not server failures), so frontends can always bind every field.
    """
    payload = dict(
        error=error_key,
        message=message,
        silent=False,
        word=None,
        heard_word=None,
        phoneme_teacher=None,
        phoneme_student=None,
        phoneme_similarity=0.0,
        phonemeSimilarity=0.0,
        phoneme_score=0.0,
        phonemeScore=0.0,
        feedback=_make_suggestion_payload(message),
        suggestion=_make_suggestion_payload(message),
        audio_url=None,
    )
    if extra:
        payload.update(extra)
    return jsonify(payload), status
178
+
179
+ # -------------------------------------------------------------------------
180
+ # REAL IPA PHONEMES
181
+ # -------------------------------------------------------------------------
182
def ipa_phonemes(text):
    """Return space-separated IPA phonemes for *text* via the espeak backend.

    Falls back to the raw text when phonemizer is unavailable or fails.
    """
    if not text:
        return ""

    if PHONEMIZER_AVAILABLE:
        try:
            # FIX: phonemize() has no 'ipa' keyword — the espeak backend
            # already emits IPA. Passing ipa=True raised TypeError, which the
            # except below swallowed, so phonemization silently never ran.
            ipa = phonemize(
                text,
                language="en-us",
                backend="espeak",
                strip=True,
                preserve_punctuation=False,
                with_stress=True,
            )
            # Insert a space before stress marks so each mark becomes its own
            # token when split.
            ipa = ipa.replace("ˈ", " ˈ").replace("ˌ", " ˌ")
            return " ".join(ipa.split())
        except Exception:
            return text

    return text
203
+
204
+ # -------------------------------------------------------------------------
205
+ # ASR OVERRIDE FOR SHORT WORDS
206
+ # -------------------------------------------------------------------------
207
def strong_word_match(word, heard, teacher_ph, student_ph):
    """Decide whether *heard* counts as the same word as *word*.

    Accepts when the phoneme strings are very similar (>= 0.80), when the
    leading phoneme token matches, or when a short word (<= 5 chars) has a
    reasonable spelling similarity (>= 0.60).
    """
    spelling_sim = SequenceMatcher(None, heard, word).ratio()
    phoneme_sim = SequenceMatcher(None, teacher_ph, student_ph).ratio()

    if phoneme_sim >= 0.80:
        return True

    t_tokens, s_tokens = teacher_ph.split(), student_ph.split()
    if t_tokens and s_tokens and t_tokens[0] == s_tokens[0]:
        return True

    return len(word) <= 5 and spelling_sim >= 0.60
223
+
224
+ # -------------------------------------------------------------------------
225
+ # TTS (Teacher Voice) – using shared xtts_speak_to_file
226
+ # -------------------------------------------------------------------------
227
def clone_voice(text, out_path, reference: Path | str | None = None):
    """
    Synthesize teacher audio for *text* into *out_path* with XTTS.

    Reference-voice priority:
      1) the caller-supplied reference file,
      2) DEFAULT_REFERENCE (static/references/voice1.wav),
      3) the XTTS_REF_DIR folder (trim), as used by the RAG code.
    """
    # Collect candidate reference files in priority order; the first one
    # that actually exists on disk wins.
    candidates = []
    if reference is not None:
        candidates.append(Path(str(reference)))
    candidates.append(DEFAULT_REFERENCE)

    for ref_path in candidates:
        if ref_path.is_file():
            return xtts_speak_to_file(
                text=text,
                out_file=out_path,
                reference_files=[ref_path],
                language="en",
            )

    # No usable single file — fall back to the shared reference directory.
    return xtts_speak_to_file(
        text=text,
        out_file=out_path,
        reference_dir=XTTS_REF_DIR,
        language="en",
    )
262
+
263
+
264
def clone_voice_bytes(text, reference: Path | str | None = None):
    """
    Generate teacher audio for *text* and return the WAV data as raw bytes.

    A temporary file is used because the XTTS helper writes to a path; the
    file is always removed, even when synthesis fails.
    """
    # FIX: the previous NamedTemporaryFile(delete=False).name left the file
    # handle open forever (and causes sharing violations on Windows);
    # mkstemp + os.close releases the descriptor immediately.
    fd, tmp_name = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    tmp_path = Path(tmp_name)
    try:
        clone_voice(text, tmp_path, reference=reference)
        data = tmp_path.read_bytes()
    finally:
        try:
            tmp_path.unlink()
        except Exception:
            pass

    return data
280
+
281
+ # -------------------------------------------------------------------------
282
+ # WAVEFORM / SPECTROGRAM HELPERS
283
+ # -------------------------------------------------------------------------
284
def load_audio_from_bytes(data_bytes: bytes, sr=16000):
    """Decode raw audio bytes into (waveform, sr) via a temporary WAV file.

    librosa needs a path, so the bytes are spilled to disk briefly; the
    temp file is removed in all cases.
    """
    handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_name = handle.name
    try:
        handle.write(data_bytes)
        handle.flush()
        handle.close()
        waveform, loaded_sr = librosa.load(tmp_name, sr=sr, mono=True)
    finally:
        try:
            os.remove(tmp_name)
        except Exception:
            pass
    return waveform, loaded_sr
297
+
298
+
299
def compute_waveform_similarity(y_ref, y_stud, sr=16000):
    """
    Compare a teacher and a student waveform and score their similarity.

    Combines two measures on the silence-trimmed signals:
      * DTW distance over 13 MFCCs (rhythm/timing), mapped to 0-100;
      * Pearson correlation of the raw samples (clarity), mapped to 0-100.

    Returns a dict with 'similarity' (0.65*dtw + 0.35*corr, clamped to
    0-100) plus the intermediate values; the all-default dict is returned
    when either trimmed signal is missing or shorter than 10 samples.
    """
    result = {
        "similarity": 0.0,
        "dtw_dist": None,
        "dtw_norm": None,
        "dtw_sim": None,
        "corr": None,
        "corr_sim": None,
    }

    # Trim leading/trailing silence; fall back to the raw signal on failure.
    try:
        y_ref_trim, _ = librosa.effects.trim(y_ref, top_db=20)
    except Exception:
        y_ref_trim = y_ref
    try:
        y_stud_trim, _ = librosa.effects.trim(y_stud, top_db=20)
    except Exception:
        y_stud_trim = y_stud

    if y_ref_trim is None or y_stud_trim is None or len(y_ref_trim) < 10 or len(y_stud_trim) < 10:
        return result

    try:
        # FIX: librosa >= 0.10 requires the audio argument by keyword (y=...);
        # the old positional call raised TypeError, which the except below
        # swallowed, permanently zeroing the DTW component.
        mfcc_ref = librosa.feature.mfcc(y=y_ref_trim, sr=sr, n_mfcc=13)
        mfcc_stud = librosa.feature.mfcc(y=y_stud_trim, sr=sr, n_mfcc=13)

        D, wp = librosa.sequence.dtw(X=mfcc_ref, Y=mfcc_stud, metric="euclidean")
        dtw_dist = float(D[-1, -1])
        # Normalise by total frame count so longer words are not penalised.
        total_frames = mfcc_ref.shape[1] + mfcc_stud.shape[1]
        denom = total_frames if total_frames > 0 else 1.0
        dtw_norm = dtw_dist / denom

        # Heuristic linear mapping of normalised DTW distance to 0-100.
        dtw_sim = max(0.0, 100.0 - dtw_norm * 30.0)

        result["dtw_dist"] = dtw_dist
        result["dtw_norm"] = dtw_norm
        result["dtw_sim"] = max(0.0, min(100.0, dtw_sim))
    except Exception:
        result["dtw_dist"] = None
        result["dtw_norm"] = None
        result["dtw_sim"] = 0.0

    try:
        # Sample-wise correlation over the overlapping prefix, z-normalised.
        min_len = min(len(y_ref_trim), len(y_stud_trim))
        if min_len <= 1:
            corr = 0.0
        else:
            r = y_ref_trim[:min_len]
            s = y_stud_trim[:min_len]
            r = (r - np.mean(r)) / (np.std(r) + 1e-9)
            s = (s - np.mean(s)) / (np.std(s) + 1e-9)
            corr = float(np.corrcoef(r, s)[0, 1])
            if np.isnan(corr):
                corr = 0.0
        # Map [-1, 1] correlation onto a 0-100 scale.
        corr_sim = ((corr + 1.0) / 2.0) * 100.0
        result["corr"] = corr
        result["corr_sim"] = max(0.0, min(100.0, corr_sim))
    except Exception:
        result["corr"] = None
        result["corr_sim"] = 0.0

    dtw_component = float(result["dtw_sim"] or 0.0)
    corr_component = float(result["corr_sim"] or 0.0)
    combined = 0.65 * dtw_component + 0.35 * corr_component
    result["similarity"] = round(float(max(0.0, min(100.0, combined))), 2)
    return result
364
+
365
+
366
def build_waveform_feedback(word: str, sim_dict: dict, threshold: float):
    """Turn waveform-similarity numbers into human-readable feedback entries.

    Returns a list of {"title", "message"} dicts covering overall score,
    rhythm (DTW), clarity (correlation), a practice tip, and pass/fail.
    """
    score = float(sim_dict.get("similarity") or 0.0)
    dtw_sim = float(sim_dict.get("dtw_sim") or 0.0)
    corr_sim = float(sim_dict.get("corr_sim") or 0.0)

    def entry(title, message):
        return {"title": title, "message": message}

    # Overall tier, from the blended similarity score.
    if score >= 90:
        overall = f"Excellent. Your waveform for '{word}' is almost the same as the teacher."
    elif score >= 75:
        overall = f"Very good. Your pronunciation of '{word}' is close to the teacher. Small improvements are possible."
    elif score >= 60:
        overall = f"Good attempt. You are understandable, but you can still improve clarity and smoothness for '{word}'."
    else:
        overall = f"You are trying well, but the sound of '{word}' is still far from the teacher. Please practise a few more times."

    # Rhythm tier, from the DTW component.
    if dtw_sim >= 75:
        rhythm = "Your timing and rhythm are close to the teacher. You are stressing the word in a similar way."
    elif dtw_sim >= 55:
        rhythm = "Your timing is acceptable, but you can make the word smoother. Try saying the word in one smooth breath."
    else:
        rhythm = "Your timing is quite different. Try to copy when the teacher starts and stops the word and keep a steady pace."

    # Clarity tier, from the correlation component.
    if corr_sim >= 75:
        clarity = "Your sound shape is clear and close to the teacher. Mouth and tongue positions are mostly correct."
    elif corr_sim >= 55:
        clarity = "Your sound is partly clear. Try opening your mouth a little more and speak a bit more clearly."
    else:
        clarity = "The sound shape is quite different. Try to listen carefully and slowly copy the teacher sound."

    passed_text = "You passed the target for this word." if score >= threshold else "You did not yet pass the target. Try again."

    return [
        entry("Overall Pronunciation", overall),
        entry("Rhythm and Timing", rhythm),
        entry("Clarity of Sound", clarity),
        entry("Practice Tip", "Listen to the teacher audio 2–3 times and then repeat slowly. Focus on copying the length and loudness of the sound."),
        entry("Score", f"Waveform score: {score:.1f}/100. Target: {threshold:.1f}. {passed_text}"),
    ]
438
+
439
+ # -------------------------------------------------------------------------
440
+ # ROUTE: Generate Teacher Audio (download)
441
+ # -------------------------------------------------------------------------
442
@pron_bp.route("/generate_teacher_audio", methods=["POST"])
def generate_teacher_audio():
    """
    Synthesize the requested word in the teacher voice and return a URL
    (relative to the static dir) pointing at the generated WAV file.
    """
    word = request.form.get("word", "").strip().lower()
    if not word:
        return error_response("word_required", "Word required", 400)

    # Persist an uploaded reference voice, if any, and clone from it.
    reference_path = None
    if "reference" in request.files:
        upload = request.files["reference"]
        saved_to = os.path.join(REF_DIR, secure_filename(upload.filename))
        upload.save(saved_to)
        reference_path = saved_to

    out_path = os.path.join(AUDIO_DIR, f"{word}-{uuid.uuid4().hex}.wav")

    try:
        clone_voice(word, out_path, reference=reference_path)
    except FileNotFoundError as e:
        return error_response("reference_not_found", f"Reference audio not found: {e}", 500)
    except RuntimeError as e:
        return error_response("tts_unavailable", f"TTS unavailable: {e}", 503)
    except Exception as e:
        return error_response("tts_generation_failed", f"TTS generation failed: {e}", 500)

    rel_url = os.path.relpath(out_path, STATIC_DIR).replace("\\", "/")
    return jsonify({"url": rel_url})
469
+
470
+ # -------------------------------------------------------------------------
471
+ # ROUTE: Teacher Audio Stream
472
+ # -------------------------------------------------------------------------
473
@pron_bp.route("/generate_teacher_audio_stream", methods=["POST"])
def generate_teacher_audio_stream():
    """Synthesize the word in the teacher voice and stream the WAV bytes back.

    Form fields: 'word' (required). Files: 'reference' (optional voice
    sample, persisted under REF_DIR). On TTS-model unavailability a 200
    structured payload is returned so the frontend keeps working.
    """
    word = request.form.get("word", "").strip().lower()
    if not word:
        return error_response("word_required", "Word required", 400)

    # Save the optional uploaded reference voice for cloning.
    ref_path = None
    if "reference" in request.files:
        try:
            rf = request.files["reference"]
            fname = secure_filename(rf.filename)
            path = os.path.join(REF_DIR, fname)
            rf.save(path)
            ref_path = path
        except Exception as e:
            app_msg = f"reference save failed: {e}"
            print(app_msg)
            return error_response("reference_save_failed", app_msg, 500)

    try:
        data = clone_voice_bytes(word, reference=ref_path)
        bio = io.BytesIO(data)
        bio.seek(0)
        return send_file(bio, mimetype="audio/wav", as_attachment=False)

    except FileNotFoundError as e:
        msg = f"Reference audio not found: {e}"
        print("generate_teacher_audio_stream FileNotFoundError:", e)
        return error_response("reference_not_found", msg, 500)

    except RuntimeError as e:
        # NOTE(review): assumes the XTTS helper signals an unavailable model
        # via RuntimeError — confirm against ragg.tts.xtts_speak_to_file.
        # Deliberately returned as HTTP 200 with a structured payload.
        msg = (
            "Teacher voice model is not available on this server. "
            "You can still practise pronunciation, but teacher audio cannot be generated."
        )
        print("generate_teacher_audio_stream RuntimeError (XTTS):", e)
        return structured_feedback_error("tts_unavailable", msg, status=200)

    except Exception as exc:
        print("generate_teacher_audio_stream error:", exc)
        return error_response("tts_generation_failed", f"TTS generation failed: {exc}", 500)
514
+
515
+ # -------------------------------------------------------------------------
516
+ # ROUTE: PRONUNCIATION CHECK
517
+ # -------------------------------------------------------------------------
518
@pron_bp.route("/check_pronunciation", methods=["POST"])
def check_pronunciation():
    """Score a student's recording of a single word against the teacher.

    Form fields: 'word' (required), 'mode' ('phonetics' default or
    'waveform'), 'threshold' (waveform mode only). Files: 'audio'
    (required student recording), 'reference' (optional teacher audio,
    waveform mode only).

    Waveform mode compares audio signals (DTW + correlation); phonetics
    mode transcribes with Whisper and compares IPA phoneme sequences.
    """
    if "audio" not in request.files:
        return error_response("audio_required", "Audio required. Please record and try again.", 400)

    word = request.form.get("word", "").strip().lower()
    if not word:
        return error_response("word_required", "Word required", 400)

    mode = request.form.get("mode", "phonetics")
    file = request.files["audio"]

    # Reject unusable recordings (missing / too short / too quiet) early.
    y_student, sr = read_numpy(file)
    silent, reason = detect_silence(y_student, sr)
    if silent:
        if reason == "too_short":
            msg = "Recording was too short. Please speak clearly for at least 0.3 seconds."
        elif reason == "too_quiet":
            msg = "Recording too quiet. Increase microphone volume or speak louder."
        else:
            msg = "No audio detected. Please record again."
        return jsonify({
            "silent": True,
            "reason": reason,
            "suggestion": _make_suggestion_payload(msg),
            "feedback": _make_suggestion_payload(msg),
            "message": msg,
        })

    if mode == "waveform":
        # Teacher audio source: uploaded reference first, else synthesize.
        teacher_bytes = None
        if "reference" in request.files:
            try:
                rf = request.files["reference"]
                teacher_bytes = rf.read()
            except Exception:
                teacher_bytes = None

        if teacher_bytes is None:
            try:
                teacher_bytes = clone_voice_bytes(word, reference=None)
            except Exception:
                teacher_bytes = None

        if teacher_bytes is None:
            return error_response("teacher_audio_unavailable", "Teacher audio not available", 500)

        try:
            y_teacher, sr_teacher = load_audio_from_bytes(teacher_bytes, sr=sr)
        except Exception as e:
            return error_response("teacher_load_failed", f"Failed to load teacher audio: {e}", 500)

        sim = compute_waveform_similarity(y_teacher, y_student, sr=sr)

        # NOTE(review): float() on a malformed 'threshold' form value raises
        # ValueError (-> 500); consider validating client input here.
        threshold = float(request.form.get("threshold", 65.0))
        matched = (sim.get("similarity", 0.0) >= threshold)

        feedback = build_waveform_feedback(word, sim, threshold)

        return jsonify({
            "mode": "waveform",
            "silent": False,
            "word": word,
            "waveform_similarity": float(sim.get("similarity") or 0.0),
            "waveformScore": float(sim.get("similarity") or 0.0),
            "waveform_match": bool(matched),
            "feedback": feedback,
            "suggestion": feedback,
            "details": {
                "dtw_dist": sim.get("dtw_dist"),
                "dtw_norm": sim.get("dtw_norm"),
                "dtw_sim": sim.get("dtw_sim"),
                "corr": sim.get("corr"),
                "corr_sim": sim.get("corr_sim"),
            },
        })

    # ---- Phonetics mode: transcribe with Whisper, then compare phonemes ----
    heard = ""
    if WHISPER_AVAILABLE:
        # NOTE(review): NamedTemporaryFile(delete=False).name leaks the open
        # handle, and the file is not removed if transcribe() raises —
        # consider tempfile.mkstemp + try/finally.
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
        file.stream.seek(0)
        with open(tmp, "wb") as f:
            f.write(file.read())

        result = get_whisper().transcribe(tmp, language="en")
        os.remove(tmp)
        heard = normalize(result.get("text", ""))

    if not heard:
        return structured_feedback_error("no_asr", "Could not understand speech. Please try again.")

    # The exercise expects exactly one spoken word.
    parts = heard.split()
    if len(parts) > 1:
        msg = f"Detected multiple words: '{heard}'. Please say only '{word}'."
        return structured_feedback_error(
            "multiple_words",
            msg,
            extra={"word": word, "heard_word": heard},
        )

    heard_word = parts[0]

    teacher_ph = ipa_phonemes(word)
    student_ph = ipa_phonemes(heard_word)

    # Bail out if ASR clearly heard a different word.
    if not strong_word_match(word, heard_word, teacher_ph, student_ph):
        msg = f"You said '{heard_word}'. Please say only '{word}'."
        return structured_feedback_error(
            "incorrect_word",
            msg,
            extra={"word": word, "heard_word": heard_word},
        )

    feedback = []

    # Token-level diff of phoneme sequences: report deletions, insertions
    # and substitutions as child-friendly feedback items.
    t_tokens = teacher_ph.split()
    s_tokens = student_ph.split()

    sm = SequenceMatcher(None, t_tokens, s_tokens)

    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        if tag == "delete":
            missing = t_tokens[i1:i2]
            feedback.append({
                "title": "Missing Sounds",
                "message": f"You missed these sounds: {' '.join(missing)}. Try to say each sound clearly."
            })
        elif tag == "insert":
            extra = s_tokens[j1:j2]
            feedback.append({
                "title": "Extra Sounds",
                "message": f"You added extra sounds: {' '.join(extra)}. Try to keep only the sounds from the teacher word."
            })
        elif tag == "replace":
            exp = t_tokens[i1:i2]
            rec = s_tokens[j1:j2]
            feedback.append({
                "title": "Sound Substitution",
                "message": f"Expected {' '.join(exp)} but you said {' '.join(rec)}. Listen again and copy the teacher sound."
            })

    # Character-level vowel comparison over the raw IPA strings.
    # NOTE(review): multi-char IPA vowels (e.g. 'iː') are matched per
    # character here, so length marks count separately — confirm intended.
    vowels = "æɪiːʌəɑɒɔːeɜːuːʊɛ"

    v_t = [p for p in teacher_ph if p in vowels]
    v_s = [p for p in student_ph if p in vowels]

    if v_t != v_s:
        feedback.append({
            "title": "Vowel Accuracy",
            "message": "Your vowel sound is different. Open your mouth and copy the long or short sound of the teacher."
        })
    else:
        feedback.append({
            "title": "Vowel Accuracy",
            "message": "Your vowel pronunciation is accurate and matches the teacher."
        })

    # Consonant tokens: phoneme tokens whose first character is not a vowel.
    cons_t = [p for p in t_tokens if p and p[0] not in vowels]
    cons_s = [p for p in s_tokens if p and p[0] not in vowels]

    if cons_t != cons_s:
        feedback.append({
            "title": "Consonant Accuracy",
            "message": "Some consonant sounds are different. Focus on the first and last sound of the word."
        })
    else:
        feedback.append({
            "title": "Consonant Accuracy",
            "message": "Your consonant sounds match well with the teacher."
        })

    # Overall score: string similarity of the full IPA sequences, 0-100.
    ph_sim = SequenceMatcher(None, teacher_ph, student_ph).ratio()
    score = round(ph_sim * 100, 2)

    if score >= 90:
        overall_msg = f"Excellent. Your pronunciation of '{word}' is almost perfect."
    elif score >= 75:
        overall_msg = f"Very good. Your pronunciation of '{word}' is clear with small differences."
    elif score >= 60:
        overall_msg = f"Good attempt. People can understand '{word}', but you can improve some sounds."
    else:
        overall_msg = f"You are trying well, but you need more practice to say '{word}' like the teacher."

    feedback.insert(0, {
        "title": "Overall Score",
        "message": f"Phoneme score: {score:.1f}/100. {overall_msg}"
    })

    feedback.append({
        "title": "How To Say It",
        "message": f"Correct IPA for '{word}': {teacher_ph}"
    })

    feedback.append({
        "title": "Practice Tip",
        "message": "Listen to the teacher voice, then repeat slowly 3 times. Focus on the first sound and the vowel in the middle."
    })

    return jsonify({
        "silent": False,
        "word": word,
        "heard_word": heard_word,
        "phoneme_teacher": teacher_ph,
        "phoneme_student": student_ph,
        "phoneme_similarity": float(ph_sim),
        "phonemeSimilarity": float(ph_sim),
        "phoneme_score": float(score),
        "phonemeScore": float(score),
        "feedback": feedback,
        "suggestion": feedback,
        "audio_url": None,
    })
static/references/voice1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09d064bc2bd4880ceb1c6c4a69cb941a1b5e2ea05b151b721aab4cc17c34f56b
3
+ size 5364878
verification.py CHANGED
@@ -495,6 +495,7 @@ from vocabularyBuilder import vocab_bp
495
  from findingword import finding_bp
496
  from listen import listen_bp
497
  from ragg.app import rag_bp
 
498
  from ragg.ingest_trigger import ingest_trigger_bp
499
  app.register_blueprint(movie_bp, url_prefix="/media")
500
  app.register_blueprint(questions_bp, url_prefix="/media")
@@ -505,6 +506,7 @@ app.register_blueprint(finding_bp, url_prefix="/media")
505
  app.register_blueprint(listen_bp, url_prefix="/media")
506
  app.register_blueprint(rag_bp, url_prefix="/rag")
507
  app.register_blueprint(ingest_trigger_bp, url_prefix="/rag")
 
508
  # app.register_blueprint(questions_bp, url_prefix="/media") # <-- add this
509
  # ------------------------------------------------------------------------------
510
  # Local run (Gunicorn will import `verification:app` on Spaces)
 
495
  from findingword import finding_bp
496
  from listen import listen_bp
497
  from ragg.app import rag_bp
498
+ from pron import pron_bp
499
  from ragg.ingest_trigger import ingest_trigger_bp
500
  app.register_blueprint(movie_bp, url_prefix="/media")
501
  app.register_blueprint(questions_bp, url_prefix="/media")
 
506
  app.register_blueprint(listen_bp, url_prefix="/media")
507
  app.register_blueprint(rag_bp, url_prefix="/rag")
508
  app.register_blueprint(ingest_trigger_bp, url_prefix="/rag")
509
+ app.register_blueprint(pron_bp, url_prefix="/pron")
510
  # app.register_blueprint(questions_bp, url_prefix="/media") # <-- add this
511
  # ------------------------------------------------------------------------------
512
  # Local run (Gunicorn will import `verification:app` on Spaces)