Oviya committed on
Commit
80adf3e
·
1 Parent(s): 8eeff6c

update pronragg

Browse files
chroma_db/6bb1d18d-491e-4b83-bb53-aa5824da7394/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8f9dd08c89ad45ed5b37e53fb7096c1f0be75e0c9377baede6add3ae3b97c6
3
+ size 167600
chroma_db/6bb1d18d-491e-4b83-bb53-aa5824da7394/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e81c3b22454233bc12d0762f06dcca48261a75231cf87c79b75e69a6c00150
3
+ size 100
chroma_db/6bb1d18d-491e-4b83-bb53-aa5824da7394/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27571dbe52639d675f7ce26f4bf06ca84d65a2c943ad57727b90871d758a0d4d
3
+ size 400
chroma_db/6bb1d18d-491e-4b83-bb53-aa5824da7394/link_lists.bin ADDED
File without changes
chroma_db/a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f6e0dd5ee483e09b514559e6411fbc53b886ea77d8b25559576d80e4642179
3
+ size 167600
chroma_db/a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e81c3b22454233bc12d0762f06dcca48261a75231cf87c79b75e69a6c00150
3
+ size 100
chroma_db/a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc1ab79cf9173b7ffaa20ccc92926b3f13299fc8c1fcc191a99c6a56cb2cebd
3
+ size 400
chroma_db/a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384/link_lists.bin ADDED
File without changes
pronragg.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import base64
4
+ import tempfile
5
+ import subprocess
6
+ import re
7
+ import random
8
+
9
+ from flask import Blueprint, request, jsonify
10
+ from flask_cors import CORS
11
+ from pydub import AudioSegment
12
+ from faster_whisper import WhisperModel
13
+ from rapidfuzz.distance import Levenshtein
14
+ import chromadb
15
+
16
pronragg_bp = Blueprint("pronragg", __name__)

# --------------------------------------------------
# CONFIG
# --------------------------------------------------
# All file paths are resolved relative to this module's directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

VIDEO_PATH = os.path.join(BASE_DIR, "feedback.mp4")
JSON_PATH = os.path.join(BASE_DIR, "teacher_feedback_sentences_category.json")
CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")

WHISPER_MODEL = "base"          # faster-whisper model size
SAFE_PADDING = 0.05             # seconds of slack around each video cut
PAUSE_SECONDS = 0.5             # freeze-frame pause inserted between clips
MAX_SEGMENTS_PER_CATEGORY = 3   # cap on feedback clips per response

# Issue priority (VERY IMPORTANT): analyze() reports only the first
# detected issue in this order.
ISSUE_PRIORITY = [
    "silence",
    "multipleword",
    "wrong_word",
    "consonant",
    "vowel",
    "ending",
    "syllable",
    "stress",
    "success",
]

# --------------------------------------------------
# INIT MODELS (CPU-friendly int8 inference)
# --------------------------------------------------
whisper = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")

# --------------------------------------------------
# CHROMA INIT — persistent store of feedback-sentence metadata
# --------------------------------------------------
client = chromadb.PersistentClient(path=CHROMA_DIR)
collection = client.get_or_create_collection("feedback")
60
+
61
def init_segments():
    """Seed the Chroma collection from the feedback JSON file (idempotent).

    Skips entirely when the collection already holds data.  Fix: items are
    now added in a single batched ``collection.add`` call instead of one
    network/storage round trip per item, and an empty JSON file no longer
    triggers an add() with empty lists.
    """
    if collection.count() > 0:
        return

    with open(JSON_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not data:
        return

    collection.add(
        ids=[item["id"] for item in data],
        documents=[item["text"] for item in data],
        metadatas=[
            {
                "category": item["category"],
                "start": item["start"],
                "end": item["end"],
            }
            for item in data
        ],
    )

init_segments()
80
+
81
+ # --------------------------------------------------
82
+ # HELPERS
83
+ # --------------------------------------------------
84
def normalize_text(text: str) -> str:
    """Lowercase *text* and strip everything that is not a-z."""
    lowered = text.strip().lower()
    return re.sub(r"[^a-z]", "", lowered)
86
+
87
def transcribe(wav_path: str) -> str:
    """Run faster-whisper on a wav file; return the lowercase transcript."""
    segments, _info = whisper.transcribe(
        wav_path,
        language="en",
        beam_size=5,
        vad_filter=True,
    )
    text = "".join(segment.text for segment in segments)
    return text.strip().lower()
95
+
96
+ # --------------------------------------------------
97
+ # PRONUNCIATION LOGIC (FIXED)
98
+ # --------------------------------------------------
99
def analyze(expected: str, heard_raw: str):
    """Compare the expected word with the ASR transcript.

    Returns ``([issue], score)`` where ``issue`` is exactly one category
    from ISSUE_PRIORITY and ``score`` is an int 0-100.

    Fix: an expected word that normalizes to "" (no a-z letters) previously
    crashed with IndexError on ``expected_n[0]`` / ``expected_n[-1]``; it is
    now treated as unmatchable (``wrong_word``).
    """
    expected_n = normalize_text(expected)
    heard_n = normalize_text(heard_raw)

    if not heard_n:
        return ["silence"], 0

    if len(heard_raw.strip().split()) > 1:
        return ["multipleword"], 20

    # Guard: nothing left to compare against after normalization.
    if not expected_n:
        return ["wrong_word"], 0

    similarity = Levenshtein.normalized_similarity(expected_n, heard_n)
    score = int(similarity * 100)

    if similarity < 0.30:
        return ["wrong_word"], score

    detected = []
    vowels = "aeiou"

    # First-letter mismatch: classify by the expected character's category.
    if expected_n[0] != heard_n[0]:
        detected.append("vowel" if expected_n[0] in vowels else "consonant")

    # Vowel sequence mismatch (only add if not already classified as vowel).
    expected_vowels = [c for c in expected_n if c in vowels]
    heard_vowels = [c for c in heard_n if c in vowels]
    if expected_vowels != heard_vowels and "vowel" not in detected:
        detected.append("vowel")

    # Final-letter mismatch -> ending issue.
    if expected_n[-1] != heard_n[-1]:
        detected.append("ending")

    # Large length difference -> syllable-count issue (coarse heuristic).
    if abs(len(expected_n) - len(heard_n)) >= 2:
        detected.append("syllable")

    # Close but imperfect with nothing else flagged -> stress issue.
    if similarity < 0.85 and not detected:
        detected.append("stress")

    if not detected:
        return ["success"], score

    # Report only the single highest-priority issue.
    for issue in ISSUE_PRIORITY:
        if issue in detected:
            return [issue], score

    return ["success"], score
155
+
156
+ # --------------------------------------------------
157
+ # FETCH SEGMENTS (STRICT)
158
+ # --------------------------------------------------
159
def fetch_segments(categories):
    """Return up to MAX_SEGMENTS_PER_CATEGORY shuffled segment metadatas.

    Only the first category in *categories* is used; an empty list is
    returned when no matching segments exist.
    """
    if not categories:
        return []

    wanted = categories[0]

    result = collection.get(where={"category": wanted})

    # Strict re-filter on category (important): trust only exact matches.
    matches = [
        meta for meta in result.get("metadatas", [])
        if meta.get("category") == wanted
    ]

    if not matches:
        return []

    random.shuffle(matches)
    return matches[:MAX_SEGMENTS_PER_CATEGORY]
176
+
177
+ # --------------------------------------------------
178
+ # BUILD VIDEO WITH FREEZE-HOLD PAUSE
179
+ # --------------------------------------------------
180
def build_video(segments):
    """Cut and join teacher-video segments into one base64-encoded mp4.

    Each clip (except the last) gets a freeze-frame + audio pad of
    PAUSE_SECONDS; the clips are then joined with ffmpeg's concat demuxer.
    Returns "" when *segments* is empty.

    Fix: every temp file (per-segment clips, the concat list, the final
    mp4) is now removed in a ``finally`` block — previously all of them
    leaked into the temp directory on each call.  ffmpeg failures are still
    silently ignored (output discarded), matching the original best-effort
    behavior.
    """
    if not segments:
        return ""

    segments = sorted(segments, key=lambda x: x["start"])
    temp_paths = []

    def _new_temp(suffix):
        # Create a *closed* temp file whose path we hand to ffmpeg.
        handle = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        handle.close()
        temp_paths.append(handle.name)
        return handle.name

    try:
        clips = []
        for i, seg in enumerate(segments):
            clip_path = _new_temp(".mp4")
            clips.append(clip_path)

            # Hold the last frame only *between* clips, not after the final one.
            pause = PAUSE_SECONDS if i < len(segments) - 1 else 0

            subprocess.run(
                [
                    "ffmpeg", "-y",
                    "-ss", str(max(0, seg["start"] - SAFE_PADDING)),
                    "-to", str(seg["end"] + SAFE_PADDING),
                    "-i", VIDEO_PATH,
                    "-vf", f"tpad=stop_mode=clone:stop_duration={pause}",
                    "-af", f"apad=pad_dur={pause}",
                    "-c:v", "libx264",
                    "-c:a", "aac",
                    "-movflags", "+faststart",
                    clip_path,
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

        # concat demuxer input list: one "file '<path>'" line per clip.
        concat_path = _new_temp(".txt")
        with open(concat_path, "w") as f:
            for clip_path in clips:
                f.write(f"file '{clip_path}'\n")

        final_path = _new_temp(".mp4")
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-f", "concat",
                "-safe", "0",
                "-i", concat_path,
                "-c:v", "libx264",
                "-c:a", "aac",
                final_path,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        with open(final_path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
    finally:
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass
233
+
234
+ # --------------------------------------------------
235
+ # API
236
+ # --------------------------------------------------
237
@pronragg_bp.route("/score", methods=["POST"])
def score_pronunciation():
    """Score one recorded word and return issues, score, and feedback video.

    Expects multipart form data: ``word`` (target word) and ``audio``
    (recorded file).  Responds 400 when either is missing.

    Fix: the uploaded temp file and its wav conversion are now deleted in a
    ``finally`` block — previously both leaked on every request.
    """
    expected = request.form.get("word", "").strip()
    audio = request.files.get("audio")

    if not expected or not audio:
        return jsonify({"error": "Missing input"}), 400

    temp = tempfile.NamedTemporaryFile(delete=False, suffix=".webm")
    temp.close()  # pydub/ffmpeg reopen the file by path
    wav = temp.name[: -len(".webm")] + ".wav"

    try:
        audio.save(temp.name)
        AudioSegment.from_file(temp.name).export(wav, format="wav")

        heard = transcribe(wav)
        issues, score = analyze(expected, heard)

        # Fall back to the generic "silence" clips when the detected
        # category has no stored segments.
        segments = fetch_segments(issues) or fetch_segments(["silence"])
        video_blob = build_video(segments)

        return jsonify({
            "expected": expected,
            "heard": heard,
            "issues": issues,
            "score": score,
            "videoBlobBase64": video_blob
        })
    finally:
        for path in (temp.name, wav):
            try:
                os.remove(path)
            except OSError:
                pass
pronvideo.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import tempfile
4
+ from flask import Flask, Blueprint, request, jsonify
5
+ from flask_cors import CORS
6
+ from pydub import AudioSegment
7
+ from rapidfuzz.distance import Levenshtein
8
+
9
+ # ASR - WhisperX (or Faster Whisper for Forced Alignment)
10
+ try:
11
+ from faster_whisper import WhisperModel
12
+ HAS_WHISPER = True
13
+ except Exception:
14
+ HAS_WHISPER = False
15
+
16
+ # Initialize the Flask app and Blueprint
17
+
18
+ pronvideo_bp = Blueprint("pronvideo", __name__)
19
+
20
+ # -----------------------------
21
+ # Load Whisper model (CPU friendly)
22
+ # -----------------------------
23
+ WHISPER_MODEL_SIZE = os.getenv("WHISPER_MODEL_SIZE", "base")
24
+ whisper_model = None
25
+ if HAS_WHISPER:
26
+ whisper_model = WhisperModel(
27
+ WHISPER_MODEL_SIZE,
28
+ device="cpu",
29
+ compute_type="int8"
30
+ )
31
+
32
+ # -----------------------------
33
+ # Helpers
34
+ # -----------------------------
35
def normalize(text: str) -> str:
    """Lowercase and strip *text*, keeping only letters and whitespace."""
    cleaned = text.lower().strip()
    kept = [ch for ch in cleaned if ch.isalpha() or ch.isspace()]
    return "".join(kept)
37
+
38
def phoneme_similarity_score(expected_ph: str, spoken_ph: str) -> int:
    """Return a 0-100 Levenshtein-based similarity; 0 if either is empty."""
    if not expected_ph or not spoken_ph:
        return 0
    longest = max(len(expected_ph), len(spoken_ph))
    similarity = 1 - Levenshtein.distance(expected_ph, spoken_ph) / longest
    raw = int(round(similarity * 100))
    return min(100, max(0, raw))
46
+
47
def convert_to_wav_temp(upload_file) -> str:
    """Decode an uploaded audio file to a mono 16 kHz wav temp file.

    Returns the temp file's path; the caller is responsible for deleting it.
    """
    upload_file.stream.seek(0)
    payload = io.BytesIO(upload_file.stream.read())
    # Hint the decoder with the filename extension when available.
    fmt = os.path.splitext(upload_file.filename)[1].replace(".", "").lower() or None

    try:
        segment = AudioSegment.from_file(payload, format=fmt if fmt else None)
    except Exception:
        # Fall back to format auto-detection.
        payload.seek(0)
        segment = AudioSegment.from_file(payload)

    segment = segment.set_channels(1).set_frame_rate(16000)
    out = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    segment.export(out.name, format="wav")
    return out.name
63
+
64
def transcribe_audio(audio_path: str) -> str:
    """Transcribe *audio_path* with faster-whisper.

    Raises RuntimeError when the ASR backend is unavailable.
    """
    if not HAS_WHISPER or whisper_model is None:
        raise RuntimeError("Whisper ASR is not installed/available.")
    segments, _info = whisper_model.transcribe(
        audio_path,
        language="en",
        vad_filter=True,
    )
    pieces = [seg.text.strip() for seg in segments if seg.text]
    return " ".join(pieces).strip()
77
+
78
+ # -----------------------------
79
+ # Video feedback helpers
80
+ # -----------------------------
81
def static_video_for(kind: str):
    """Map an issue *kind* to its static feedback video metadata.

    Returns a dict with keys videoId/videoUrl/hint; unknown kinds (including
    "mixed") get all-None values.
    """
    catalog = {
        "success": ("video-success", "/assets/pronvideo/feedback/success.mp4", "Great job! Keep going."),
        "silence": ("video-silence", "/assets/pronvideo/feedback/silence.mp4", "Speak clearly into the mic for at least a second."),
        "wrong_word": ("video-wrong-word", "/assets/pronvideo/feedback/wrongword.mp4", "Please say only the target word."),
        "vowel": ("video-vowel", "/assets/pronvideo/feedback/vowel.mp4", "Work on vowel shape and length."),
        "consonant": ("video-consonant", "/assets/pronvideo/feedback/consonant.mp4", "Focus on consonant articulation, especially start/end sounds."),
        "stress": ("video-stress", "/assets/pronvideo/feedback/stress.mp4", "Emphasize the primary stressed syllable."),
        "syllable": ("video-syllable", "/assets/pronvideo/feedback/syllable.mp4", "Match the number of syllables and rhythm."),
        "ending": ("video-ending", "/assets/pronvideo/feedback/ending.mp4", "Work on the final sound—try to finish the word cleanly."),
        "multipleword": ("video-multipleword", "/assets/pronvideo/feedback/multipleword.mp4", "Please say only the target word, not multiple words."),
    }
    entry = catalog.get(kind)
    if entry is None:
        return {"videoId": None, "videoUrl": None, "hint": None}
    video_id, video_url, hint = entry
    return {"videoId": video_id, "videoUrl": video_url, "hint": hint}
95
+
96
+ # -----------------------------
97
+ # Function to detect feedback based on pronunciation
98
+ # -----------------------------
99
def vowel_consonant_feedback(teacher_ph: str, student_ph: str):
    """Compare teacher vs student pronunciations and list targeted feedback.

    Returns a list of {"title", "message"} dicts covering vowel, consonant,
    and ending-sound differences (empty list when none differ).
    """
    notes = []

    teacher_tokens = split_ipa_tokens(teacher_ph)
    student_tokens = split_ipa_tokens(student_ph)

    # Vowel sequence check
    if extract_vowel_sequence(teacher_ph) != extract_vowel_sequence(student_ph):
        notes.append({
            "title": "Vowel Accuracy",
            "message": "Your vowel sound is different. Focus on long/short quality and mouth opening."
        })

    # Consonant sequence check
    if extract_consonant_tokens(teacher_tokens) != extract_consonant_tokens(student_tokens):
        notes.append({
            "title": "Consonant Accuracy",
            "message": "Some consonant sounds differ. Pay attention to start and end sounds."
        })

    # Ending sound check
    teacher_end = last_ending_token(teacher_tokens)
    student_end = last_ending_token(student_tokens)
    if teacher_end and student_end and teacher_end != student_end:
        notes.append({
            "title": "Ending Sound",
            "message": f"The final sound differs. Try to end with '{teacher_end}'."
        })

    return notes
134
+
135
+ # -----------------------------
136
+ # Syllable estimation logic
137
+ # -----------------------------
138
def syllable_estimate(ipa: str):
    """Estimate the syllable count of *ipa* as the number of vowel runs.

    Fix: the previous version reset ``in_vowel`` on the second consecutive
    vowel, so a run of three or more vowels was counted more than once
    (e.g. "aaa" counted 2 runs instead of 1).  State is now reset only on
    non-vowel characters.
    """
    count = 0
    in_vowel = False
    for ch in ipa:
        if ch in VOWELS:
            if not in_vowel:
                # First vowel of a new run starts a syllable.
                count += 1
                in_vowel = True
        else:
            in_vowel = False
    return max(1, count)  # at least 1 syllable
151
+
152
def select_video_for_vc(teacher_ph: str, student_ph: str) -> str:
    """Classify the dominant pronunciation issue between two phoneme strings.

    Returns one of "wrong_word", "success", a single issue name
    ("vowel"/"consonant"/"ending"/"stress"/"syllable"), or "mixed" when
    several issues are detected.
    """
    # Early check: very low overall similarity means a different word.
    # Threshold chosen empirically; adjust (0-100). <40 => likely a different word.
    if phoneme_similarity_score(teacher_ph, student_ph) < 40:
        return "wrong_word"

    teacher_tokens = split_ipa_tokens(teacher_ph)
    student_tokens = split_ipa_tokens(student_ph)

    issues = []

    if extract_vowel_sequence(teacher_ph) != extract_vowel_sequence(student_ph):
        issues.append("vowel")

    if extract_consonant_tokens(teacher_tokens) != extract_consonant_tokens(student_tokens):
        issues.append("consonant")

    teacher_end = last_ending_token(teacher_tokens)
    student_end = last_ending_token(student_tokens)
    if teacher_end and student_end and teacher_end != student_end:
        issues.append("ending")

    teacher_stress = primary_stress_position(teacher_tokens)
    student_stress = primary_stress_position(student_tokens)
    if teacher_stress is not None and student_stress is not None and teacher_stress != student_stress:
        issues.append("stress")

    if syllable_estimate(teacher_ph) != syllable_estimate(student_ph):
        issues.append("syllable")

    if not issues:
        return "success"      # Correct pronunciation
    if len(issues) == 1:
        return issues[0]      # Exactly one mismatch type
    return "mixed"            # Multiple issues found
194
+
195
+ # -----------------------------
196
+ # Route: Score pronunciation with targeted feedback
197
+ # -----------------------------
198
@pronvideo_bp.route("/score", methods=["POST"])
def score_pronunciation():
    """Score an uploaded single-word recording and pick a feedback video.

    Expects multipart form data with ``audio`` (file) and ``word`` (target).
    Always responds 200 with a score payload for valid input; 400 for
    missing input; 500 on unexpected server errors.
    """
    if "audio" not in request.files:
        return jsonify({"score": 0, "error": "audio_required"}), 400
    expected_word = request.form.get("word", "").strip().lower()
    if not expected_word:
        return jsonify({"score": 0, "error": "word_required"}), 400

    upload = request.files["audio"]

    wav_path = None
    try:
        wav_path = convert_to_wav_temp(upload)

        # Transcribe and normalize what the student actually said.
        heard = normalize(transcribe_audio(wav_path))

        # No speech detected at all.
        if not heard:
            vid = static_video_for("silence")
            return jsonify({
                "score": 0,
                "error": "no_asr_text",
                "message": "No speech detected.",
                "hint": vid["hint"],
                "videoId": vid["videoId"],
                "videoUrl": vid["videoUrl"],
                "expected": expected_word,
                "heard": ""
            }), 200

        # More than one word spoken.
        if len(heard.split()) > 1:
            vid = static_video_for("multipleword")
            return jsonify({
                "score": 0,
                "error": "multiple_words",
                "message": f"Detected multiple words: '{heard}'. Please say only '{expected_word}'.",
                "hint": vid["hint"],
                "videoId": vid["videoId"],
                "videoUrl": vid["videoUrl"],
                "expected": expected_word,
                "heard": heard
            }), 200

        # NOTE(review): the raw words stand in for phoneme strings here —
        # a true grapheme-to-phoneme step would improve accuracy.
        score = phoneme_similarity_score(expected_word, heard)

        # Success only on an exact match with a high score.
        if heard == expected_word and score >= 90:
            vid = static_video_for("success")
            return jsonify({
                "score": score,
                "message": f"Excellent. You pronounced '{expected_word}' correctly.",
                "hint": vid["hint"],
                "videoId": vid["videoId"],
                "videoUrl": vid["videoUrl"],
                "expected": expected_word,
                "heard": heard
            }), 200

        # Mismatch: pick the targeted feedback video (vowel/consonant/etc).
        vid = static_video_for(select_video_for_vc(expected_word, heard))
        return jsonify({
            "score": score,
            "message": "Good try. Some sounds need practice.",
            "hint": vid["hint"],
            "videoId": vid["videoId"],
            "videoUrl": vid["videoUrl"],
            "expected": expected_word,
            "heard": heard
        }), 200

    except Exception as e:
        return jsonify({"score": 0, "error": "server_exception", "message": str(e)}), 500
    finally:
        # Always remove the temporary wav conversion.
        if wav_path:
            try:
                os.remove(wav_path)
            except Exception:
                pass
283
+
284
+
285
+ # IPA helpers and constants (adds split_ipa_tokens and related helpers)
286
+ VOWELS = set("aeiouɪʊɛæɔɑəɜɒeɪoʊaɪɔɪ") # extend with additional IPA symbols as needed
287
+ PRIMARY_STRESS = "ˈ"
288
+ SECONDARY_STRESS = "ˌ"
289
+ IPA_DIGRAPHS = {"tʃ", "dʒ", "t͡ʃ", "d͡ʒ"} # common multi-char IPA consonants
290
+
291
+ def split_ipa_tokens(ipa: str):
292
+ """
293
+ Tokenize an IPA or simple-orthography string into a list of tokens.
294
+ - Preserves stress markers as separate tokens.
295
+ - Combines common digraphs (e.g. 'tʃ', 'dʒ').
296
+ - If input contains spaces, splits on words and tokenizes each chunk.
297
+ Works acceptably for plain words (will return characters) and basic IPA.
298
+ """
299
+ if not ipa:
300
+ return []
301
+ ipa = ipa.strip()
302
+ # If whitespace-separated, preserve word boundaries as contiguous tokens
303
+ if " " in ipa:
304
+ parts = []
305
+ for part in ipa.split():
306
+ parts.extend(_tokenize_chunk(part))
307
+ return parts
308
+ return _tokenize_chunk(ipa)
309
+
310
+ def _tokenize_chunk(chunk: str):
311
+ tokens = []
312
+ i = 0
313
+ while i < len(chunk):
314
+ ch = chunk[i]
315
+ # stress markers
316
+ if ch in (PRIMARY_STRESS, SECONDARY_STRESS):
317
+ tokens.append(ch)
318
+ i += 1
319
+ continue
320
+ # try two-character digraphs first
321
+ if i + 1 < len(chunk):
322
+ pair = chunk[i : i + 2]
323
+ if pair in IPA_DIGRAPHS:
324
+ tokens.append(pair)
325
+ i += 2
326
+ continue
327
+ # fallback single character token
328
+ tokens.append(ch)
329
+ i += 1
330
+ return tokens
331
+
332
def extract_vowel_sequence(ipa: str):
    """Return the vowel tokens of *ipa*, in order, joined into one string."""
    vowel_tokens = [token for token in split_ipa_tokens(ipa) if token in VOWELS]
    return "".join(vowel_tokens)
336
+
337
def extract_consonant_tokens(tokens):
    """Keep only consonant tokens: drop vowels, stress markers, and
    whitespace-only tokens, preserving order."""
    stress_marks = (PRIMARY_STRESS, SECONDARY_STRESS)
    result = []
    for token in tokens:
        if token in VOWELS or token in stress_marks:
            continue
        if not token.strip():
            continue
        result.append(token)
    return result
340
+
341
def last_ending_token(tokens):
    """Approximate the final sound: the last token that is non-empty and not
    a stress marker, or None when no such token exists."""
    stress_marks = (PRIMARY_STRESS, SECONDARY_STRESS)
    for token in reversed(tokens):
        if token and token not in stress_marks:
            return token
    return None
348
+
349
def primary_stress_position(tokens):
    """Index of the primary stress marker in *tokens*, or None when absent.

    A coarse approximation used to compare stress placement between the
    expected and spoken forms.
    """
    if PRIMARY_STRESS in tokens:
        return tokens.index(PRIMARY_STRESS)
    return None
358
+
359
+
teacher_feedback_sentences_category.json ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "feedback.mp4_sent_0",
4
+ "video_file": "feedback.mp4",
5
+ "start": 0.167,
6
+ "end": 1.689,
7
+ "text": "I could not hear your voice.",
8
+ "category": "silence"
9
+ },
10
+ {
11
+ "id": "feedback.mp4_sent_1",
12
+ "video_file": "feedback.mp4",
13
+ "start": 2.771,
14
+ "end": 5.735,
15
+ "text": "Hold the record button and say the word.",
16
+ "category": "silence"
17
+ },
18
+ {
19
+ "id": "feedback.mp4_sent_2",
20
+ "video_file": "feedback.mp4",
21
+ "start": 6.936,
22
+ "end": 9.319,
23
+ "text": "Speak clearly and try again.",
24
+ "category": "silence"
25
+ },
26
+ {
27
+ "id": "feedback.mp4_sent_3",
28
+ "video_file": "feedback.mp4",
29
+ "start": 9.24,
30
+ "end": 12.304,
31
+ "text": "It sounds like a different word.",
32
+ "category": "wrong_word"
33
+ },
34
+ {
35
+ "id": "feedback.mp4_sent_4",
36
+ "video_file": "feedback.mp4",
37
+ "start": 13.305,
38
+ "end": 15.568,
39
+ "text": "Please say the correct word again.",
40
+ "category": "wrong_word"
41
+ },
42
+ {
43
+ "id": "feedback.mp4_sent_5",
44
+ "video_file": "feedback.mp4",
45
+ "start": 15.548,
46
+ "end": 19.153,
47
+ "text": "Listen to the example and repeat.",
48
+ "category": "wrong_word"
49
+ },
50
+ {
51
+ "id": "feedback.mp4_sent_6",
52
+ "video_file": "feedback.mp4",
53
+ "start": 20.255,
54
+ "end": 20.956,
55
+ "text": "Excellent!",
56
+ "category": "success"
57
+ },
58
+ {
59
+ "id": "feedback.mp4_sent_7",
60
+ "video_file": "feedback.mp4",
61
+ "start": 21.456,
62
+ "end": 23.078,
63
+ "text": "You said it correctly.",
64
+ "category": "success"
65
+ },
66
+ {
67
+ "id": "feedback.mp4_sent_8",
68
+ "video_file": "feedback.mp4",
69
+ "start": 24.22,
70
+ "end": 26.463,
71
+ "text": "Your pronunciation is clear.",
72
+ "category": "success"
73
+ },
74
+ {
75
+ "id": "feedback.mp4_sent_9",
76
+ "video_file": "feedback.mp4",
77
+ "start": 27.585,
78
+ "end": 28.586,
79
+ "text": "Great job!",
80
+ "category": "success"
81
+ },
82
+ {
83
+ "id": "feedback.mp4_sent_10",
84
+ "video_file": "feedback.mp4",
85
+ "start": 28.989,
86
+ "end": 30.631,
87
+ "text": "Move to the next word",
88
+ "category": "success"
89
+ },
90
+ {
91
+ "id": "feedback.mp4_sent_11",
92
+ "video_file": "feedback.mp4",
93
+ "start": 30.571,
94
+ "end": 33.514,
95
+ "text": "Focus on the vowel sound",
96
+ "category": "vowel"
97
+ },
98
+ {
99
+ "id": "feedback.mp4_sent_12",
100
+ "video_file": "feedback.mp4",
101
+ "start": 33.454,
102
+ "end": 36.717,
103
+ "text": "Open your mouth a little more",
104
+ "category": "vowel"
105
+ },
106
+ {
107
+ "id": "feedback.mp4_sent_13",
108
+ "video_file": "feedback.mp4",
109
+ "start": 37.998,
110
+ "end": 40.441,
111
+ "text": "Say the word slowly once",
112
+ "category": "vowel"
113
+ },
114
+ {
115
+ "id": "feedback.mp4_sent_14",
116
+ "video_file": "feedback.mp4",
117
+ "start": 30.571,
118
+ "end": 44.105,
119
+ "text": "Focus on the first sound",
120
+ "category": "consonant"
121
+ },
122
+ {
123
+ "id": "feedback.mp4_sent_15",
124
+ "video_file": "feedback.mp4",
125
+ "start": 44.045,
126
+ "end": 47.388,
127
+ "text": "Make the consonant clear",
128
+ "category": "consonant"
129
+ },
130
+ {
131
+ "id": "feedback.mp4_sent_16",
132
+ "video_file": "feedback.mp4",
133
+ "start": 47.328,
134
+ "end": 50.812,
135
+ "text": "Repeat the word slowly",
136
+ "category": "consonant"
137
+ },
138
+ {
139
+ "id": "feedback.mp4_sent_17",
140
+ "video_file": "feedback.mp4",
141
+ "start": 50.732,
142
+ "end": 54.075,
143
+ "text": "Do not stop early",
144
+ "category": "ending"
145
+ },
146
+ {
147
+ "id": "feedback.mp4_sent_18",
148
+ "video_file": "feedback.mp4",
149
+ "start": 37.998,
150
+ "end": 57.999,
151
+ "text": "Say the last sound clearly",
152
+ "category": "ending"
153
+ },
154
+ {
155
+ "id": "feedback.mp4_sent_19",
156
+ "video_file": "feedback.mp4",
157
+ "start": 59.145,
158
+ "end": 60.647,
159
+ "text": "Try the word again.",
160
+ "category": "ending"
161
+ },
162
+ {
163
+ "id": "feedback.mp4_sent_20",
164
+ "video_file": "feedback.mp4",
165
+ "start": 60.587,
166
+ "end": 64.873,
167
+ "text": "Say the strong part a little louder.",
168
+ "category": "stress"
169
+ },
170
+ {
171
+ "id": "feedback.mp4_sent_21",
172
+ "video_file": "feedback.mp4",
173
+ "start": 64.813,
174
+ "end": 69.018,
175
+ "text": "Keep the rest of the word smooth.",
176
+ "category": "stress"
177
+ },
178
+ {
179
+ "id": "feedback.mp4_sent_22",
180
+ "video_file": "feedback.mp4",
181
+ "start": 70.34,
182
+ "end": 72.863,
183
+ "text": "Try again with clear stress.",
184
+ "category": "stress"
185
+ },
186
+ {
187
+ "id": "feedback.mp4_sent_23",
188
+ "video_file": "feedback.mp4",
189
+ "start": 74.365,
190
+ "end": 76.328,
191
+ "text": "Break the word into parts.",
192
+ "category": "syllable"
193
+ },
194
+ {
195
+ "id": "feedback.mp4_sent_24",
196
+ "video_file": "feedback.mp4",
197
+ "start": 77.73,
198
+ "end": 80.013,
199
+ "text": "Say each part slowly.",
200
+ "category": "syllable"
201
+ },
202
+ {
203
+ "id": "feedback.mp4_sent_25",
204
+ "video_file": "feedback.mp4",
205
+ "start": 81.154,
206
+ "end": 83.177,
207
+ "text": "Then say the full word.",
208
+ "category": "syllable"
209
+ },
210
+ {
211
+ "id": "feedback.mp4_sent_26",
212
+ "video_file": "feedback.mp4",
213
+ "start": 84.525,
214
+ "end": 88.95,
215
+ "text": "Say only the target word without extra words.",
216
+ "category": "multipleword"
217
+ },
218
+ {
219
+ "id": "feedback.mp4_sent_27",
220
+ "video_file": "feedback.mp4",
221
+ "start": 89.311,
222
+ "end": 92.474,
223
+ "text": "Use a big a sound at the start.",
224
+ "category": "apple"
225
+ },
226
+ {
227
+ "id": "feedback.mp4_sent_28",
228
+ "video_file": "feedback.mp4",
229
+ "start": 93.275,
230
+ "end": 95.738,
231
+ "text": "Apple, not apple.",
232
+ "category": "apple"
233
+ },
234
+ {
235
+ "id": "feedback.mp4_sent_29",
236
+ "video_file": "feedback.mp4",
237
+ "start": 97.079,
238
+ "end": 103.226,
239
+ "text": "Open your mouth more for a, like apple, not apple.",
240
+ "category": "apple"
241
+ },
242
+ {
243
+ "id": "feedback.mp4_sent_30",
244
+ "video_file": "feedback.mp4",
245
+ "start": 104.548,
246
+ "end": 108.372,
247
+ "text": "Make a short o sound, not o.",
248
+ "category": "ball"
249
+ },
250
+ {
251
+ "id": "feedback.mp4_sent_31",
252
+ "video_file": "feedback.mp4",
253
+ "start": 109.173,
254
+ "end": 110.775,
255
+ "text": "Say ball.",
256
+ "category": "ball"
257
+ },
258
+ {
259
+ "id": "feedback.mp4_sent_32",
260
+ "video_file": "feedback.mp4",
261
+ "start": 110.695,
262
+ "end": 114.159,
263
+ "text": "Start with a strong b.",
264
+ "category": "ball"
265
+ },
266
+ {
267
+ "id": "feedback.mp4_sent_33",
268
+ "video_file": "feedback.mp4",
269
+ "start": 114.326,
270
+ "end": 114.987,
271
+ "text": "sound.",
272
+ "category": "ball"
273
+ },
274
+ {
275
+ "id": "feedback.mp4_sent_34",
276
+ "video_file": "feedback.mp4",
277
+ "start": 116.629,
278
+ "end": 118.572,
279
+ "text": "I could not hear your voice.",
280
+ "category": "silence"
281
+ },
282
+ {
283
+ "id": "feedback.mp4_sent_35",
284
+ "video_file": "feedback.mp4",
285
+ "start": 119.233,
286
+ "end": 121.837,
287
+ "text": "Please hold the record button and say the word.",
288
+ "category": "silence"
289
+ },
290
+ {
291
+ "id": "feedback.mp4_sent_36",
292
+ "video_file": "feedback.mp4",
293
+ "start": 121.777,
294
+ "end": 124.4,
295
+ "text": "It was very quiet.",
296
+ "category": "silence"
297
+ },
298
+ {
299
+ "id": "feedback.mp4_sent_37",
300
+ "video_file": "feedback.mp4",
301
+ "start": 125.081,
302
+ "end": 127.425,
303
+ "text": "Speak a little louder and try again.",
304
+ "category": "silence"
305
+ },
306
+ {
307
+ "id": "feedback.mp4_sent_38",
308
+ "video_file": "feedback.mp4",
309
+ "start": 127.345,
310
+ "end": 130.329,
311
+ "text": "No sound was recorded.",
312
+ "category": "silence"
313
+ },
314
+ {
315
+ "id": "feedback.mp4_sent_39",
316
+ "video_file": "feedback.mp4",
317
+ "start": 130.95,
318
+ "end": 133.594,
319
+ "text": "Check your microphone and say the word again.",
320
+ "category": "silence"
321
+ },
322
+ {
323
+ "id": "feedback.mp4_sent_40",
324
+ "video_file": "feedback.mp4",
325
+ "start": 133.514,
326
+ "end": 136.177,
327
+ "text": "I think you whispered.",
328
+ "category": "silence"
329
+ },
330
+ {
331
+ "id": "feedback.mp4_sent_41",
332
+ "video_file": "feedback.mp4",
333
+ "start": 136.958,
334
+ "end": 139.522,
335
+ "text": "Use your clear classroom voice.",
336
+ "category": "silence"
337
+ }
338
+ ]
verification.py CHANGED
@@ -496,6 +496,8 @@ from findingword import finding_bp
496
  from listen import listen_bp
497
  from ragg.app import rag_bp
498
  from pron import pron_bp
 
 
499
  from ragg.ingest_trigger import ingest_trigger_bp
500
  app.register_blueprint(movie_bp, url_prefix="/media")
501
  app.register_blueprint(questions_bp, url_prefix="/media")
@@ -507,6 +509,8 @@ app.register_blueprint(listen_bp, url_prefix="/media")
507
  app.register_blueprint(rag_bp, url_prefix="/rag")
508
  app.register_blueprint(ingest_trigger_bp, url_prefix="/rag")
509
  app.register_blueprint(pron_bp, url_prefix="/pron")
 
 
510
  # app.register_blueprint(questions_bp, url_prefix="/media") # <-- add this
511
  # ------------------------------------------------------------------------------
512
  # Local run (Gunicorn will import `verification:app` on Spaces)
 
496
  from listen import listen_bp
497
  from ragg.app import rag_bp
498
  from pron import pron_bp
499
+ from pronvideo import pronvideo_bp
500
+ from pronragg import pronragg_bp
501
  from ragg.ingest_trigger import ingest_trigger_bp
502
  app.register_blueprint(movie_bp, url_prefix="/media")
503
  app.register_blueprint(questions_bp, url_prefix="/media")
 
509
  app.register_blueprint(rag_bp, url_prefix="/rag")
510
  app.register_blueprint(ingest_trigger_bp, url_prefix="/rag")
511
  app.register_blueprint(pron_bp, url_prefix="/pron")
512
+ app.register_blueprint(pronvideo_bp, url_prefix="/pronvideo")
513
+ app.register_blueprint(pronragg_bp, url_prefix="/pronragg")
514
  # app.register_blueprint(questions_bp, url_prefix="/media") # <-- add this
515
  # ------------------------------------------------------------------------------
516
  # Local run (Gunicorn will import `verification:app` on Spaces)