Spaces:

pykara
/

py-learn-backend

Running

App Files Files Community

Oviya committed 6 days ago

Commit

d7ebbb4

1 Parent(s): 48fb81d

add pronragupgrade

Browse files

Files changed (13) hide show

chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/data_level0.bin +1 -1
chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/header.bin +0 -0
chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/length.bin +1 -1
chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/link_lists.bin +0 -0
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/data_level0.bin +1 -1
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/header.bin +0 -0
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/length.bin +1 -1
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/link_lists.bin +0 -0
feedback.mp4 +2 -2
pronragupgrade.py +1146 -0
requirements.txt +8 -0
teacher_feedback_sentences_category.json +529 -189
verification.py +2 -0

chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/data_level0.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db8f9dd08c89ad45ed5b37e53fb7096c1f0be75e0c9377baede6add3ae3b97c6
 size 167600

 version https://git-lfs.github.com/spec/v1
+oid sha256:90b564d60a2658c07a41e1133109c1574bb40f6ab674750bba8b8eeb28a08f25
 size 167600

chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/header.bin RENAMED Viewed

File without changes

chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/length.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27571dbe52639d675f7ce26f4bf06ca84d65a2c943ad57727b90871d758a0d4d
 size 400

 version https://git-lfs.github.com/spec/v1
+oid sha256:7171cf84eb030fe5cb580f57a325f57cceb0aed0e55ea95c81d67d4181e1ed81
 size 400

chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 → 1ceaf3a3-30e6-42c4-b515-99a05466da04}/link_lists.bin RENAMED Viewed

File without changes

chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/data_level0.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4f6e0dd5ee483e09b514559e6411fbc53b886ea77d8b25559576d80e4642179
 size 167600

 version https://git-lfs.github.com/spec/v1
+oid sha256:f79deee1f1ed7bc4c1095b45122b981044435dc5d6fbf46d1303b8b3dcf9b9a0
 size 167600

chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/header.bin RENAMED Viewed

File without changes

chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/length.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acc1ab79cf9173b7ffaa20ccc92926b3f13299fc8c1fcc191a99c6a56cb2cebd
 size 400

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8d329104353429c3a4fab240f87e7cba8ac17269bbfe57d26150d03cb34fa0a
 size 400

chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 → 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/link_lists.bin RENAMED Viewed

File without changes

feedback.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65b7295f9c5050cbd16e2c4ad4b314fb891fe74804b3cd033d020e657d567ac7
-size 90917484

 version https://git-lfs.github.com/spec/v1
+oid sha256:d91dd148f4b3bc5f7b4174a4ddd0d5665d123202f442c342f44bc3ffb33a4932
+size 248621925

pronragupgrade.py ADDED Viewed

	@@ -0,0 +1,1146 @@

+import os
+import re
+import torch
+import tempfile
+import subprocess
+import soundfile as sf
+import numpy as np
+import json
+import base64
+import random
+import chromadb
+import eng_to_ipa as ipa
+from flask import Flask, request, jsonify,Blueprint
+from flask_cors import CORS
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+# Flask blueprint for this module, registered under the name "pronragupgrade".
+pronragupgrade_bp = Blueprint("pronragupgrade", __name__)
+# ==================================================
+# 1. SETUP & CONFIG
+# ==================================================
+# All data files are resolved relative to the directory containing this file.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+VIDEO_PATH = os.path.join(BASE_DIR, "feedback.mp4")
+JSON_PATH = os.path.join(BASE_DIR, "teacher_feedback_sentences_category.json")
+CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")
+# Use the GPU when torch reports one, otherwise fall back to the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+MODEL_ID = "moxeeeem/wav2vec2-finetuned-pronunciation-correction"
+# NOTE: the processor and model are loaded at import time, so importing this
+# module triggers the load.
+print(f"Loading model to {DEVICE}...")
+processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
+model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID).to(DEVICE)
+# Put the model in evaluation mode.
+model.eval()
+# ==================================================
+# 2. CHROMA DB INITIALIZATION
+# ==================================================
+# Persistent client stored under CHROMA_DIR; the "feedback" collection is
+# created on first use and reused afterwards.
+client = chromadb.PersistentClient(path=CHROMA_DIR)
+collection = client.get_or_create_collection("feedback")
+def init_segments():
+    if collection.count() > 0:
+        return
+    if not os.path.exists(JSON_PATH):
+        print(f"Warning: JSON file not found at {JSON_PATH}")
+        # Create more comprehensive dummy data with multiple clips per category
+        dummy_data = [
+            # Syllable category clips
+            {"id": 1, "text": "Let's work on syllable count", "category": "syllable", "start": 0, "end": 5},
+            {"id": 2, "text": "That word has multiple syllables", "category": "syllable", "start": 5, "end": 10},
+            {"id": 3, "text": "Make sure you pronounce all syllables", "category": "syllable", "start": 10, "end": 15},
+            # Ending category clips
+            {"id": 4, "text": "Focus on the ending sound", "category": "ending", "start": 15, "end": 20},
+            {"id": 5, "text": "Don't forget the final consonant", "category": "ending", "start": 20, "end": 25},
+            {"id": 6, "text": "Complete the word properly", "category": "ending", "start": 25, "end": 30},
+            # Vowel category clips
+            {"id": 7, "text": "Let's work on vowel sounds", "category": "vowel", "start": 30, "end": 35},
+            {"id": 8, "text": "The vowel should be clear", "category": "vowel", "start": 35, "end": 40},
+            {"id": 9, "text": "Focus on vowel quality", "category": "vowel", "start": 40, "end": 45},
+            # Consonant category clips
+            {"id": 10, "text": "Articulate consonants clearly", "category": "consonant", "start": 45, "end": 50},
+            {"id": 11, "text": "Consonants should be crisp", "category": "consonant", "start": 50, "end": 55},
+            {"id": 12, "text": "Work on consonant sounds", "category": "consonant", "start": 55, "end": 60},
+            # Stress category clips
+            {"id": 13, "text": "Focus on word stress", "category": "stress", "start": 60, "end": 65},
+            {"id": 14, "text": "Emphasize the correct syllable", "category": "stress", "start": 65, "end": 70},
+            {"id": 15, "text": "Watch your rhythm and stress", "category": "stress", "start": 70, "end": 75},
+            # Success category clips
+            {"id": 16, "text": "Excellent work!", "category": "success", "start": 75, "end": 80},
+            {"id": 17, "text": "Great pronunciation!", "category": "success", "start": 80, "end": 85},
+            {"id": 18, "text": "Keep up the good work!", "category": "success", "start": 85, "end": 90},
+            # Wrong word category clips
+            {"id": 19, "text": "That sounds like a different word", "category": "wrong_word", "start": 90, "end": 95},
+            {"id": 20, "text": "Please say the target word", "category": "wrong_word", "start": 95, "end": 100},
+            # Multiple words category clips
+            {"id": 21, "text": "Say just one word please", "category": "multiple_words", "start": 100, "end": 105},
+            {"id": 22, "text": "Focus on a single word", "category": "multiple_words", "start": 105, "end": 110},
+            # Silence category clips
+            {"id": 23, "text": "I couldn't hear anything", "category": "silence", "start": 110, "end": 115},
+            {"id": 24, "text": "Please speak louder", "category": "silence", "start": 115, "end": 120},
+            # Specific phoneme clips
+            {"id": 25, "text": "For the 'æ' sound like in cat", "category": "vowel", "phoneme": "æ", "start": 120, "end": 125},
+            {"id": 26, "text": "The 'r' should be soft", "category": "consonant", "phoneme": "r", "start": 125, "end": 130},
+            {"id": 27, "text": "The 'ɪ' sound is short", "category": "vowel", "phoneme": "ɪ", "start": 130, "end": 135},
+            {"id": 28, "text": "The 't' should be clear", "category": "consonant", "phoneme": "t", "start": 135, "end": 140},
+        ]
+        for item in dummy_data:
+            meta = {"category": item["category"], "start": item["start"], "end": item["end"]}
+            if "phoneme" in item:
+                meta["phoneme"] = item["phoneme"]
+            collection.add(ids=[str(item["id"])], documents=[item["text"]], metadatas=[meta])
+        print(f"Created {len(dummy_data)} dummy video segments in ChromaDB")
+        return
+    with open(JSON_PATH, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    for item in data:
+        meta = {
+            "category": item["category"],
+            "start": item["start"],
+            "end": item["end"]
+        }
+        if "phoneme" in item:
+            meta["phoneme"] = item["phoneme"]
+        collection.add(
+            ids=[str(item["id"])],
+            documents=[item["text"]],
+            metadatas=[meta]
+        )
+    print(f"Loaded {len(data)} video segments into ChromaDB")
+init_segments()
+# ==================================================
+# 3. UK ENGLISH PRONUNCIATION SYSTEM
+# ==================================================
+# UK Phoneme Sound Database
+UK_PHONEME_DB = {
+    "ɪ": {"name": "KIT vowel", "example": "sit", "tip": "Short front vowel", "type": "vowel"},
+    "iː": {"name": "FLEECE vowel", "example": "see", "tip": "Long front vowel", "type": "vowel"},
+    "ʊ": {"name": "FOOT vowel", "example": "put", "tip": "Short rounded back vowel", "type": "vowel"},
+    "uː": {"name": "GOOSE vowel", "example": "too", "tip": "Long rounded back vowel", "type": "vowel"},
+    "e": {"name": "DRESS vowel", "example": "bed", "tip": "Short mid front vowel", "type": "vowel"},
+    "ə": {"name": "SCHWA", "example": "about", "tip": "Relaxed central vowel", "type": "vowel"},
+    "ɜː": {"name": "NURSE vowel", "example": "bird", "tip": "Long central vowel", "type": "vowel"},
+    "ɔː": {"name": "THOUGHT vowel", "example": "law", "tip": "Long open-mid back vowel", "type": "vowel"},
+    "æ": {"name": "TRAP vowel", "example": "cat", "tip": "Short open front vowel", "type": "vowel"},
+    "ʌ": {"name": "STRUT vowel", "example": "cup", "tip": "Short mid back vowel", "type": "vowel"},
+    "ɑː": {"name": "BATH vowel", "example": "father", "tip": "Long open back vowel", "type": "vowel"},
+    "ɒ": {"name": "LOT vowel", "example": "hot", "tip": "Short open back rounded vowel", "type": "vowel"},
+    "eɪ": {"name": "FACE diphthong", "example": "day", "tip": "Glide from e to ɪ", "type": "diphthong"},
+    "aɪ": {"name": "PRICE diphthong", "example": "eye", "tip": "Glide from a to ɪ", "type": "diphthong"},
+    "ɔɪ": {"name": "CHOICE diphthong", "example": "boy", "tip": "Glide from ɔ to ɪ", "type": "diphthong"},
+    "aʊ": {"name": "MOUTH diphthong", "example": "now", "tip": "Glide from a to ʊ", "type": "diphthong"},
+    "əʊ": {"name": "GOAT diphthong", "example": "go", "tip": "Glide from ə to ʊ", "type": "diphthong"},
+    "p": {"name": "voiceless bilabial plosive", "example": "pen", "tip": "Explosive 'p' sound", "type": "consonant"},
+    "b": {"name": "voiced bilabial plosive", "example": "bad", "tip": "Voiced 'b' with vibration", "type": "consonant"},
+    "t": {"name": "voiceless alveolar plosive", "example": "tea", "tip": "Tongue tip on alveolar ridge", "type": "consonant"},
+    "d": {"name": "voiced alveolar plosive", "example": "did", "tip": "Voiced 'd' with vibration", "type": "consonant"},
+    "k": {"name": "voiceless velar plosive", "example": "cat", "tip": "Back of tongue on soft palate", "type": "consonant"},
+    "ɡ": {"name": "voiced velar plosive", "example": "get", "tip": "Voiced 'g' with vibration", "type": "consonant"},
+    "tʃ": {"name": "voiceless palato-alveolar affricate", "example": "chin", "tip": "Combination of 't' and 'ʃ'", "type": "consonant"},
+    "dʒ": {"name": "voiced palato-alveolar affricate", "example": "jam", "tip": "Combination of 'd' and 'ʒ'", "type": "consonant"},
+    "f": {"name": "voiceless labiodental fricative", "example": "fall", "tip": "Upper teeth on lower lip", "type": "consonant"},
+    "v": {"name": "voiced labiodental fricative", "example": "van", "tip": "Voiced version of 'f'", "type": "consonant"},
+    "θ": {"name": "voiceless dental fricative", "example": "thin", "tip": "Tongue between teeth, no vibration", "type": "consonant"},
+    "ð": {"name": "voiced dental fricative", "example": "then", "tip": "Tongue between teeth, with vibration", "type": "consonant"},
+    "s": {"name": "voiceless alveolar fricative", "example": "see", "tip": "Hissing 's' sound", "type": "consonant"},
+    "z": {"name": "voiced alveolar fricative", "example": "zoo", "tip": "Voiced 'z' sound", "type": "consonant"},
+    "ʃ": {"name": "voiceless palato-alveolar fricative", "example": "she", "tip": "'Sh' sound, tongue raised", "type": "consonant"},
+    "ʒ": {"name": "voiced palato-alveolar fricative", "example": "pleasure", "tip": "Voiced 'zh' sound", "type": "consonant"},
+    "h": {"name": "voiceless glottal fricative", "example": "hot", "tip": "Breathy 'h' from throat", "type": "consonant"},
+    "m": {"name": "bilabial nasal", "example": "man", "tip": "Humming 'm' with lips closed", "type": "consonant"},
+    "n": {"name": "alveolar nasal", "example": "no", "tip": "Tongue on alveolar ridge", "type": "consonant"},
+    "ŋ": {"name": "velar nasal", "example": "sing", "tip": "'Ng' sound, back of tongue up", "type": "consonant"},
+    "l": {"name": "alveolar lateral approximant", "example": "let", "tip": "Tongue tip on alveolar ridge", "type": "consonant"},
+    "r": {"name": "alveolar approximant", "example": "red", "tip": "UK 'r' is soft", "type": "consonant"},
+    "j": {"name": "palatal approximant", "example": "yes", "tip": "'Y' sound", "type": "consonant"},
+    "w": {"name": "labio-velar approximant", "example": "we", "tip": "Round lips", "type": "consonant"},
+}
+# Common words with syllable info
+COMMON_UK_WORDS = {
+    "rabbit": {"phonemes": ["r", "æ", "b", "ɪ", "t"], "syllables": 2, "stress": "first"},
+    "dog": {"phonemes": ["d", "ɒ", "ɡ"], "syllables": 1, "stress": "only"},
+    "cat": {"phonemes": ["k", "æ", "t"], "syllables": 1, "stress": "only"},
+    "water": {"phonemes": ["w", "ɔː", "t", "ə"], "syllables": 2, "stress": "first"},
+    "hello": {"phonemes": ["h", "ɛ", "l", "əʊ"], "syllables": 2, "stress": "second"},
+    "banana": {"phonemes": ["b", "ə", "n", "ɑː", "n", "ə"], "syllables": 3, "stress": "second"},
+    "computer": {"phonemes": ["k", "ə", "m", "p", "j", "uː", "t", "ə"], "syllables": 3, "stress": "second"},
+    "elephant": {"phonemes": ["ɛ", "l", "ɪ", "f", "ə", "n", "t"], "syllables": 3, "stress": "first"},
+}
+def get_uk_pronunciation(word):
+    """Get UK pronunciation with syllable info."""
+    word_lower = word.lower().strip()
+    if word_lower in COMMON_UK_WORDS:
+        return COMMON_UK_WORDS[word_lower]["phonemes"]
+    try:
+        ipa_str = ipa.convert(word)
+        clean_ipa = re.sub(r'[ˈˌː]', '', ipa_str)
+        phonemes = []
+        i = 0
+        while i < len(clean_ipa):
+            if i + 1 < len(clean_ipa):
+                two_char = clean_ipa[i:i+2]
+                if two_char in ['eɪ', 'aɪ', 'ɔɪ', 'aʊ', 'əʊ', 'tʃ', 'dʒ']:
+                    phonemes.append(two_char)
+                    i += 2
+                    continue
+            phonemes.append(clean_ipa[i])
+            i += 1
+        return phonemes
+    except Exception as e:
+        print(f"Error getting IPA for {word}: {e}")
+        if word_lower == "rabbit":
+            return ["r", "æ", "b", "ɪ", "t"]
+        phonemes = []
+        for char in word_lower:
+            if char in 'aeiou':
+                vowel_map = {'a': 'æ', 'e': 'ɛ', 'i': 'ɪ', 'o': 'ɒ', 'u': 'ʌ'}
+                phonemes.append(vowel_map.get(char, char))
+            elif char == 'g':
+                phonemes.append('ɡ')
+            else:
+                phonemes.append(char)
+        return phonemes
+def get_word_info(word):
+    """Get syllable and stress info for a word."""
+    word_lower = word.lower().strip()
+    if word_lower in COMMON_UK_WORDS:
+        return {
+            "syllables": COMMON_UK_WORDS[word_lower]["syllables"],
+            "stress": COMMON_UK_WORDS[word_lower]["stress"]
+        }
+    phonemes = get_uk_pronunciation(word)
+    vowel_count = sum(1 for p in phonemes
+                     if UK_PHONEME_DB.get(p, {}).get('type') in ['vowel', 'diphthong'])
+    if vowel_count == 1:
+        stress = "only"
+    elif vowel_count == 2:
+        stress = "first"
+    else:
+        stress = "second"
+    return {
+        "syllables": vowel_count,
+        "stress": stress
+    }
+# ==================================================
+# 4. CORRECTED PHONEME ANALYSIS
+# ==================================================
+def is_exact_phoneme_match(ref, stu):
+    """STRICT matching for accurate scoring."""
+    if not stu:
+        return False
+    ref_norm = ref.replace('ː', '')
+    stu_norm = stu.replace('ː', '')
+    if ref_norm == stu_norm:
+        return True
+    uk_variations = {
+        'ɒ': ['ɔ'], 'ɔ': ['ɒ'],
+        'ɪ': ['i'], 'ɛ': ['e'],
+        'ɡ': ['g'], 'æ': ['a'],
+    }
+    if ref_norm in uk_variations and stu_norm in uk_variations[ref_norm]:
+        return 0.5
+    return False
+def analyze_pronunciation_strict(student_phonemes, reference_phonemes, word):
+    """STRICT analysis."""
+    if not student_phonemes:
+        return {
+            "score": 0,
+            "errors": [],
+            "exact_correct": 0,
+            "partial_correct": 0,
+            "total_expected": len(reference_phonemes) if reference_phonemes else 0,
+            "accuracy_percentage": 0,
+        }
+    min_len = min(len(student_phonemes), len(reference_phonemes))
+    exact_correct = 0
+    partial_correct = 0
+    errors = []
+    for i in range(min_len):
+        ref = reference_phonemes[i]
+        stu = student_phonemes[i]
+        match_result = is_exact_phoneme_match(ref, stu)
+        if match_result == True:
+            exact_correct += 1
+        elif match_result == 0.5:
+            partial_correct += 0.5
+        else:
+            errors.append({
+                "position": i + 1,
+                "expected": ref,
+                "said": stu,
+                "type": UK_PHONEME_DB.get(ref, {}).get("type", "unknown"),
+            })
+    total_expected = len(reference_phonemes) if reference_phonemes else 0
+    if total_expected == 0:
+        score = 0
+    else:
+        base_score = (exact_correct + partial_correct) / total_expected * 100
+        if len(student_phonemes) < len(reference_phonemes):
+            missing_penalty = (len(reference_phonemes) - len(student_phonemes)) / len(reference_phonemes) * 30
+            base_score = max(0, base_score - missing_penalty)
+        if len(student_phonemes) > len(reference_phonemes):
+            extra_penalty = (len(student_phonemes) - len(reference_phonemes)) / len(reference_phonemes) * 20
+            base_score = max(0, base_score - extra_penalty)
+        score = round(max(0, min(100, base_score)), 1)
+    accuracy_percentage = round((exact_correct + partial_correct) / total_expected * 100, 1) if total_expected > 0 else 0
+    return {
+        "score": score,
+        "errors": errors,
+        "exact_correct": exact_correct,
+        "partial_correct": partial_correct,
+        "total_expected": total_expected,
+        "accuracy_percentage": accuracy_percentage,
+    }
+# ==================================================
+# 5. SCENARIO DETECTION
+# ==================================================
+class ScenarioDetector:
+    """Scenario detection with correct priorities."""
+    SCENARIO_PRIORITIES = [
+        'silence',
+        'multiple_words',
+        'wrong_word',
+        'syllable',
+        'ending',
+        'vowel',
+        'consonant',
+        'stress',
+        'success',
+    ]
+    @staticmethod
+    def detect_silence(student_phonemes, audio_error=None):
+        if audio_error:
+            error_lower = audio_error.lower()
+            if any(x in error_lower for x in ['silence', 'quiet', 'empty']):
+                return {
+                    'scenario': 'silence',
+                    'category': 'silence',
+                    'confidence': 1.0,
+                    'feedback': "I couldn't hear anything. Please speak louder.",
+                    'action': "increase_volume"
+                }
+        if not student_phonemes or len(student_phonemes) == 0:
+            return {
+                'scenario': 'silence',
+                'category': 'silence',
+                'confidence': 0.9,
+                'feedback': "No speech detected.",
+                'action': "check_microphone"
+            }
+        return None
+    @staticmethod
+    def detect_multiple_words(student_phonemes, reference_phonemes):
+        if not student_phonemes:
+            return None
+        if len(student_phonemes) > len(reference_phonemes) * 2:
+            return {
+                'scenario': 'multiple_words',
+                'category': 'multiple_words',
+                'confidence': 0.8,
+                'feedback': "I heard multiple words. Please say only one word.",
+                'action': "speak_single_word"
+            }
+        return None
+    @staticmethod
+    def detect_wrong_word(student_phonemes, reference_phonemes, word):
+        if not student_phonemes or not reference_phonemes:
+            return None
+        min_len = min(len(student_phonemes), len(reference_phonemes))
+        if min_len == 0:
+            return None
+        matches = 0
+        for i in range(min_len):
+            ref = reference_phonemes[i]
+            stu = student_phonemes[i]
+            if is_exact_phoneme_match(ref, stu):
+                matches += 1
+        similarity = matches / len(reference_phonemes) if len(reference_phonemes) > 0 else 0
+        if similarity < 0.3:
+            return {
+                'scenario': 'wrong_word',
+                'category': 'wrong_word',
+                'confidence': 0.9,
+                'feedback': f"That doesn't sound like '{word}'.",
+                'action': "repeat_target_word"
+            }
+        return None
+    @staticmethod
+    def detect_syllable_issues(student_phonemes, reference_phonemes, word):
+        if not student_phonemes or not reference_phonemes:
+            return None
+        word_info = get_word_info(word)
+        ref_syllables = word_info["syllables"]
+        stu_vowels = sum(1 for p in student_phonemes
+                        if UK_PHONEME_DB.get(p, {}).get('type') in ['vowel', 'diphthong'])
+        if stu_vowels == 0 and len(student_phonemes) > 0:
+            return {
+                'scenario': 'syllable',
+                'category': 'syllable',
+                'confidence': 0.9,
+                'feedback': f"Missing vowel sounds. '{word}' needs vowel pronunciation.",
+                'action': "add_vowel_sounds"
+            }
+        if ref_syllables >= 2 and abs(stu_vowels - ref_syllables) >= 1:
+            missing_count = len(reference_phonemes) - len(student_phonemes)
+            if missing_count >= 2 and stu_vowels < ref_syllables:
+                return {
+                    'scenario': 'syllable',
+                    'category': 'syllable',
+                    'confidence': 0.8,
+                    'feedback': f"'{word}' has {ref_syllables} syllable(s). You're missing a syllable.",
+                    'action': "add_syllables"
+                }
+            elif stu_vowels > ref_syllables:
+                return {
+                    'scenario': 'syllable',
+                    'category': 'syllable',
+                    'confidence': 0.7,
+                    'feedback': f"'{word}' has {ref_syllables} syllable(s). You added extra sounds.",
+                    'action': "reduce_syllables"
+                }
+        return None
+    @staticmethod
+    def detect_ending_issues(student_phonemes, reference_phonemes):
+        if not student_phonemes or not reference_phonemes:
+            return None
+        if len(student_phonemes) < len(reference_phonemes):
+            missing_count = len(reference_phonemes) - len(student_phonemes)
+            if missing_count == 1:
+                missing_sound = reference_phonemes[-1]
+                return {
+                    'scenario': 'ending',
+                    'category': 'ending',
+                    'confidence': 0.8,
+                    'feedback': f"You're missing the final sound: '{missing_sound}'.",
+                    'action': "complete_ending",
+                    'target_phoneme': missing_sound
+                }
+            elif missing_count > 1:
+                missing_part = reference_phonemes[-missing_count:]
+                missing_vowels = sum(1 for p in missing_part
+                                   if UK_PHONEME_DB.get(p, {}).get('type') in ['vowel', 'diphthong'])
+                if missing_vowels == 0:
+                    return {
+                        'scenario': 'ending',
+                        'category': 'ending',
+                        'confidence': 0.7,
+                        'feedback': f"You're missing the ending: '{''.join(missing_part)}'.",
+                        'action': "complete_ending"
+                    }
+        if len(student_phonemes) >= 1 and len(reference_phonemes) >= 1:
+            final_stu = student_phonemes[-1]
+            final_ref = reference_phonemes[-1]
+            if not is_exact_phoneme_match(final_ref, final_stu):
+                return {
+                    'scenario': 'ending',
+                    'category': 'ending',
+                    'confidence': 0.7,
+                    'feedback': f"Final sound should be '{final_ref}' not '{final_stu}'.",
+                    'action': "correct_final_sound",
+                    'target_phoneme': final_ref
+                }
+        return None
+    @staticmethod
+    def detect_vowel_issues(student_phonemes, reference_phonemes):
+        if not student_phonemes or not reference_phonemes:
+            return None
+        vowel_errors = []
+        min_len = min(len(student_phonemes), len(reference_phonemes))
+        for i in range(min_len):
+            ref = reference_phonemes[i]
+            stu = student_phonemes[i]
+            ref_info = UK_PHONEME_DB.get(ref, {})
+            if ref_info.get('type') in ['vowel', 'diphthong']:
+                if not is_exact_phoneme_match(ref, stu):
+                    vowel_errors.append({
+                        'position': i + 1,
+                        'expected': ref,
+                        'actual': stu,
+                        'tip': f"Use {ref} sound",
+                    })
+        if vowel_errors:
+            primary = vowel_errors[0]
+            return {
+                'scenario': 'vowel',
+                'category': 'vowel',
+                'confidence': 0.9,
+                'feedback': f"Vowel issue: {primary['tip']}",
+                'action': "adjust_vowel",
+                'target_phoneme': primary['expected']
+            }
+        return None
+    @staticmethod
+    def detect_consonant_issues(student_phonemes, reference_phonemes):
+        if not student_phonemes or not reference_phonemes:
+            return None
+        consonant_errors = []
+        min_len = min(len(student_phonemes), len(reference_phonemes))
+        for i in range(min_len):
+            ref = reference_phonemes[i]
+            stu = student_phonemes[i]
+            ref_info = UK_PHONEME_DB.get(ref, {})
+            if ref_info.get('type') == 'consonant':
+                if not is_exact_phoneme_match(ref, stu):
+                    consonant_errors.append({
+                        'position': i + 1,
+                        'expected': ref,
+                        'actual': stu,
+                        'tip': ref_info.get('tip', f'Articulate {ref} clearly'),
+                    })
+        if consonant_errors:
+            primary = consonant_errors[0]
+            return {
+                'scenario': 'consonant',
+                'category': 'consonant',
+                'confidence': 0.8,
+                'feedback': f"Consonant: {primary['tip']}",
+                'action': "articulate_consonant",
+                'target_phoneme': primary['expected']
+            }
+        return None
+    @staticmethod
+    def detect_stress_issues(student_phonemes, reference_phonemes, word):
+        if not student_phonemes or not reference_phonemes:
+            return None
+        word_info = get_word_info(word)
+        if word_info["syllables"] < 2:
+            return None
+        correct_count = 0
+        min_len = min(len(student_phonemes), len(reference_phonemes))
+        for i in range(min_len):
+            if is_exact_phoneme_match(reference_phonemes[i], student_phonemes[i]):
+                correct_count += 1
+        accuracy = correct_count / len(reference_phonemes) if len(reference_phonemes) > 0 else 0
+        if accuracy >= 0.8 and word_info["syllables"] >= 2:
+            stress_pattern = {
+                "first": "first syllable",
+                "second": "second syllable",
+                "third": "third syllable"
+            }.get(word_info["stress"], "correct syllable")
+            return {
+                'scenario': 'stress',
+                'category': 'stress',
+                'confidence': 0.6,
+                'feedback': f"For '{word}', emphasize the {stress_pattern}.",
+                'action': "practice_stress"
+            }
+        return None
+    @staticmethod
+    def detect_success(analysis_result, score):
+        if not analysis_result:
+            return None
+        if score >= 95:
+            return {
+                'scenario': 'success',
+                'category': 'success',
+                'confidence': 1.0,
+                'feedback': "Excellent pronunciation! Perfect! 🎉",
+                'action': "continue_excellent_work"
+            }
+        elif score >= 85:
+            return {
+                'scenario': 'success',
+                'category': 'success',
+                'confidence': 0.9,
+                'feedback': "Very good pronunciation!",
+                'action': "refine_pronunciation"
+            }
+        elif score >= 75:
+            return {
+                'scenario': 'success',
+                'category': 'success',
+                'confidence': 0.8,
+                'feedback': "Good pronunciation! Keep practicing.",
+                'action': "practice_more"
+            }
+        return None
+    @classmethod
+    def detect_scenarios(cls, student_phonemes, reference_phonemes, word, analysis_result, audio_error=None):
+        score = analysis_result.get('score', 0) if analysis_result else 0
+        detectors = [
+            ('silence', lambda: cls.detect_silence(student_phonemes, audio_error)),
+            ('multiple_words', lambda: cls.detect_multiple_words(student_phonemes, reference_phonemes)),
+            ('wrong_word', lambda: cls.detect_wrong_word(student_phonemes, reference_phonemes, word)),
+            ('syllable', lambda: cls.detect_syllable_issues(student_phonemes, reference_phonemes, word)),
+            ('vowel', lambda: cls.detect_vowel_issues(student_phonemes, reference_phonemes)),
+            ('consonant', lambda: cls.detect_consonant_issues(student_phonemes, reference_phonemes)),
+            ('ending', lambda: cls.detect_ending_issues(student_phonemes, reference_phonemes)),
+            ('stress', lambda: cls.detect_stress_issues(student_phonemes, reference_phonemes, word)),
+            ('success', lambda: cls.detect_success(analysis_result, score)),
+        ]
+        for scenario_name, detector_func in detectors:
+            result = detector_func()
+            if result:
+                if scenario_name == 'success' and score < 75:
+                    continue
+                return result
+        return {
+            'scenario': 'needs_improvement',
+            'category': 'general',
+            'confidence': 0.5,
+            'feedback': "Pronunciation needs improvement.",
+            'action': "practice_sounds"
+        }
+# ==================================================
+# 6. IMPROVED VIDEO RAG BUILDER - SMART SELECTION, MERGES MULTIPLE PORTIONS
+# ==================================================
+def build_feedback_video(category, feedback_message, target_phoneme=None, student_errors=None):
+    """
+    Select feedback clips for a category and merge them into one Base64 video.
+    Clip selection is ordered and randomized per category:
+    - success: [praise] -> [move-to-next]
+    - vowel: [specific phoneme] -> [one general]
+    - consonant: [specific phoneme] -> [one general]
+    - other categories: balanced/general strategies (up to 3 clips)
+    Args:
+        category: Value matched against each clip's "category" metadata.
+        feedback_message: Feedback text; its first single-quoted substring is
+            used as the target phoneme when none is supplied or found in errors.
+        target_phoneme: Optional phoneme used to pick a phoneme-specific clip.
+        student_errors: Optional list of dicts; the first entry whose "type" is
+            vowel/diphthong/consonant and whose "expected" is truthy supplies
+            the target phoneme when target_phoneme is not given.
+    Returns:
+        Base64 encoded video string with the merged clips, or "" when no clip
+        is found/selected, the source video is missing, or an error occurs.
+    """
+    print(f"\n=== Building video for: {category} ===")
+    print(f"Target phoneme: {target_phoneme}")
+    print(f"Student errors: {student_errors}")
+    # Extract target phoneme from errors if not provided
+    if not target_phoneme and student_errors:
+        for error in student_errors:
+            if error.get("type") in ["vowel", "diphthong", "consonant"]:
+                target_phoneme = error.get("expected")
+                if target_phoneme:
+                    print(f"Extracted target phoneme from errors: {target_phoneme}")
+                    break
+    # Extract target phoneme from feedback if present
+    # NOTE(review): this takes the first single-quoted substring, whatever it is;
+    # feedback that quotes a whole word would yield that word, not a phoneme — confirm.
+    if not target_phoneme:
+        m = re.search(r"'([^']+)'", feedback_message)
+        target_phoneme = m.group(1) if m else None
+        if target_phoneme:
+            print(f"Extracted target phoneme from feedback: {target_phoneme}")
+    selected_metadatas = []
+    try:
+        # Pull category clips
+        # `collection` is defined elsewhere in this file (presumably a Chroma collection — confirm).
+        gen_results = collection.get(where={"category": category})
+        if not gen_results or not gen_results.get('metadatas'):
+            print(f"No clips found for category: {category}")
+            return ""
+        metadatas = gen_results['metadatas']
+        documents = gen_results.get('documents', [])
+        # Safe zip in case of mismatch
+        items = []
+        for idx, meta in enumerate(metadatas):
+            text = documents[idx] if idx < len(documents) else ""
+            items.append({"meta": meta, "text": text})
+        # Split generic vs specific (for vowel/consonant)
+        # A clip counts as "specific" when its metadata carries a truthy "phoneme".
+        generic_clips = []
+        specific_clips = []  # list of tuples (meta, phoneme)
+        for it in items:
+            meta = it["meta"]
+            clip_phoneme = meta.get("phoneme")
+            if clip_phoneme:
+                specific_clips.append((meta, clip_phoneme))
+            else:
+                # attach text for success/vowel/consonant classification later
+                meta_copy = dict(meta)
+                meta_copy["_text"] = it["text"]
+                generic_clips.append(meta_copy)
+        print(f"Found {len(generic_clips)} generic clips, {len(specific_clips)} specific clips")
+        # Special ordering rules
+        if category == "success":
+            # First: praise message, then: move-next message (both random, dynamic)
+            praise_keywords = ["good", "great", "perfect", "excellent", "well done", "nice", "clear"]
+            next_keywords = ["next", "move"]
+            # Build pools from generic success clips using text
+            # Matching is a case-insensitive substring test, so one clip can land in both pools.
+            praise_pool = [m for m in generic_clips if any(k in m.get("_text", "").lower() for k in praise_keywords)]
+            next_pool = [m for m in generic_clips if any(k in m.get("_text", "").lower() for k in next_keywords)]
+            print(f"Success classification: praise={len(praise_pool)} next={len(next_pool)}")
+            # Pick first (praise) randomly
+            first_clip = random.choice(praise_pool) if praise_pool else (random.choice(generic_clips) if generic_clips else None)
+            # Pick second (move-next) randomly and ensure different from first
+            # Clips are compared by their "start_end" key, not by object identity.
+            if next_pool:
+                next_candidates = [m for m in next_pool if f"{m.get('start')}_{m.get('end')}" != f"{first_clip.get('start')}_{first_clip.get('end')}" ] if first_clip else next_pool
+                second_clip = random.choice(next_candidates) if next_candidates else None
+            else:
+                # Fallback: pick any other success generic clip different from first
+                alt_candidates = [m for m in generic_clips if f"{m.get('start')}_{m.get('end')}" != f"{first_clip.get('start')}_{first_clip.get('end')}" ] if first_clip else generic_clips
+                second_clip = random.choice(alt_candidates) if len(alt_candidates) > 0 else None
+            selected_metadatas.clear()
+            if first_clip:
+                selected_metadatas.append(first_clip)
+            if second_clip:
+                selected_metadatas.append(second_clip)
+        elif category in ["vowel", "consonant"]:
+            # Specific first, then exactly one general
+            specific_found = False
+            # 1) exact phoneme
+            if target_phoneme:
+                for meta, clip_phoneme in specific_clips:
+                    if clip_phoneme == target_phoneme:
+                        selected_metadatas.append(meta)
+                        specific_found = True
+                        print(f"✓ Selected specific {category} clip for phoneme: {target_phoneme}")
+                        break
+            # 2) related fallback (mostly for vowels)
+            if not specific_found and target_phoneme and category == "vowel":
+                # Maps a target vowel to the alternative vowels whose clips are accepted instead.
+                vowel_groups = {
+                    'ɪ': ['iː', 'i'], 'iː': ['ɪ', 'i'],
+                    'æ': ['a', 'ɑː'], 'ɑː': ['æ', 'a'],
+                    'ʊ': ['uː', 'u'], 'uː': ['ʊ', 'u'],
+                    'ɒ': ['ɔ', 'ɔː'], 'ɔː': ['ɒ', 'ɔ'],
+                }
+                related_phonemes = vowel_groups.get(target_phoneme, [])
+                for meta, clip_phoneme in specific_clips:
+                    if clip_phoneme in related_phonemes:
+                        selected_metadatas.append(meta)
+                        specific_found = True
+                        print(f"✓ Selected related vowel clip: {clip_phoneme} for target {target_phoneme}")
+                        break
+            # 3) If still not found and we have any specific clip with same category, prefer one that exists
+            # NOTE(review): this picks a random phoneme-specific clip, which may coach a
+            # different phoneme than the one the student got wrong — confirm this is intended.
+            if not specific_found and specific_clips:
+                fallback_meta, fallback_ph = random.choice(specific_clips)
+                selected_metadatas.append(fallback_meta)
+                specific_found = True
+                print(f"✓ Fallback to available specific {category} clip: {fallback_ph}")
+            # Then exactly one general
+            if generic_clips:
+                general_choice = random.choice(generic_clips)
+                selected_metadatas.append(general_choice)
+                print("✓ Added one general clip after specific")
+            # Note: If no generic and only specific found, we keep only one clip.
+            # If no specific and generic exists, we keep one general clip (as requested “only one general”).
+        else:
+            # Keep existing smart strategy for other categories
+            selection_strategy = "balanced"
+            if category in ["syllable", "ending", "stress"]:
+                selection_strategy = "general_focus"
+            print(f"Using selection strategy: {selection_strategy}")
+            if selection_strategy == "general_focus":
+                # Up to two distinct generic clips, then at most one exact-phoneme clip.
+                if generic_clips:
+                    selected_generic = random.sample(generic_clips, min(2, len(generic_clips)))
+                    selected_metadatas.extend(selected_generic)
+                # Add a specific if relevant and space remains
+                if target_phoneme and len(selected_metadatas) < 3:
+                    for meta, clip_phoneme in specific_clips:
+                        if clip_phoneme == target_phoneme:
+                            selected_metadatas.append(meta)
+                            print(f"✓ Added specific clip for: {target_phoneme}")
+                            break
+            else:
+                # balanced
+                if generic_clips:
+                    selected_metadatas.append(random.choice(generic_clips))
+                if target_phoneme:
+                    for meta, clip_phoneme in specific_clips:
+                        if clip_phoneme == target_phoneme:
+                            selected_metadatas.append(meta)
+                            print(f"✓ Selected specific clip for: {target_phoneme}")
+                            break
+                # Fill with additional generic if needed
+                if len(selected_metadatas) < 2 and generic_clips:
+                    remaining = [c for c in generic_clips if c not in selected_metadatas]
+                    if remaining:
+                        selected_metadatas.append(random.choice(remaining))
+        # Remove duplicates while preserving order
+        # Two clips are treated as the same when their start and end values match.
+        unique_metadatas = []
+        seen = set()
+        for meta in selected_metadatas:
+            key = f"{meta.get('start')}_{meta.get('end')}"
+            if key not in seen:
+                seen.add(key)
+                unique_metadatas.append(meta)
+        selected_metadatas = unique_metadatas
+        # Ensure minimum clips but DO NOT violate vowel/consonant rule (only one general)
+        if category not in ["vowel", "consonant"]:
+            if len(selected_metadatas) < 2 and generic_clips:
+                needed = 2 - len(selected_metadatas)
+                remaining = [c for c in generic_clips if c not in selected_metadatas]
+                if remaining:
+                    selected_metadatas.extend(random.sample(remaining, min(needed, len(remaining))))
+        if len(selected_metadatas) == 0:
+            print("No clips selected after filtering.")
+            return ""
+        print(f"Selected {len(selected_metadatas)} video clips:")
+        for i, meta in enumerate(selected_metadatas):
+            phoneme = meta.get('phoneme', 'generic')
+            print(f"  Clip {i+1}: {meta.get('category')} - {phoneme} [{meta.get('start')}->{meta.get('end')}]")
+        # --- FFmpeg Processing ---
+        if not os.path.exists(VIDEO_PATH):
+            print(f"Video file not found: {VIDEO_PATH}")
+            return ""
+        # Temp artefacts are tracked here so the finally block below can delete them.
+        clips = []
+        concat_file = None
+        final_video_path = None
+        try:
+            # Extract individual clips
+            for i, seg in enumerate(selected_metadatas):
+                tmp_clip = tempfile.NamedTemporaryFile(delete=False, suffix=f"_{i}.mp4")
+                tmp_clip.close()
+                # Extract segment
+                # NOTE(review): the return code is not checked and ffmpeg output is discarded,
+                # so a failed extraction still adds its temp file to the concat list.
+                subprocess.run([
+                    "ffmpeg", "-y", "-ss", str(seg["start"]), "-to", str(seg["end"]),
+                    "-i", VIDEO_PATH, "-c:v", "libx264", "-preset", "ultrafast",
+                    "-crf", "28", "-c:a", "aac", tmp_clip.name
+                ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                clips.append(tmp_clip.name)
+            # Create concat list
+            concat_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w")
+            for clip_path in clips:
+                concat_file.write(f"file '{os.path.abspath(clip_path)}'\n")
+            concat_file.close()
+            # Create final video
+            final_video_path = tempfile.NamedTemporaryFile(delete=False, suffix="_final.mp4")
+            final_video_path.close()
+            # Concatenate
+            # Streams are copied without re-encoding; every clip above was encoded with the same settings.
+            # NOTE(review): the return code is not checked here either.
+            subprocess.run([
+                "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_file.name,
+                "-c", "copy", final_video_path.name
+            ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+            # Encode to Base64
+            with open(final_video_path.name, "rb") as f:
+                v_data = base64.b64encode(f.read()).decode()
+            print(f"✓ Successfully merged {len(clips)} video clips")
+            return v_data
+        except Exception as e:
+            print(f"✗ Video concatenation error: {e}")
+            return ""
+        finally:
+            # Cleanup
+            if concat_file and os.path.exists(concat_file.name):
+                os.remove(concat_file.name)
+            if final_video_path and os.path.exists(final_video_path.name):
+                os.remove(final_video_path.name)
+            for c in clips:
+                if os.path.exists(c):
+                    os.remove(c)
+    except Exception as e:
+        # Any failure during lookup or selection is reported and turned into an empty result.
+        print(f"✗ Video generation error: {e}")
+        return ""
+# ==================================================
+# 7. AUDIO PROCESSING
+# ==================================================
+def process_audio_file(audio_path):
+    """Process audio file."""
+    try:
+        wav_path = audio_path.replace('.webm', '.wav')
+        subprocess.run([
+            "ffmpeg", "-y", "-i", audio_path,
+            "-ac", "1", "-ar", "16000",
+            "-acodec", "pcm_s16le",
+            wav_path
+        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        speech, sample_rate = sf.read(wav_path)
+        if len(speech) == 0:
+            return None, "empty_audio"
+        rms = np.sqrt(np.mean(speech**2))
+        peak = np.max(np.abs(speech))
+        if rms < 0.001 or peak < 0.02:
+            return None, f"silent_rms_{rms:.6f}_peak_{peak:.4f}"
+        if peak < 0.5:
+            boost_factor = 0.5 / peak if peak > 0 else 1.0
+            speech = speech * min(boost_factor, 3.0)
+        inputs = processor(speech, sampling_rate=sample_rate,
+                          return_tensors="pt", padding=True)
+        with torch.no_grad():
+            logits = model(inputs.input_values.to(DEVICE)).logits
+        pred_ids = torch.argmax(logits, dim=-1)
+        raw_transcription = processor.batch_decode(pred_ids)[0]
+        phonemes = [p for p in raw_transcription.replace(" ", "") if p.strip()]
+        print(f"Extracted phonemes: {phonemes}")
+        return phonemes, None
+    except Exception as e:
+        print(f"Audio processing error: {str(e)}")
+        return None, f"error: {str(e)}"
+# ==================================================
+# 8. TEST VIDEO GENERATION
+# ==================================================
+def test_video_generation():
+    """Test that video generation merges multiple clips."""
+    print("\n=== TESTING VIDEO GENERATION ===")
+    test_cases = [
+        {"category": "syllable", "feedback": "Syllable issue", "target_phoneme": None},
+        {"category": "vowel", "feedback": "Vowel issue for 'æ'", "target_phoneme": "æ"},
+        {"category": "consonant", "feedback": "Consonant issue for 'r'", "target_phoneme": "r"},
+        {"category": "ending", "feedback": "Missing final 't'", "target_phoneme": "t"},
+    ]
+    for test in test_cases:
+        print(f"\nTesting category: {test['category']}")
+        video_blob = build_feedback_video(
+            test['category'],
+            test['feedback'],
+            test['target_phoneme']
+        )
+        if video_blob:
+            print(f"✓ Video generated successfully ({len(video_blob)} bytes)")
+            print(f"  Contains multiple merged clips")
+        else:
+            print(f"✗ Failed to generate video")
+        # Also test with just the feedback message
+        video_blob2 = build_feedback_video(
+            test['category'],
+            test['feedback']
+        )
+        if video_blob2:
+            print(f"✓ Video also works without explicit target phoneme")
+    print("\n" + "="*60)
+# ==================================================
+# 9. MAIN ENDPOINT
+# ==================================================
+@pronragupgrade_bp.route("/score", methods=["POST"])
+def train_pronunciation():
+    """Main endpoint with multi-clip video feedback."""
+    try:
+        word = request.form.get('word', '').strip().lower()
+        if not word:
+            return jsonify({
+                "success": False,
+                "error": "No word provided",
+                "scenario": "input_error"
+            }), 400
+        if 'audio' not in request.files:
+            return jsonify({
+                "success": False,
+                "error": "No audio file",
+                "scenario": "input_error"
+            }), 400
+        audio_file = request.files['audio']
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as tmp_file:
+            audio_file.save(tmp_file.name)
+            temp_path = tmp_file.name
+        print(f"\n=== Processing: '{word}' ===")
+        try:
+            # Process audio
+            student_phonemes, audio_error = process_audio_file(temp_path)
+            # Get reference
+            reference_phonemes = get_uk_pronunciation(word)
+            # Analyze
+            analysis = analyze_pronunciation_strict(student_phonemes, reference_phonemes, word)
+            score = analysis["score"]
+            # Detect scenario
+            scenario_info = ScenarioDetector.detect_scenarios(
+                student_phonemes=student_phonemes,
+                reference_phonemes=reference_phonemes,
+                word=word,
+                analysis_result=analysis,
+                audio_error=audio_error
+            )
+            scenario = scenario_info['scenario']
+            category = scenario_info.get('category', scenario)
+            feedback = scenario_info['feedback']
+            action = scenario_info.get('action', '')
+            target_phoneme = scenario_info.get('target_phoneme')
+            # Generate video with MULTIPLE clips
+            print(f"Generating video for category: {category}")
+            video_blob = build_feedback_video(category, feedback, target_phoneme)
+            # Prepare response
+            response = {
+                "success": True,
+                "scenario": scenario,
+                "score": score,
+                "is_acceptable": score >= 75,
+                "word": word,
+                "student_phonemes": student_phonemes if student_phonemes else [],
+                "reference_phonemes": reference_phonemes,
+                "ipa_notation": "/" + "".join(reference_phonemes) + "/",
+                "feedback": feedback,
+                "action_suggestion": action,
+                "videoBlobBase64": video_blob if video_blob else "",
+                "video_clips_merged": True if video_blob else False,
+                "analysis": {
+                    "accuracy": f"{analysis.get('exact_correct', 0)}/{analysis.get('total_expected', 0)} exact matches",
+                    "accuracy_percentage": analysis.get('accuracy_percentage', 0),
+                }
+            }
+            return jsonify(response)
+        finally:
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
+                wav_path = temp_path.replace('.webm', '.wav')
+                if os.path.exists(wav_path):
+                    os.remove(wav_path)
+    except Exception as e:
+        print(f"Error: {str(e)}")
+        return jsonify({
+            "success": False,
+            "error": str(e),
+            "scenario": "system_error"
+        }), 500

requirements.txt CHANGED Viewed

@@ -53,3 +53,11 @@ openai-whisper
 rapidfuzz==3.6.1
 faster-whisper

 rapidfuzz==3.6.1
 faster-whisper
+numpy
+soundfile
+eng-to-ipa
+torch
+torchaudio
+torchcodec

teacher_feedback_sentences_category.json CHANGED Viewed

@@ -1,338 +1,678 @@
 [
   {
-    "id": "feedback.mp4_sent_0",
     "video_file": "feedback.mp4",
-    "start": 0.167,
-    "end": 1.689,
-    "text": "I could not hear your voice.",
-    "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_1",
     "video_file": "feedback.mp4",
-    "start": 2.771,
-    "end": 5.735,
-    "text": "Hold the record button and say the word.",
-    "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_2",
     "video_file": "feedback.mp4",
-    "start": 6.936,
-    "end": 9.319,
-    "text": "Speak clearly and try again.",
-    "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_3",
     "video_file": "feedback.mp4",
-    "start": 9.24,
-    "end": 12.304,
-    "text": "It sounds like a different word.",
     "category": "wrong_word"
   },
   {
-    "id": "feedback.mp4_sent_4",
     "video_file": "feedback.mp4",
-    "start": 13.305,
-    "end": 15.568,
-    "text": "Please say the correct word again.",
     "category": "wrong_word"
   },
   {
-    "id": "feedback.mp4_sent_5",
     "video_file": "feedback.mp4",
-    "start": 15.548,
-    "end": 19.153,
-    "text": "Listen to the example and repeat.",
     "category": "wrong_word"
   },
   {
-    "id": "feedback.mp4_sent_6",
     "video_file": "feedback.mp4",
-    "start": 20.255,
-    "end": 20.956,
-    "text": "Excellent!",
-    "category": "success"
   },
   {
-    "id": "feedback.mp4_sent_7",
     "video_file": "feedback.mp4",
-    "start": 21.456,
-    "end": 23.078,
-    "text": "You said it correctly.",
-    "category": "success"
   },
   {
-    "id": "feedback.mp4_sent_8",
     "video_file": "feedback.mp4",
-    "start": 24.22,
-    "end": 26.463,
-    "text": "Your pronunciation is clear.",
-    "category": "success"
   },
   {
-    "id": "feedback.mp4_sent_9",
     "video_file": "feedback.mp4",
-    "start": 27.585,
-    "end": 28.586,
-    "text": "Great job!",
-    "category": "success"
   },
   {
-    "id": "feedback.mp4_sent_10",
     "video_file": "feedback.mp4",
-    "start": 28.989,
-    "end": 30.631,
-    "text": "Move to the next word",
-    "category": "success"
   },
   {
-    "id": "feedback.mp4_sent_11",
     "video_file": "feedback.mp4",
-    "start": 30.571,
-    "end": 33.514,
-    "text": "Focus on the vowel sound",
-    "category": "vowel"
   },
   {
-    "id": "feedback.mp4_sent_12",
     "video_file": "feedback.mp4",
-    "start": 33.454,
-    "end": 36.717,
-    "text": "Open your mouth a little more",
-    "category": "vowel"
   },
   {
-    "id": "feedback.mp4_sent_13",
     "video_file": "feedback.mp4",
-    "start": 37.998,
-    "end": 40.441,
-    "text": "Say the word slowly once",
-    "category": "vowel"
   },
   {
-    "id": "feedback.mp4_sent_14",
     "video_file": "feedback.mp4",
-    "start": 30.571,
-    "end": 44.105,
-    "text": "Focus on the first sound",
-    "category": "consonant"
   },
   {
-    "id": "feedback.mp4_sent_15",
     "video_file": "feedback.mp4",
-    "start": 44.045,
-    "end": 47.388,
-    "text": "Make the consonant clear",
-    "category": "consonant"
   },
   {
-    "id": "feedback.mp4_sent_16",
     "video_file": "feedback.mp4",
-    "start": 47.328,
-    "end": 50.812,
-    "text": "Repeat the word slowly",
-    "category": "consonant"
   },
   {
-    "id": "feedback.mp4_sent_17",
     "video_file": "feedback.mp4",
-    "start": 50.732,
-    "end": 54.075,
-    "text": "Do not stop early",
     "category": "ending"
   },
   {
-    "id": "feedback.mp4_sent_18",
     "video_file": "feedback.mp4",
-    "start": 37.998,
-    "end": 57.999,
-    "text": "Say the last sound clearly",
     "category": "ending"
   },
   {
-    "id": "feedback.mp4_sent_19",
     "video_file": "feedback.mp4",
-    "start": 59.145,
-    "end": 60.647,
-    "text": "Try the word again.",
     "category": "ending"
   },
   {
-    "id": "feedback.mp4_sent_20",
     "video_file": "feedback.mp4",
-    "start": 60.587,
-    "end": 64.873,
-    "text": "Say the strong part a little louder.",
-    "category": "stress"
   },
   {
-    "id": "feedback.mp4_sent_21",
     "video_file": "feedback.mp4",
-    "start": 64.813,
-    "end": 69.018,
-    "text": "Keep the rest of the word smooth.",
-    "category": "stress"
   },
   {
-    "id": "feedback.mp4_sent_22",
     "video_file": "feedback.mp4",
-    "start": 70.34,
-    "end": 72.863,
-    "text": "Try again with clear stress.",
-    "category": "stress"
   },
   {
-    "id": "feedback.mp4_sent_23",
     "video_file": "feedback.mp4",
-    "start": 74.365,
-    "end": 76.328,
-    "text": "Break the word into parts.",
-    "category": "syllable"
   },
   {
-    "id": "feedback.mp4_sent_24",
     "video_file": "feedback.mp4",
-    "start": 77.73,
-    "end": 80.013,
-    "text": "Say each part slowly.",
-    "category": "syllable"
   },
   {
-    "id": "feedback.mp4_sent_25",
     "video_file": "feedback.mp4",
-    "start": 81.154,
-    "end": 83.177,
-    "text": "Then say the full word.",
-    "category": "syllable"
   },
   {
-    "id": "feedback.mp4_sent_26",
     "video_file": "feedback.mp4",
-    "start": 84.525,
-    "end": 88.95,
-    "text": "Say only the target word without extra words.",
-    "category": "multipleword"
   },
   {
-    "id": "feedback.mp4_sent_27",
     "video_file": "feedback.mp4",
-    "start": 89.311,
-    "end": 92.474,
-    "text": "Use a big a sound at the start.",
-    "category": "apple"
   },
   {
-    "id": "feedback.mp4_sent_28",
     "video_file": "feedback.mp4",
-    "start": 93.275,
-    "end": 95.738,
-    "text": "Apple, not apple.",
-    "category": "apple"
   },
   {
-    "id": "feedback.mp4_sent_29",
     "video_file": "feedback.mp4",
-    "start": 97.079,
-    "end": 103.226,
-    "text": "Open your mouth more for a, like apple, not apple.",
-    "category": "apple"
   },
   {
-    "id": "feedback.mp4_sent_30",
     "video_file": "feedback.mp4",
-    "start": 104.548,
-    "end": 108.372,
-    "text": "Make a short o sound, not o.",
-    "category": "ball"
   },
   {
-    "id": "feedback.mp4_sent_31",
     "video_file": "feedback.mp4",
-    "start": 109.173,
-    "end": 110.775,
-    "text": "Say ball.",
-    "category": "ball"
   },
   {
-    "id": "feedback.mp4_sent_32",
     "video_file": "feedback.mp4",
-    "start": 110.695,
-    "end": 114.159,
-    "text": "Start with a strong b.",
-    "category": "ball"
   },
   {
-    "id": "feedback.mp4_sent_33",
     "video_file": "feedback.mp4",
-    "start": 114.326,
-    "end": 114.987,
-    "text": "sound.",
-    "category": "ball"
   },
   {
-    "id": "feedback.mp4_sent_34",
     "video_file": "feedback.mp4",
-    "start": 116.629,
-    "end": 118.572,
     "text": "I could not hear your voice.",
     "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_35",
     "video_file": "feedback.mp4",
-    "start": 119.233,
-    "end": 121.837,
     "text": "Please hold the record button and say the word.",
     "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_36",
     "video_file": "feedback.mp4",
-    "start": 121.777,
-    "end": 124.4,
     "text": "It was very quiet.",
     "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_37",
     "video_file": "feedback.mp4",
-    "start": 125.081,
-    "end": 127.425,
     "text": "Speak a little louder and try again.",
     "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_38",
     "video_file": "feedback.mp4",
-    "start": 127.345,
-    "end": 130.329,
     "text": "No sound was recorded.",
     "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_39",
     "video_file": "feedback.mp4",
-    "start": 130.95,
-    "end": 133.594,
     "text": "Check your microphone and say the word again.",
     "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_40",
     "video_file": "feedback.mp4",
-    "start": 133.514,
-    "end": 136.177,
     "text": "I think you whispered.",
     "category": "silence"
   },
   {
-    "id": "feedback.mp4_sent_41",
     "video_file": "feedback.mp4",
-    "start": 136.958,
-    "end": 139.522,
     "text": "Use your clear classroom voice.",
     "category": "silence"
   }
-]

 [
   {
+    "id": "feedback.mp4_sent_000",
     "video_file": "feedback.mp4",
+    "start": "000.000",
+    "end": "002.171",
+    "text": "Good job, keep it up.",
+    "category": "success"
   },
   {
+    "id": "feedback.mp4_sent_001",
     "video_file": "feedback.mp4",
+    "start": "002.897",
+    "end": "006.374",
+    "text": "Perfect pronunciation.",
+    "category": "success"
   },
   {
+    "id": "feedback.mp4_sent_002",
     "video_file": "feedback.mp4",
+    "start": "006.953",
+    "end": "011.010",
+    "text": "Great, your pronunciation is clear.",
+    "category": "success"
+  },
+  {
+    "id": "feedback.mp4_sent_003",
+    "video_file": "feedback.mp4",
+    "start": "011.589",
+    "end": "014.200",
+    "text": "Move to the next word.",
+    "category": "success"
+  },
+  {
+    "id": "feedback.mp4_sent_004",
+    "video_file": "feedback.mp4",
+    "start": "014.766",
+    "end": "017.060",
+    "text": "Let's move on.",
+    "category": "success"
   },
   {
+    "id": "feedback.mp4_sent_005",
     "video_file": "feedback.mp4",
+    "start": "017.098",
+    "end": "019.700",
+    "text": "Ready for the next word?",
+    "category": "success"
+  },
+  {
+    "id": "feedback.mp4_sent_006",
+    "video_file": "feedback.mp4",
+    "start": "019.992",
+    "end": "023.175",
+    "text": "You said a different word.",
     "category": "wrong_word"
   },
   {
+    "id": "feedback.mp4_sent_007",
     "video_file": "feedback.mp4",
+    "start": "023.178",
+    "end": "026.365",
+    "text": "Oops! That’s not the word.",
     "category": "wrong_word"
   },
   {
+    "id": "feedback.mp4_sent_008",
     "video_file": "feedback.mp4",
+    "start": "026.660",
+    "end": "029.240",
+    "text": "I heard a different word.",
     "category": "wrong_word"
   },
   {
+    "id": "feedback.mp4_sent_009",
     "video_file": "feedback.mp4",
+    "start": "030.134",
+    "end": "033.135",
+    "text": "Let’s focus on the correct one.",
+    "category": "wrong_word"
   },
   {
+    "id": "feedback.mp4_sent_010",
     "video_file": "feedback.mp4",
+    "start": "033.615",
+    "end": "037.085",
+    "text": "Let’s practice the word once more.",
+    "category": "wrong_word"
   },
   {
+    "id": "feedback.mp4_sent_011",
     "video_file": "feedback.mp4",
+    "start": "037.380",
+    "end": "040.270",
+    "text": "Can you say the correct word again?",
+    "category": "wrong_word"
   },
   {
+    "id": "feedback.mp4_sent_012",
     "video_file": "feedback.mp4",
+    "start": "041.412",
+    "end": "044.325",
+    "text": "I heard more than one word.",
+    "category": "multiple_words"
   },
   {
+    "id": "feedback.mp4_sent_013",
     "video_file": "feedback.mp4",
+    "start": "044.330",
+    "end": "047.220",
+    "text": "You said multiple words.",
+    "category": "multiple_words"
   },
   {
+    "id": "feedback.mp4_sent_014",
     "video_file": "feedback.mp4",
+    "start": "047.230",
+    "end": "050.700",
+    "text": "Let’s focus on just one.",
+    "category": "multiple_words"
   },
   {
+    "id": "feedback.mp4_sent_015",
     "video_file": "feedback.mp4",
+    "start": "050.993",
+    "end": "054.175",
+    "text": "Please say only one word.",
+    "category": "multiple_words"
   },
   {
+    "id": "feedback.mp4_sent_016",
     "video_file": "feedback.mp4",
+    "start": "055.091",
+    "end": "059.041",
+    "text": "Oops! I did not hear all the parts of the word.",
+    "category": "syllable"
   },
   {
+    "id": "feedback.mp4_sent_017",
     "video_file": "feedback.mp4",
+    "start": "059.665",
+    "end": "062.783",
+    "text": "It sounded like the word was missing a part.",
+    "category": "syllable"
   },
   {
+    "id": "feedback.mp4_sent_018",
     "video_file": "feedback.mp4",
+    "start": "063.615",
+    "end": "067.980",
+    "text": "No problem—try again, slowly and clearly.",
+    "category": "syllable"
   },
   {
+    "id": "feedback.mp4_sent_019",
     "video_file": "feedback.mp4",
+    "start": "068.812",
+    "end": "073.178",
+    "text": "Nice try—let’s say every part this time.",
+    "category": "syllable"
+  },
+  {
+    "id": "feedback.mp4_sent_020",
+    "video_file": "feedback.mp4",
+    "start": "074.010",
+    "end": "077.750",
+    "text": "Tap once for each part and say it again.",
+    "category": "syllable"
+  },
+  {
+    "id": "feedback.mp4_sent_021",
+    "video_file": "feedback.mp4",
+    "start": "078.583",
+    "end": "083.572",
+    "text": "Listen, then say it in parts, not too fast.",
+    "category": "syllable"
   },
   {
+    "id": "feedback.mp4_sent_022",
     "video_file": "feedback.mp4",
+    "start": "084.815",
+    "end": "088.355",
+    "text": "Oops! The ending sound was too soft.",
     "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_023",
     "video_file": "feedback.mp4",
+    "start": "088.769",
+    "end": "092.719",
+    "text": "It sounds like the last sound got skipped.",
     "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_024",
     "video_file": "feedback.mp4",
+    "start": "093.135",
+    "end": "096.877",
+    "text": "You missed the ending sound, try again.",
     "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_025",
     "video_file": "feedback.mp4",
+    "start": "097.915",
+    "end": "102.074",
+    "text": "The last sound wasn’t clear. Please say it again.",
+    "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_026",
     "video_file": "feedback.mp4",
+    "start": "102.698",
+    "end": "107.687",
+    "text": "No worries—try again and say the ending clearly.",
+    "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_027",
     "video_file": "feedback.mp4",
+    "start": "108.310",
+    "end": "113.716",
+    "text": "Say the word again and make the last sound loud and clear.",
+    "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_028",
     "video_file": "feedback.mp4",
+    "start": "114.130",
+    "end": "119.121",
+    "text": "You added an extra sound at the end. Let’s correct that.",
+    "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_029",
     "video_file": "feedback.mp4",
+    "start": "119.537",
+    "end": "124.942",
+    "text": "The ending sound was too long. Try saying it shorter.",
+    "category": "ending"
   },
   {
+    "id": "feedback.mp4_sent_030",
     "video_file": "feedback.mp4",
+    "start": 124.944,
+    "end": 127.645,
+    "text": "Check your vowel sound /æ/.",
+    "category": "vowel",
+    "phoneme": "æ"
+  },
+  {
+    "id": "feedback.mp4_sent_031",
+    "video_file": "feedback.mp4",
+    "start": 130.550,
+    "end": 133.674,
+    "text": "Check your vowel sound /e/.",
+    "category": "vowel",
+    "phoneme": "e"
+  },
+  {
+    "id": "feedback.mp4_sent_032",
+    "video_file": "feedback.mp4",
+    "start": 135.960,
+    "end": 139.287,
+    "text": "Check your vowel sound /ɪ/.",
+    "category": "vowel",
+    "phoneme": "ɪ"
   },
   {
+    "id": "feedback.mp4_sent_033",
     "video_file": "feedback.mp4",
+    "start": 141.366,
+    "end": 146.355,
+    "text": "Check your vowel sound /ɒ/ or /ɑ/.",
+    "category": "vowel",
+    "phoneme": "ɒ"
   },
   {
+    "id": "feedback.mp4_sent_034",
     "video_file": "feedback.mp4",
+    "start": 148.266,
+    "end": 151.344,
+    "text": "Check your vowel sound /ʌ/.",
+    "category": "vowel",
+    "phoneme": "ʌ"
   },
   {
+    "id": "feedback.mp4_sent_035",
     "video_file": "feedback.mp4",
+    "start": 151.345,
+    "end": 154.255,
+    "text": "Check your vowel sound /iː/.",
+    "category": "vowel",
+    "phoneme": "iː"
   },
   {
+    "id": "feedback.mp4_sent_036",
     "video_file": "feedback.mp4",
+    "start": 158.622,
+    "end": 161.947,
+    "text": "Check your vowel sound /uː/.",
+    "category": "vowel",
+    "phoneme": "uː"
   },
   {
+    "id": "feedback.mp4_sent_037",
     "video_file": "feedback.mp4",
+    "start": 166.936,
+    "end": 170.262,
+    "text": "Check your vowel sound /eɪ/.",
+    "category": "vowel",
+    "phoneme": "eɪ"
   },
   {
+    "id": "feedback.mp4_sent_038",
     "video_file": "feedback.mp4",
+    "start": 181.489,
+    "end": 184.607,
+    "text": "Check your vowel sound /ɜː/.",
+    "category": "vowel",
+    "phoneme": "ɜː"
   },
   {
+    "id": "feedback.mp4_sent_039",
     "video_file": "feedback.mp4",
+    "start": 187.725,
+    "end": 190.844,
+    "text": "Check your vowel sound /ɔː/.",
+    "category": "vowel",
+    "phoneme": "ɔː"
   },
   {
+    "id": "feedback.mp4_sent_040",
     "video_file": "feedback.mp4",
+    "start": 193.335,
+    "end": 196.460,
+    "text": "Check your vowel sound /aʊ/.",
+    "category": "vowel",
+    "phoneme": "aʊ"
   },
   {
+    "id": "feedback.mp4_sent_041",
     "video_file": "feedback.mp4",
+    "start": "197.285",
+    "end": "202.280",
+    "text": "Check your vowel sound, make sure it's pronounced clearly.",
+    "category": "vowel"
+  },
+  {
+    "id": "feedback.mp4_sent_042",
+    "video_file": "feedback.mp4",
+    "start": "204.359",
+    "end": "210.175",
+    "text": "Listen closely to the vowel sound, it needs to be clearer.",
+    "category": "vowel"
+  },
+  {
+    "id": "feedback.mp4_sent_043",
+    "video_file": "feedback.mp4",
+    "start": "212.465",
+    "end": "217.670",
+    "text": "Try saying the vowel sound a little longer and more clearly.",
+    "category": "vowel"
+  },
+  {
+    "id": "feedback.mp4_sent_044",
+    "video_file": "feedback.mp4",
+    "start": "242.608",
+    "end": "245.519",
     "text": "I could not hear your voice.",
     "category": "silence"
   },
   {
+    "id": "feedback.mp4_sent_045",
     "video_file": "feedback.mp4",
+    "start": "245.935",
+    "end": "250.092",
     "text": "Please hold the record button and say the word.",
     "category": "silence"
   },
   {
+    "id": "feedback.mp4_sent_046",
     "video_file": "feedback.mp4",
+    "start": "251.756",
+    "end": "254.250",
     "text": "It was very quiet.",
     "category": "silence"
   },
   {
+    "id": "feedback.mp4_sent_047",
     "video_file": "feedback.mp4",
+    "start": "254.455",
+    "end": "258.408",
     "text": "Speak a little louder and try again.",
     "category": "silence"
   },
   {
+    "id": "feedback.mp4_sent_048",
     "video_file": "feedback.mp4",
+    "start": "259.235",
+    "end": "262.155",
     "text": "No sound was recorded.",
     "category": "silence"
   },
   {
+    "id": "feedback.mp4_sent_049",
     "video_file": "feedback.mp4",
+    "start": "262.566",
+    "end": "266.724",
     "text": "Check your microphone and say the word again.",
     "category": "silence"
   },
   {
+    "id": "feedback.mp4_sent_050",
     "video_file": "feedback.mp4",
+    "start": "267.345",
+    "end": "270.050",
     "text": "I think you whispered.",
     "category": "silence"
   },
   {
+    "id": "feedback.mp4_sent_051",
     "video_file": "feedback.mp4",
+    "start": "270.260",
+    "end": "274.625",
     "text": "Use your clear classroom voice.",
     "category": "silence"
+  },
+  {
+    "id": "feedback.mp4_sent_052",
+    "video_file": "feedback.mp4",
+    "start": "219.533",
+    "end": "224.314",
+    "text": "Check your consonant sound, it should be sharper.",
+    "category": "consonant"
+  },
+  {
+    "id": "feedback.mp4_sent_053",
+    "video_file": "feedback.mp4",
+    "start": "226.394",
+    "end": "232.838",
+    "text": "Make sure to pronounce the consonant clearly, it’s important for clarity.",
+    "category": "consonant"
+  },
+  {
+    "id": "feedback.mp4_sent_054",
+    "video_file": "feedback.mp4",
+    "start": "234.196",
+    "end": "240.737",
+    "text": "Focus on the consonant sound, it needs to be more distinct.",
+    "category": "consonant"
+  },
+  {
+    "id": "feedback.mp4_sent_055",
+    "video_file": "feedback.mp4",
+    "start": 273.684,
+    "end": 277.460,
+    "text": "Check your consonant sound /b/.",
+    "category": "consonant",
+    "phoneme": "b"
+  },
+  {
+    "id": "feedback.mp4_sent_056",
+    "video_file": "feedback.mp4",
+    "start": 277.465,
+    "end": 281.820,
+    "text": "Check your consonant sound /ch/.",
+    "category": "consonant",
+    "phoneme": "tʃ"
+  },
+  {
+    "id": "feedback.mp4_sent_057",
+    "video_file": "feedback.mp4",
+    "start": 282.404,
+    "end": 286.760,
+    "text": "Check your consonant sound /d/.",
+    "category": "consonant",
+    "phoneme": "d"
+  },
+  {
+    "id": "feedback.mp4_sent_058",
+    "video_file": "feedback.mp4",
+    "start": 287.049,
+    "end": 291.407,
+    "text": "Check your consonant sound /dh/.",
+    "category": "consonant",
+    "phoneme": "ð"
+  },
+  {
+    "id": "feedback.mp4_sent_059",
+    "video_file": "feedback.mp4",
+    "start": 291.697,
+    "end": 296.055,
+    "text": "Check your consonant sound /f/.",
+    "category": "consonant",
+    "phoneme": "f"
+  },
+  {
+    "id": "feedback.mp4_sent_060",
+    "video_file": "feedback.mp4",
+    "start": 296.635,
+    "end": 300.704,
+    "text": "Check your consonant sound /g/.",
+    "category": "consonant",
+    "phoneme": "ɡ"
+  },
+  {
+    "id": "feedback.mp4_sent_061",
+    "video_file": "feedback.mp4",
+    "start": 301.285,
+    "end": 305.352,
+    "text": "Check your consonant sound /h/.",
+    "category": "consonant",
+    "phoneme": "h"
+  },
+  {
+    "id": "feedback.mp4_sent_062",
+    "video_file": "feedback.mp4",
+    "start": 305.643,
+    "end": 310.001,
+    "text": "Check your consonant sound /j/.",
+    "category": "consonant",
+    "phoneme": "dʒ"
+  },
+  {
+    "id": "feedback.mp4_sent_063",
+    "video_file": "feedback.mp4",
+    "start": 310.290,
+    "end": 314.649,
+    "text": "Check your consonant sound /k/.",
+    "category": "consonant",
+    "phoneme": "k"
+  },
+  {
+    "id": "feedback.mp4_sent_064",
+    "video_file": "feedback.mp4",
+    "start": 314.935,
+    "end": 319.295,
+    "text": "Check your consonant sound /l/.",
+    "category": "consonant",
+    "phoneme": "l"
+  },
+  {
+    "id": "feedback.mp4_sent_065",
+    "video_file": "feedback.mp4",
+    "start": 319.300,
+    "end": 323.945,
+    "text": "Check your consonant sound /m/.",
+    "category": "consonant",
+    "phoneme": "m"
+  },
+  {
+    "id": "feedback.mp4_sent_066",
+    "video_file": "feedback.mp4",
+    "start": 323.948,
+    "end": 328.595,
+    "text": "Check your consonant sound /n/.",
+    "category": "consonant",
+    "phoneme": "n"
+  },
+  {
+    "id": "feedback.mp4_sent_067",
+    "video_file": "feedback.mp4",
+    "start": 328.597,
+    "end": 333.245,
+    "text": "Check your consonant sound /ng/.",
+    "category": "consonant",
+    "phoneme": "ŋ"
+  },
+  {
+    "id": "feedback.mp4_sent_068",
+    "video_file": "feedback.mp4",
+    "start": 333.820,
+    "end": 337.604,
+    "text": "Check your consonant sound /p/.",
+    "category": "consonant",
+    "phoneme": "p"
+  },
+  {
+    "id": "feedback.mp4_sent_069",
+    "video_file": "feedback.mp4",
+    "start": 338.180,
+    "end": 342.539,
+    "text": "Check your consonant sound /r/.",
+    "category": "consonant",
+    "phoneme": "r"
+  },
+  {
+    "id": "feedback.mp4_sent_070",
+    "video_file": "feedback.mp4",
+    "start": 342.541,
+    "end": 346.899,
+    "text": "Check your consonant sound /s/.",
+    "category": "consonant",
+    "phoneme": "s"
+  },
+  {
+    "id": "feedback.mp4_sent_071",
+    "video_file": "feedback.mp4",
+    "start": 346.902,
+    "end": 351.547,
+    "text": "Check your consonant sound /sh/.",
+    "category": "consonant",
+    "phoneme": "ʃ"
+  },
+  {
+    "id": "feedback.mp4_sent_072",
+    "video_file": "feedback.mp4",
+    "start": 351.830,
+    "end": 355.904,
+    "text": "Check your consonant sound /t/.",
+    "category": "consonant",
+    "phoneme": "t"
+  },
+  {
+    "id": "feedback.mp4_sent_073",
+    "video_file": "feedback.mp4",
+    "start": 356.196,
+    "end": 360.263,
+    "text": "Check your consonant sound /th/.",
+    "category": "consonant",
+    "phoneme": "θ"
+  },
+  {
+    "id": "feedback.mp4_sent_074",
+    "video_file": "feedback.mp4",
+    "start": 360.554,
+    "end": 364.912,
+    "text": "Check your consonant sound /v/.",
+    "category": "consonant",
+    "phoneme": "v"
+  },
+  {
+    "id": "feedback.mp4_sent_075",
+    "video_file": "feedback.mp4",
+    "start": 365.490,
+    "end": 369.560,
+    "text": "Check your consonant sound /w/.",
+    "category": "consonant",
+    "phoneme": "w"
+  },
+  {
+    "id": "feedback.mp4_sent_076",
+    "video_file": "feedback.mp4",
+    "start": 369.562,
+    "end": 373.919,
+    "text": "Check your consonant sound /y/.",
+    "category": "consonant",
+    "phoneme": "j"
+  },
+  {
+    "id": "feedback.mp4_sent_077",
+    "video_file": "feedback.mp4",
+    "start": 374.205,
+    "end": 378.570,
+    "text": "Check your consonant sound /z/.",
+    "category": "consonant",
+    "phoneme": "z"
+  },
+  {
+    "id": "feedback.mp4_sent_078",
+    "video_file": "feedback.mp4",
+    "start": 378.855,
+    "end": 382.635,
+    "text": "Check your consonant sound /zh/.",
+    "category": "consonant",
+    "phoneme": "ʒ"
   }
+]

verification.py CHANGED Viewed

@@ -498,6 +498,7 @@ from ragg.app import rag_bp
 from pron import pron_bp
 from pronvideo import pronvideo_bp
 from pronragg import pronragg_bp
 from ragg.ingest_trigger import ingest_trigger_bp
 app.register_blueprint(movie_bp, url_prefix="/media")
 app.register_blueprint(questions_bp, url_prefix="/media")
@@ -511,6 +512,7 @@ app.register_blueprint(ingest_trigger_bp, url_prefix="/rag")
 app.register_blueprint(pron_bp, url_prefix="/pron")
 app.register_blueprint(pronvideo_bp, url_prefix="/pronvideo")
 app.register_blueprint(pronragg_bp, url_prefix="/pronragg")
 # app.register_blueprint(questions_bp, url_prefix="/media")  # <-- add this
 # ------------------------------------------------------------------------------
 # Local run (Gunicorn will import `verification:app` on Spaces)

 from pron import pron_bp
 from pronvideo import pronvideo_bp
 from pronragg import pronragg_bp
+from pronragupgrade import pronragupgrade_bp
 from ragg.ingest_trigger import ingest_trigger_bp
 app.register_blueprint(movie_bp, url_prefix="/media")
 app.register_blueprint(questions_bp, url_prefix="/media")
 app.register_blueprint(pron_bp, url_prefix="/pron")
 app.register_blueprint(pronvideo_bp, url_prefix="/pronvideo")
 app.register_blueprint(pronragg_bp, url_prefix="/pronragg")
+app.register_blueprint(pronragupgrade_bp, url_prefix="/pronragupgrade")
 # app.register_blueprint(questions_bp, url_prefix="/media")  # <-- add this
 # ------------------------------------------------------------------------------
 # Local run (Gunicorn will import `verification:app` on Spaces)