""" Feedback Generator ================== Two-layer system: Layer 1 — Rule engine: maps specific feature errors to expert articulatory cues Layer 2 — LLM rewriter: takes rule outputs and rewrites them into natural, encouraging coach-like language via a lightweight local model (or cloud fallback). The rule templates are the ground truth; the LLM only adds warmth and fluency. """ from __future__ import annotations import os import json import textwrap from dataclasses import dataclass from typing import List, Dict, Optional, Tuple from mdd_engine import PhonemeError, MDDResult, FEATURE_NAMES # ────────────────────────────────────────────── # 1. Articulatory feedback rule bank # ────────────────────────────────────────────── # Each rule = {trigger_features, direction, tip, drill, self_check} # direction: "missing" | "extra" | "both" FEATURE_RULES: List[Dict] = [ # ── VOICING (Others group) ──────────────────────────────────────────── { "features": ["voiced"], "direction": "missing", "tip": ( "Your vocal cords are not vibrating when they should be. " "Place two fingers lightly on your throat (the Adam's apple area). " "Now say the sound — if you feel vibration, you've got it. " "Try humming first ('mmm'), then slide into the target sound." ), "drill": "Practice pairs: /f/ → /v/, /s/ → /z/, /p/ → /b/. " "Feel the buzz turn on for the second sound each time.", "self_check": "Put your hand on your throat. You should feel a gentle buzz.", }, { "features": ["voiced"], "direction": "extra", "tip": ( "You are voicing a sound that should be voiceless — your vocal cords " "are buzzing when they should be still. " "Whisper the sound first to train your cords to stay quiet, " "then gradually add breath pressure without the buzz." ), "drill": "Whisper-shout drill: whisper /p/, /t/, /k/, /f/, /s/ ten times.", "self_check": "Put your hand on your throat. It should feel still, no vibration.", }, # ── MANNER: STOP ───────────────────────────────────────────────────── { "features": ["stop"], "direction": "missing", "tip": ( "This sound needs a full closure in your mouth — air must be completely " "blocked and then released in a burst. " "Your tongue or lips are not making a tight enough seal, letting air trickle " "through instead of building up pressure." ), "drill": "Tap your fingers on the desk for each stop: /p/ – /t/ – /k/. " "Feel the 'pop' as pressure releases each time.", "self_check": "Before the release, you should feel air pressure building behind the closure.", }, { "features": ["stop"], "direction": "extra", "tip": ( "You are closing your airway completely when the sound should be continuous. " "Relax the articulators and keep a small opening so air can flow through " "without a burst." ), "drill": "Say /s/ and /f/ — feel the continuous uninterrupted airflow, no pop.", "self_check": "You should hear no 'pop' or sudden release — just steady air.", }, # ── MANNER: FRICATIVE ──────────────────────────────────────────────── { "features": ["fricative"], "direction": "missing", "tip": ( "This sound requires turbulent airflow — a hissing or buzzing quality. " "Narrow the passage between your tongue (or lips) and the articulators just enough " "that the air becomes turbulent. Too wide gives a vowel; full closure gives a stop." ), "drill": "Hold /s/, /f/, /sh/ for three full seconds each. Feel the continuous friction.", "self_check": "You should hear a clear hissing or buzzing sound throughout, not silence or a pop.", }, # ── MANNER: NASAL ───────────────────────────────────────────────────── { "features": ["nasal"], "direction": "missing", "tip": ( "This sound requires airflow through your nose. " "Pinch your nostrils closed — if the sound changes dramatically, " "you were accidentally blocking nasal airflow. " "Let air flow freely through your nose as you make the sound." ), "drill": "Alternate: hum 'mmm' (nasal), then 'bbb' (not nasal). Feel the difference.", "self_check": "Pinch your nose lightly — a nasal sound will feel 'stuffed up' when blocked.", }, { "features": ["nasal"], "direction": "extra", "tip": ( "Your sound has unwanted nasality — air is leaking through your nose. " "Practice lifting the soft palate by saying 'uh-oh' firmly, then keep that " "lifted feeling while producing the target sound." ), "drill": "Say 'back — bank', 'bad — band'. The first word of each pair is not nasal.", "self_check": "Hold a mirror under your nose — it should not fog up.", }, # ── MANNER: AFFRICATE ──────────────────────────────────────────────── { "features": ["affricate"], "direction": "missing", "tip": ( "An affricate starts with a complete closure (like a stop) then releases " "into a fricative — think of /ch/ in 'church' or /jh/ in 'judge'. " "You are either skipping the closure or the friction release. " "Make sure you feel both: a tight seal followed by a hissing release." ), "drill": "Say 'ch-ch-ch' rapidly, feeling the tap-and-hiss for each one.", "self_check": "You should feel a brief closure then turbulent airflow — two phases in one sound.", }, # ── MANNER: APPROXIMANT / LIQUID ───────────────────────────────────── { "features": ["approximant", "liquid"], "direction": "missing", "tip": ( "This sound (/l/, /r/, /w/, /y/) needs your articulators to approach each other " "closely without fully touching or creating friction. " "Relax the contact — you may be pressing too hard and creating a stop, " "or not shaping your mouth precisely enough." ), "drill": "Say 'la-la-la' for /l/ and 'ra-ra-ra' for /r/ slowly, keeping the tongue light.", "self_check": "There should be no pop and no hiss — just a smooth, resonant glide.", }, # ── MANNER: CONTINUANT ─────────────────────────────────────────────── { "features": ["continuant"], "direction": "missing", "tip": ( "This sound should have continuous, uninterrupted airflow — it is not a stop. " "Keep your airway open and let air flow through for the full duration of the sound." ), "drill": "Sustain /s/, /m/, /l/ or /v/ for three seconds without any interruption.", "self_check": "You should be able to hold the sound indefinitely without cutting off air.", }, # ── PLACE: BILABIAL ────────────────────────────────────────────────── { "features": ["bilabial"], "direction": "missing", "tip": ( "This sound needs both lips pressed firmly together (/p/, /b/, /m/). " "You may be making it with only one lip or further back in the mouth. " "Press your lips together completely before releasing." ), "drill": "Say 'pa-ba-ma' ten times, exaggerating full lip closure each time.", "self_check": "Watch yourself in a mirror — both lips should close completely.", }, # ── PLACE: LABIAL (labiodental /f/, /v/) ──────────────────────────── { "features": ["labial"], "direction": "missing", "tip": ( "This sound needs your lips to be active — either both lips together (bilabial: /p/, /b/, /m/) " "or upper teeth touching the lower lip (labiodental: /f/, /v/). " "You may be making the sound too far back with the tongue." ), "drill": "Exaggerate lip contact. Say 'pop', 'bob', 'mom', 'five', 'very' in front of a mirror.", "self_check": "Watch yourself in a mirror — you should see clear lip movement.", }, # ── PLACE: DENTAL ──────────────────────────────────────────────────── { "features": ["dental"], "direction": "missing", "tip": ( "This sound (/th/, /dh/) requires your tongue tip to be right at or between your teeth. " "Stick your tongue tip just between your upper and lower front teeth " "and let air flow over it." ), "drill": "Say 'think' and 'this' slowly, deliberately placing your tongue between your teeth each time.", "self_check": "You should feel your tongue tip touching the edges of your front teeth.", }, # ── PLACE: ALVEOLAR ────────────────────────────────────────────────── { "features": ["alveolar"], "direction": "missing", "tip": ( "Your tongue tip needs to touch the alveolar ridge — the hard bump just behind " "your upper front teeth. " "This is the target for /t/, /d/, /n/, /s/, /z/, /l/. " "You may be placing your tongue too far back or too far forward." ), "drill": "Touch the ridge behind your upper teeth with your tongue tip and feel it. " "Now tap /t/ ten times, always returning to that exact spot.", "self_check": "Is your tongue tip touching the hard ridge — not the teeth and not the palate?", }, # ── PLACE: PALATAL ──────────────────────────────────────────────────── { "features": ["palatal"], "direction": "missing", "tip": ( "This sound (/sh/, /zh/, /ch/, /jh/, /y/) is made with the tongue body raised " "toward the hard palate — the hard, bony roof just behind the alveolar ridge. " "Move your tongue further back from the teeth and arch it upward." ), "drill": "Say 'she', 'measure', 'church' — feel your tongue body rise toward the hard palate.", "self_check": "You should feel your tongue broadly touching or approaching the middle of the roof.", }, # ── PLACE: VELAR ────────────────────────────────────────────────────── { "features": ["velar"], "direction": "missing", "tip": ( "This sound (/k/, /g/, /ng/) is made at the back of your mouth, with the back of your tongue " "touching the soft palate (velum). " "Try gargling — that back-of-tongue raised position is exactly what you need." ), "drill": "Say 'king', 'ring', 'sing' — focus on the back-of-tongue closure each time.", "self_check": "You should feel the back of your tongue lift and meet the soft palate.", }, # ── PLACE: GLOTTAL ──────────────────────────────────────────────────── { "features": ["glottal"], "direction": "missing", "tip": ( "This sound (/hh/) is made deep in the throat at the vocal folds. " "Think of fogging up a mirror — breathe out gently with a completely open throat. " "No tongue or lip constriction should be involved." ), "drill": "Say 'hi', 'hat', 'hot' — the /h/ should feel like a breath, not a friction sound.", "self_check": "Place a hand on your throat — you should feel warmth from breath, not a hiss.", }, # ── PLACE: RETROFLEX ───────────────────────────────────────────────── { "features": ["retroflex"], "direction": "missing", "tip": ( "This sound (/r/ in English, /er/) requires your tongue tip to curl back toward " "the back of the alveolar ridge without touching anything, or to bunch up in the " "center of your mouth. " "Say 'uh' then slowly curl your tongue tip upward and backward." ), "drill": "Practice: 'uh' → curl tongue → 'er'. Hold 'er' for three seconds.", "self_check": "Your tongue tip should point upward or backward but NOT touch the roof.", }, # ── PLACE: CORONAL ─────────────────────────────────────────────────── { "features": ["coronal"], "direction": "missing", "tip": ( "Coronal sounds are made with the front part (blade or tip) of the tongue — " "this covers /t/, /d/, /s/, /z/, /n/, /l/, /sh/, /th/, and /r/. " "Make sure your tongue front is active and positioned correctly for this sound." ), "drill": "Say 'tip', 'dip', 'sip', 'nip' — feel the tongue tip or blade doing the work.", "self_check": "Is your tongue front — tip or blade — the part making contact?", }, # ── PLACE: DORSAL ──────────────────────────────────────────────────── { "features": ["dorsal"], "direction": "missing", "tip": ( "Dorsal sounds (/k/, /g/, /ng/, /w/, /y/) involve the back (body or root) of the tongue. " "Your tongue body needs to arch toward the velum or palate. " "You may be using your tongue tip when the back of the tongue should lead." ), "drill": "Say 'key', 'go', 'sing' — feel the back hump of your tongue rise each time.", "self_check": "The front of your tongue should be relaxed; the back should be doing the work.", }, # ── VOWEL HEIGHT ────────────────────────────────────────────────────── { "features": ["high"], "direction": "missing", "tip": ( "This vowel needs your tongue to be high in your mouth. " "Think of 'ee' in 'feet' or 'oo' in 'food' — the tongue is raised close to the palate. " "Raise your tongue toward the roof of your mouth as you say the vowel." ), "drill": "Slide from 'ah' (low, jaw open) → 'ee' (high, jaw nearly closed) and feel the tongue rise.", "self_check": "Your jaw should be mostly closed; the tongue should be near the roof.", }, { "features": ["mid"], "direction": "missing", "tip": ( "This vowel needs a mid-height tongue position — halfway between fully raised and fully lowered. " "Think of 'eh' in 'bed' or 'oh' in 'boat'. " "Relax your jaw to a half-open position." ), "drill": "Slide 'ee' (high) → 'eh' (mid) → 'ah' (low) and stop at the middle position.", "self_check": "Your jaw should be half open — neither clenched nor dropped wide.", }, { "features": ["low"], "direction": "missing", "tip": ( "This vowel needs your tongue to drop down and your jaw to open wide. " "Think of 'ah' in 'father' or 'ae' in 'cat' — the tongue is flat and low. " "Let your jaw drop and your tongue rest at the bottom of your mouth." ), "drill": "Say 'ah' like a doctor's exam — exaggerate the open jaw and flat tongue.", "self_check": "Your jaw should be open wide; your tongue should feel flat at the bottom.", }, # ── VOWEL BACKNESS ─────────────────────────────────────────────────── { "features": ["front"], "direction": "missing", "tip": ( "This vowel should be made with your tongue pushed toward the front of your mouth. " "Smile slightly — this naturally pulls the tongue body forward." ), "drill": "Say 'ee – ay – eh' and feel your tongue staying at the front for all three.", "self_check": "You should feel tension or contact toward the front of your mouth.", }, { "features": ["back"], "direction": "missing", "tip": ( "This vowel should be made with your tongue retracted toward the back of your mouth. " "Round your lips slightly and pull your tongue body backward as you say the vowel." ), "drill": "Say 'oo – oh – aw' — feel your tongue pulling back and the lips rounding each time.", "self_check": "You should feel the back of your tongue arch upward and backward.", }, { "features": ["central"], "direction": "missing", "tip": ( "This vowel (like the schwa /ə/ in 'about') should be made with a completely neutral, " "centered tongue — not pushed forward or pulled back. " "Relax all tension in your jaw, lips, and tongue." ), "drill": "Say 'uh' with a completely relaxed, drooping jaw and limp tongue.", "self_check": "Your mouth should feel effortless, tongue neither front nor back.", }, # ── LIP ROUNDING (Others group: 'round') ───────────────────────────── { "features": ["round"], "direction": "missing", "tip": ( "This sound requires rounded, protruded lips — like you are blowing out a candle. " "Form an 'oo' shape with your lips before and during the sound." ), "drill": "Exaggerate lip rounding: say 'oo – oh – aw' with very pursed lips.", "self_check": "Look in a mirror — your lips should form a clear circle or oval.", }, { "features": ["round"], "direction": "extra", "tip": ( "You are rounding your lips when they should be spread or neutral. " "Spread your lips into a slight smile and keep them flat as you say the sound." ), "drill": "Say 'ee – ih – eh' with a relaxed smile — no lip rounding at all.", "self_check": "Your lips should be flat or slightly spread, not puckered.", }, # ── VOWEL LENGTH (Others group: 'long' / 'short') ───────────────────── { "features": ["long"], "direction": "missing", "tip": ( "This vowel should be noticeably longer in duration. " "English long vowels (/iy/, /uw/, /aa/, /ao/, /ae/, /er/) are roughly twice " "as long as their short counterparts. Stretch it out." ), "drill": "Say 'beat' and hold the vowel: 'beeeeat'. Then compare with the short 'bit'.", "self_check": "Record yourself — the vowel should sound stretched, not clipped.", }, { "features": ["short"], "direction": "missing", "tip": ( "This vowel should be brief and clipped. " "Short vowels (/ih/, /eh/, /ah/, /uh/) are reduced in duration. " "Don't let the vowel linger — move quickly to the next sound." ), "drill": "Say 'bit', 'bet', 'but', 'book' — snap off each vowel quickly.", "self_check": "The vowel should feel brief. If you can hold it comfortably, it's too long.", }, # ── VOWEL TYPE (Others group: 'monophthong' / 'diphthong') ────────── { "features": ["monophthong"], "direction": "missing", "tip": ( "This vowel should be pure and steady — your tongue and lips should hold the same " "position throughout. You may be letting the vowel glide (diphthongize). " "Keep your tongue and jaw completely still from start to finish." ), "drill": "Hold /aa/, /iy/, or /uw/ for three seconds without any movement.", "self_check": "The vowel quality should be identical at the beginning and end — no glide.", }, { "features": ["diphthong"], "direction": "missing", "tip": ( "This vowel should glide from one position to another — it is a diphthong. " "English diphthongs like /ay/ ('bite'), /aw/ ('bout'), /oy/ ('boy'), " "/ey/ ('bait'), /ow/ ('boat') have a clear movement. " "Let your tongue and jaw glide smoothly to the second target." ), "drill": "Say 'buy – bow – boy – bay – boat' slowly and feel the glide in each vowel.", "self_check": "The vowel should sound like it is moving, not fixed in one place.", }, ] # Build a fast lookup: feature → list of applicable rules _RULE_INDEX: Dict[str, List[Dict]] = {} for rule in FEATURE_RULES: for feat in rule["features"]: _RULE_INDEX.setdefault(feat, []).append(rule) # ────────────────────────────────────────────── # 2. Rule matcher # ────────────────────────────────────────────── @dataclass class RuleFeedback: feature: str direction: str # "missing" | "extra" tip: str drill: str self_check: str count: int = 1 # how many phonemes triggered this rule def match_rules(errors: List[PhonemeError]) -> List[RuleFeedback]: """ Given a list of phoneme errors, find the most relevant feedback rules. Rules are deduplicated and sorted by frequency of occurrence. """ triggered: Dict[Tuple[str, str], RuleFeedback] = {} for error in errors: for feat in error.missing_features: for rule in _RULE_INDEX.get(feat, []): if rule["direction"] in ("missing", "both"): key = (feat, "missing") if key in triggered: triggered[key].count += 1 else: triggered[key] = RuleFeedback( feature=feat, direction="missing", tip=rule["tip"], drill=rule["drill"], self_check=rule["self_check"], ) for feat in error.extra_features: for rule in _RULE_INDEX.get(feat, []): if rule["direction"] in ("extra", "both"): key = (feat, "extra") if key in triggered: triggered[key].count += 1 else: triggered[key] = RuleFeedback( feature=feat, direction="extra", tip=rule["tip"], drill=rule["drill"], self_check=rule["self_check"], ) # Sort by occurrence count descending return sorted(triggered.values(), key=lambda r: -r.count) # ────────────────────────────────────────────── # 3. Template-based fallback feedback (no LLM needed) # ────────────────────────────────────────────── def format_feedback_template( result: MDDResult, rules: List[RuleFeedback], max_issues: int = 3, ) -> str: """Structured text feedback without LLM — always available.""" lines = [] score = result.utterance_score # Score header if score >= 85: lines.append(f"🎉 Great pronunciation! Score: {score:.0f}/100") elif score >= 65: lines.append(f"👍 Good effort! Score: {score:.0f}/100 — a few things to polish.") elif score >= 45: lines.append(f"📚 Score: {score:.0f}/100 — let's work on some key areas.") else: lines.append(f"💪 Score: {score:.0f}/100 — keep practicing, you'll get there!") if not rules: lines.append("\nNo significant feature errors detected. Well done!") return "\n".join(lines) lines.append(f"\nI found {len(result.errors)} phoneme(s) that need attention.\n") for i, rule in enumerate(rules[:max_issues]): direction_word = "missing" if rule.direction == "missing" else "extra" lines.append(f"— Issue {i+1}: [{rule.feature}] feature {direction_word}") lines.append(f" 💡 {rule.tip}") lines.append(f" 🏋️ Drill: {rule.drill}") lines.append(f" ✅ Self-check: {rule.self_check}\n") return "\n".join(lines) # ────────────────────────────────────────────── # 4. LLM-enhanced feedback # ────────────────────────────────────────────── LLM_SYSTEM_PROMPT = """You are a warm, encouraging English pronunciation coach. Your student just attempted to say a sentence and you've identified specific phonological feature errors. Your task is to rewrite the structured feedback into a single natural, conversational coaching response. Rules: - Keep ALL the articulatory tips and self-checks intact — do not omit or soften them. - Write as if speaking to the student directly. - Be encouraging but honest. - Limit response to 200 words maximum. - Do not add new advice not present in the structured feedback. - Start with a brief overall assessment, then naturally weave in the tips. - End with one motivating sentence. """ def generate_llm_feedback( structured_feedback: str, score: float, model_name: str = "Qwen/Qwen2.5-0.5B-Instruct", # lightweight default use_cloud_fallback: bool = True, ) -> str: """ Rewrites structured feedback into natural coaching language. Tries (in order): 1. Local transformers model (if available) 2. Cloud LLM API (if use_cloud_fallback=True and API key set) 3. Returns structured_feedback unchanged as graceful degradation """ prompt = f"""Here is structured pronunciation feedback for a student who scored {score:.0f}/100: {structured_feedback} Please rewrite this as a warm, natural coaching response.""" # --- Try local model first --- try: from transformers import AutoTokenizer, AutoModelForCausalLM import torch tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto", ) messages = [ {"role": "system", "content": LLM_SYSTEM_PROMPT}, {"role": "user", "content": prompt}, ] text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) inputs = tokenizer([text], return_tensors="pt").to(model.device) with torch.no_grad(): output = model.generate( **inputs, max_new_tokens=256, temperature=0.7, do_sample=True, pad_token_id=tokenizer.eos_token_id, ) response = tokenizer.decode( output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True ) return response.strip() except Exception as local_err: print(f"[Local LLM] Not available: {local_err}") # --- Cloud fallback (OpenAI-compatible API) --- if use_cloud_fallback: api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("LLM_API_KEY") api_base = os.environ.get("LLM_API_BASE", "https://api.openai.com/v1") cloud_model = os.environ.get("LLM_MODEL", "gpt-4o-mini") if api_key: try: import httpx headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", } body = { "model": cloud_model, "messages": [ {"role": "system", "content": LLM_SYSTEM_PROMPT}, {"role": "user", "content": prompt}, ], "max_tokens": 300, "temperature": 0.7, } r = httpx.post(f"{api_base}/chat/completions", json=body, headers=headers, timeout=15) r.raise_for_status() return r.json()["choices"][0]["message"]["content"].strip() except Exception as cloud_err: print(f"[Cloud LLM] Failed: {cloud_err}") # --- Graceful degradation --- return structured_feedback # ────────────────────────────────────────────── # 5. Main feedback pipeline # ────────────────────────────────────────────── def generate_feedback( result: MDDResult, use_llm: bool = True, max_issues: int = 3, ) -> Dict: """ Full feedback pipeline. Returns a dict with keys: score, template_feedback, final_feedback, error_summary, rules_triggered """ rules = match_rules(result.errors) template_fb = format_feedback_template(result, rules, max_issues) if use_llm and rules: final_fb = generate_llm_feedback(template_fb, result.utterance_score) else: final_fb = template_fb error_summary = [ { "position": e.position, "target": e.target_phoneme, "is_deletion": e.is_deletion, "missing_features": e.missing_features, "extra_features": e.extra_features, "accuracy": round(e.feature_accuracy, 3), "severity": e.severity, } for e in result.errors ] return { "score": round(result.utterance_score, 1), "template_feedback": template_fb, "final_feedback": final_fb, "error_summary": error_summary, "feature_error_counts": result.feature_error_counts, "rules_triggered": [ { "feature": r.feature, "direction": r.direction, "occurrences": r.count, } for r in rules ], }