Upload folder using huggingface_hub
Browse files- .gitignore +24 -0
- .pytest_cache/.gitignore +2 -0
- .pytest_cache/CACHEDIR.TAG +4 -0
- .pytest_cache/README.md +8 -0
- .pytest_cache/v/cache/lastfailed +3 -0
- .pytest_cache/v/cache/nodeids +37 -0
- README.md +138 -0
- align_graphemes.py +202 -0
- batch_align_all.py +226 -0
- ctc_align_90.py +127 -0
- ctc_align_90_physics.py +259 -0
- ctc_align_91.py +259 -0
- physics_analyzer.py +281 -0
- physics_analyzer_v2.py +370 -0
- physics_analyzer_v3.py +542 -0
- requirements.txt +21 -0
- src/__init__.py +35 -0
- src/alignment_engine.py +407 -0
- src/duration_model.py +311 -0
- src/lisan_phonemes.json +438 -0
- src/mfa_refiner.py +419 -0
- src/physics_validator.py +930 -0
- src/pipeline.py +334 -0
- src/tajweed_parser.py +334 -0
- surah_90_test.py +241 -0
- surah_91_full_pipeline.py +213 -0
- surah_91_test.py +297 -0
- tests/test_alignment_engine.py +224 -0
- tests/test_physics_validator.py +303 -0
- tests/test_pipeline.py +118 -0
- whisperx_align_90.py +140 -0
- whisperx_surah90.py +118 -0
.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
| 9 |
+
.venv/
|
| 10 |
+
ENV/
|
| 11 |
+
|
| 12 |
+
# IDE
|
| 13 |
+
.vscode/
|
| 14 |
+
.idea/
|
| 15 |
+
*.swp
|
| 16 |
+
|
| 17 |
+
# Output
|
| 18 |
+
output/
|
| 19 |
+
*.json
|
| 20 |
+
!src/*.json
|
| 21 |
+
|
| 22 |
+
# OS
|
| 23 |
+
.DS_Store
|
| 24 |
+
Thumbs.db
|
.pytest_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by pytest automatically.
|
| 2 |
+
*
|
.pytest_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
| 2 |
+
# This file is a cache directory tag created by pytest.
|
| 3 |
+
# For information about cache directory tags, see:
|
| 4 |
+
# https://bford.info/cachedir/spec.html
|
.pytest_cache/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pytest cache directory #
|
| 2 |
+
|
| 3 |
+
This directory contains data from the pytest's cache plugin,
|
| 4 |
+
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
|
| 5 |
+
|
| 6 |
+
**Do not** commit this to version control.
|
| 7 |
+
|
| 8 |
+
See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
|
.pytest_cache/v/cache/lastfailed
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_short_segment_skipped": true
|
| 3 |
+
}
|
.pytest_cache/v/cache/nodeids
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"tests/test_alignment_engine.py::TestArabicPhonemes::test_arabic_phonetic_transcription",
|
| 3 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_alignment_result",
|
| 4 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_phoneme_alignment",
|
| 5 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_phoneme_normalized_duration",
|
| 6 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_word_alignment",
|
| 7 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_phoneme_generation",
|
| 8 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_returns_result",
|
| 9 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_timing_monotonic",
|
| 10 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_word_count",
|
| 11 |
+
"tests/test_alignment_engine.py::TestPhonemeNormalization::test_phonemes_cover_word_duration",
|
| 12 |
+
"tests/test_alignment_engine.py::TestPhonemeNormalization::test_phonemes_fit_word_boundary",
|
| 13 |
+
"tests/test_alignment_engine.py::TestTimingMonotonicity::test_phoneme_timing_monotonic",
|
| 14 |
+
"tests/test_alignment_engine.py::TestTimingMonotonicity::test_word_timing_monotonic",
|
| 15 |
+
"tests/test_physics_validator.py::TestGhunnahValidation::test_ghunnah_returns_physics_result",
|
| 16 |
+
"tests/test_physics_validator.py::TestIdghamValidation::test_idgham_returns_physics_result",
|
| 17 |
+
"tests/test_physics_validator.py::TestIkhfaValidation::test_ikhfa_returns_physics_result",
|
| 18 |
+
"tests/test_physics_validator.py::TestIzharValidation::test_izhar_returns_physics_result",
|
| 19 |
+
"tests/test_physics_validator.py::TestMaddValidation::test_madd_asli_duration",
|
| 20 |
+
"tests/test_physics_validator.py::TestMaddValidation::test_madd_returns_physics_result",
|
| 21 |
+
"tests/test_physics_validator.py::TestPhysicsValidatorInit::test_custom_sample_rate",
|
| 22 |
+
"tests/test_physics_validator.py::TestPhysicsValidatorInit::test_default_init",
|
| 23 |
+
"tests/test_physics_validator.py::TestPhysicsValidatorInit::test_thresholds_exist",
|
| 24 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_detects_dip_spike",
|
| 25 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_returns_physics_result",
|
| 26 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_short_segment_handles_gracefully",
|
| 27 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_short_segment_skipped",
|
| 28 |
+
"tests/test_physics_validator.py::TestTafkheemValidation::test_tafkheem_returns_physics_result",
|
| 29 |
+
"tests/test_physics_validator.py::TestValidationResults::test_madd_result_fields",
|
| 30 |
+
"tests/test_physics_validator.py::TestValidationResults::test_physics_result_fields",
|
| 31 |
+
"tests/test_physics_validator.py::TestValidationResults::test_qalqalah_result_fields",
|
| 32 |
+
"tests/test_pipeline.py::TestFullPipeline::test_grapheme_count_matches",
|
| 33 |
+
"tests/test_pipeline.py::TestFullPipeline::test_surah_91_ayah_1",
|
| 34 |
+
"tests/test_pipeline.py::TestTimingRegression::test_no_negative_durations",
|
| 35 |
+
"tests/test_pipeline.py::TestTimingRegression::test_no_overlapping_phonemes",
|
| 36 |
+
"tests/test_pipeline.py::TestTimingRegression::test_no_zero_duration_phonemes"
|
| 37 |
+
]
|
README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TajweedSST — Quranic Letter-Level Alignment & Tajweed Physics Engine
|
| 2 |
+
|
| 3 |
+
> CTC Forced Alignment + Acoustic Physics Validation for Quranic Recitation
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
TajweedSST is a Python pipeline that produces **letter-level timing data** for Quranic recitation audio. It combines **wav2vec2 CTC forced alignment** with **acoustic physics validation** (Tajweed rules) to generate timing files consumed by [MahQuranApp](https://github.com/ihyatafsir/MahQuranApp) for real-time letter highlighting.
|
| 8 |
+
|
| 9 |
+
## Pipeline Architecture
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 13 |
+
│ TajweedSST Pipeline │
|
| 14 |
+
│ │
|
| 15 |
+
│ 1. CTC Forced Alignment (wav2vec2) │
|
| 16 |
+
│ └─ Word-level timestamps from audio │
|
| 17 |
+
│ │
|
| 18 |
+
│ 2. Character Expansion │
|
| 19 |
+
│ └─ Word timestamps → individual character timing │
|
| 20 |
+
│ │
|
| 21 |
+
│ 3. Grapheme Matching │
|
| 22 |
+
│ └─ Merge base + diacritics to match App.tsx rendering │
|
| 23 |
+
│ │
|
| 24 |
+
│ 4. Tajweed Parsing │
|
| 25 |
+
│ └─ Map letters to Tajweed rules (Qalqalah, Ghunnah..) │
|
| 26 |
+
│ │
|
| 27 |
+
│ 5. Physics Validation │
|
| 28 |
+
│ └─ RMS bounce, duration, formant analysis │
|
| 29 |
+
│ │
|
| 30 |
+
│ 6. Export to MahQuranApp format │
|
| 31 |
+
│ └─ JSON with idx, char, ayah, start(ms), end, wordIdx │
|
| 32 |
+
└─────────────────────────────────────────────────────────────┘
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
## Quick Start
|
| 36 |
+
|
| 37 |
+
### Prerequisites
|
| 38 |
+
|
| 39 |
+
```bash
|
| 40 |
+
cd /path/to/tajweedsst
|
| 41 |
+
python3 -m venv venv
|
| 42 |
+
source venv/bin/activate
|
| 43 |
+
pip install torch torchaudio ctc-forced-aligner librosa
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
### Single Surah
|
| 47 |
+
|
| 48 |
+
```bash
|
| 49 |
+
# Align Surah 90 (Al-Balad) for Abdul Basit
|
| 50 |
+
python ctc_align_90.py   # (use ctc_align_91.py for the physics-enabled template)
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### Batch All Surahs
|
| 54 |
+
|
| 55 |
+
```bash
|
| 56 |
+
# Process all 114 surahs for Abdul Basit
|
| 57 |
+
python batch_align_all.py
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
## Output Format
|
| 61 |
+
|
| 62 |
+
Each `letter_timing_XX.json` contains an array of timing entries:
|
| 63 |
+
|
| 64 |
+
```json
|
| 65 |
+
{
|
| 66 |
+
"idx": 0,
|
| 67 |
+
"char": "لَ",
|
| 68 |
+
"ayah": 1,
|
| 69 |
+
"start": 3360,
|
| 70 |
+
"end": 3410,
|
| 71 |
+
"duration": 50,
|
| 72 |
+
"wordIdx": 0,
|
| 73 |
+
"weight": 1.0
|
| 74 |
+
}
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### Fields
|
| 78 |
+
|
| 79 |
+
| Field | Type | Description |
|
| 80 |
+
|-------|------|-------------|
|
| 81 |
+
| `idx` | int | Sequential letter index |
|
| 82 |
+
| `char` | string | Arabic grapheme (base + diacritics) |
|
| 83 |
+
| `ayah` | int | Verse number (1-indexed) |
|
| 84 |
+
| `start` | int | Start time in milliseconds |
|
| 85 |
+
| `end` | int | End time in milliseconds |
|
| 86 |
+
| `duration` | int | Duration in milliseconds |
|
| 87 |
+
| `wordIdx` | int | Word index within the surah |
|
| 88 |
+
| `weight` | float | Confidence weight |
|
| 89 |
+
|
| 90 |
+
## Critical: Grapheme Matching
|
| 91 |
+
|
| 92 |
+
The timing data **must** match the grapheme count produced by MahQuranApp's `splitIntoGraphemes()` function. This function combines base Arabic letters with their following diacritics:
|
| 93 |
+
|
| 94 |
+
**App.tsx Diacritics Set:**
|
| 95 |
+
```
|
| 96 |
+
ً ٌ ٍ َ ُ ِ ّ ْ ٰ ۖ ۗ ۘ ۙ ۚ ۛ ۜ ٔ ٓ ـ
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
Plus Unicode ranges: `0x064B–0x0652` and `0x0610–0x061A`
|
| 100 |
+
|
| 101 |
+
**Example:** The word `لَآ` splits into 2 graphemes: `['لَ', 'آ']`
|
| 102 |
+
|
| 103 |
+
If the timing count doesn't match the grapheme count, highlighting will drift!
|
| 104 |
+
|
| 105 |
+
## Physics Validation
|
| 106 |
+
|
| 107 |
+
TajweedSST validates timing against acoustic physics:
|
| 108 |
+
|
| 109 |
+
| Rule | Check | Method |
|
| 110 |
+
|------|-------|--------|
|
| 111 |
+
| Qalqalah | RMS dip + spike | Envelope analysis |
|
| 112 |
+
| Ghunnah | Nasal duration | Duration measurement |
|
| 113 |
+
| Madd | Extended vowel | Duration ratio |
|
| 114 |
+
| Tafkheem | Heavy articulation | Formant F2 analysis |
|
| 115 |
+
|
| 116 |
+
## Project Structure
|
| 117 |
+
|
| 118 |
+
```
|
| 119 |
+
tajweedsst/
|
| 120 |
+
├── src/
|
| 121 |
+
│ ├── tajweed_parser.py # Tajweed rule detection
|
| 122 |
+
│ ├── physics_validator.py # Acoustic validation
|
| 123 |
+
│ └── duration_model.py # Duration calibration
|
| 124 |
+
├── tests/ # 34 unit/integration tests
|
| 125 |
+
├── ctc_align_90.py # Single surah alignment
|
| 126 |
+
├── ctc_align_91.py # Template with physics
|
| 127 |
+
├── batch_align_all.py # Batch all surahs
|
| 128 |
+
└── README.md
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
## Reciter Support
|
| 132 |
+
|
| 133 |
+
Currently supported:
|
| 134 |
+
- **Abdul Basit** (114 surahs)
|
| 135 |
+
|
| 136 |
+
## License
|
| 137 |
+
|
| 138 |
+
MIT
|
align_graphemes.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Grapheme-Aligned Timing Generator for Surah 91
|
| 4 |
+
|
| 5 |
+
This script:
|
| 6 |
+
1. Reads verse text from verses_v4.json and extracts graphemes (exactly as MahQuranApp does)
|
| 7 |
+
2. Reads the original timing and maps it to the grapheme count
|
| 8 |
+
3. Outputs timing with exactly the right number of entries
|
| 9 |
+
|
| 10 |
+
The key is: timing entries must match the grapheme count from verse.words[].arabic
|
| 11 |
+
"""
|
| 12 |
+
import json
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
# Config
|
| 16 |
+
SURAH = 91
|
| 17 |
+
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")
|
| 18 |
+
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"
|
| 19 |
+
TIMING_PATH = PROJECT_ROOT / "public/data/abdul_basit_original/letter_timing_91.json"
|
| 20 |
+
OUTPUT_PATH = PROJECT_ROOT / "public/data/abdul_basit/letter_timing_91_aligned.json"
|
| 21 |
+
|
| 22 |
+
# Arabic diacritics (same as MahQuranApp App.tsx)
DIACRITICS = set('ًٌٍَُِّْٰۖۗۘۙۚۛۜٔٓـ')


def split_graphemes(text: str) -> list[str]:
    """Split Arabic text into graphemes (base letter + following diacritics).

    Mirrors MahQuranApp's splitIntoGraphemes: a grapheme is one base
    character plus every combining mark immediately after it. Spaces flush
    the pending grapheme and are dropped. A diacritic with no pending base
    starts a grapheme of its own.
    """
    def _is_mark(c: str) -> bool:
        code = ord(c)
        return c in DIACRITICS or 0x064B <= code <= 0x0652 or 0x0610 <= code <= 0x061A

    pieces: list[str] = []
    pending = ''
    for c in text:
        if c == ' ':
            if pending:
                pieces.append(pending)
            pending = ''
        elif pending and _is_mark(c):
            pending += c
        else:
            if pending:
                pieces.append(pending)
            pending = c
    if pending:
        pieces.append(pending)
    return pieces
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_all_graphemes(surah_num: int) -> list[dict]:
    """Extract every grapheme for a surah, exactly as MahQuranApp renders it.

    Reads VERSES_PATH and returns one dict per grapheme with keys
    'char', 'ayah' and 'wordIdx' (word index running across the whole surah).
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as fh:
        surah_verses = json.load(fh).get(str(surah_num), [])

    collected: list[dict] = []
    word_counter = 0
    for verse in surah_verses:
        ayah_num = verse.get('ayah', 0)
        for word in verse.get('words', []):
            for g in split_graphemes(word.get('arabic', '')):
                collected.append({
                    'char': g,
                    'ayah': ayah_num,
                    'wordIdx': word_counter
                })
            word_counter += 1

    return collected
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def strip_diacritics(text: str) -> str:
    """Remove diacritics from Arabic text, leaving only base letters.

    Uses the same diacritic definition as split_graphemes: the explicit
    DIACRITICS set, the harakat range U+064B-U+0652, and the Quranic
    annotation range U+0610-U+061A.  The previous version omitted the
    U+0610-U+061A range, so a grapheme carrying one of those marks
    stripped to a multi-character "base" and could never match a single
    base letter in distribute_timing.
    """
    def _is_mark(ch: str) -> bool:
        code = ord(ch)
        return ch in DIACRITICS or 0x064B <= code <= 0x0652 or 0x0610 <= code <= 0x061A

    return ''.join(ch for ch in text if not _is_mark(ch))
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def is_standalone_diacritic(char: str) -> bool:
    """Return True when *char* is exactly one character and that character
    is a diacritic mark (DIACRITICS set or the U+064B-U+0652 range)."""
    if len(char) != 1:
        return False
    return char in DIACRITICS or 0x064B <= ord(char) <= 0x0652
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def distribute_timing(graphemes: list[dict], original_timing: list[dict]) -> list[dict]:
    """Map original timing to graphemes by matching base letters, skipping diacritics.

    Args:
        graphemes: dicts with 'char', 'ayah', 'wordIdx' (see get_all_graphemes).
        original_timing: dicts with at least 'char', 'start', 'end' (ms);
            'duration' and 'weight' are used when present.

    Returns:
        A new list of timing dicts, one per grapheme.  Returns [] when
        original_timing is empty.  NOTE: a grapheme that finds no match,
        has no remaining timing entries, and has no predecessor in the
        output is silently dropped (only reachable for the very first
        grapheme).
    """
    if not original_timing:
        return []

    # First, filter out standalone diacritics from original timing
    # and merge their duration into the previous letter
    filtered_timing = []
    for entry in original_timing:
        char = entry['char']
        if is_standalone_diacritic(char):
            # Merge duration into previous entry
            # (a leading standalone diacritic with no predecessor is dropped)
            if filtered_timing:
                filtered_timing[-1]['end'] = entry['end']
                filtered_timing[-1]['duration'] = filtered_timing[-1]['end'] - filtered_timing[-1]['start']
        else:
            filtered_timing.append(dict(entry))  # Copy: merges above must not mutate the caller's dicts

    print(f" (Filtered timing: {len(filtered_timing)} base letters)")

    aligned_timing = []
    orig_idx = 0  # cursor into filtered_timing; advances as graphemes consume entries

    for i, g in enumerate(graphemes):
        grapheme_char = g['char']
        base_letter = strip_diacritics(grapheme_char)

        # Try to find matching original timing entry by base letter.
        # The window looks back 2 and ahead 10 entries from the cursor to
        # tolerate small insertions/deletions between the two sequences.
        matched = None
        search_start = max(0, orig_idx - 2)
        search_end = min(len(filtered_timing), orig_idx + 10)  # Search wider

        for j in range(search_start, search_end):
            orig_char = filtered_timing[j]['char']
            orig_base = strip_diacritics(orig_char)
            # Substring checks catch partial/composed forms on either side.
            if orig_base == base_letter or orig_char in grapheme_char or base_letter in orig_char:
                matched = filtered_timing[j]
                orig_idx = j + 1
                break

        if not matched and orig_idx < len(filtered_timing):
            # Fallback: use next available timing
            matched = filtered_timing[orig_idx]
            orig_idx += 1

        if matched:
            aligned_timing.append({
                'idx': i,
                'char': grapheme_char,
                'ayah': g['ayah'],
                'start': matched['start'],
                'end': matched['end'],
                'duration': matched.get('duration', matched['end'] - matched['start']),
                'wordIdx': g['wordIdx'],
                'weight': matched.get('weight', 1.0)
            })
        else:
            # Last resort: estimate from previous (fixed 100 ms slot after it)
            if aligned_timing:
                prev = aligned_timing[-1]
                aligned_timing.append({
                    'idx': i,
                    'char': grapheme_char,
                    'ayah': g['ayah'],
                    'start': prev['end'],
                    'end': prev['end'] + 100,
                    'duration': 100,
                    'wordIdx': g['wordIdx'],
                    'weight': 1.0
                })

    return aligned_timing
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def main():
    """Generate grapheme-aligned timing for SURAH: read graphemes from
    verses_v4.json, remap the original timing file onto them, and write
    the aligned JSON to OUTPUT_PATH."""
    print("=" * 60)
    print(f"Grapheme-Aligned Timing Generator: Surah {SURAH}")
    print("=" * 60)

    # Get graphemes from verse text
    graphemes = get_all_graphemes(SURAH)
    print(f"\n[1] Graphemes from verse text: {len(graphemes)}")

    # Load original timing
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        original_timing = json.load(f)
    print(f"[2] Original timing entries: {len(original_timing)}")

    # Distribute timing to graphemes
    aligned_timing = distribute_timing(graphemes, original_timing)
    print(f"[3] Aligned timing entries: {len(aligned_timing)}")

    # Save
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(aligned_timing, f, ensure_ascii=False, indent=2)
    print(f"\n[4] Saved: {OUTPUT_PATH}")

    # Show sample
    print("\n=== First 10 graphemes ===")
    for t in aligned_timing[:10]:
        print(f" {t['idx']:3d}: '{t['char']}' @ {t['start']}-{t['end']}ms (ayah={t['ayah']})")

    print("\n" + "=" * 60)
    print("✓ Done! Copy to letter_timing_91.json to test")
    print("=" * 60)
| 199 |
+
|
| 200 |
+
|
| 201 |
+
# Script entry point: run the aligner when executed directly.
if __name__ == "__main__":
    main()
|
batch_align_all.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Batch CTC Alignment for All Abdul Basit Surahs
|
| 4 |
+
Processes all 114 surahs with the full pipeline:
|
| 5 |
+
1. CTC forced alignment (wav2vec2)
|
| 6 |
+
2. Grapheme matching (App.tsx compatible)
|
| 7 |
+
3. Export to MahQuranApp format
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
cd /home/absolut7/Documents/26apps/tajweedsst
|
| 11 |
+
source venv/bin/activate
|
| 12 |
+
python batch_align_all.py
|
| 13 |
+
"""
|
| 14 |
+
import json
|
| 15 |
+
import sys
|
| 16 |
+
import time
|
| 17 |
+
import torch
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
from ctc_forced_aligner import (
|
| 20 |
+
load_audio,
|
| 21 |
+
load_alignment_model,
|
| 22 |
+
generate_emissions,
|
| 23 |
+
preprocess_text,
|
| 24 |
+
get_alignments,
|
| 25 |
+
get_spans,
|
| 26 |
+
postprocess_results,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Config
|
| 30 |
+
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")
|
| 31 |
+
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"
|
| 32 |
+
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"
|
| 33 |
+
AUDIO_DIR = PROJECT_ROOT / "public/audio/abdul_basit"
|
| 34 |
+
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 35 |
+
BATCH_SIZE = 4
|
| 36 |
+
|
| 37 |
+
# Exact same DIACRITICS as App.tsx line 176
DIACRITICS = set(['ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', 'ٰ', 'ۖ', 'ۗ', 'ۘ', 'ۙ', 'ۚ', 'ۛ', 'ۜ', 'ٔ', 'ٓ', 'ـ'])


def is_diacritic(ch):
    """Match App.tsx splitIntoGraphemes exactly: the DIACRITICS set plus the
    U+064B-U+0652 (harakat) and U+0610-U+061A (annotation) ranges."""
    code = ord(ch)
    return ch in DIACRITICS or 0x064B <= code <= 0x0652 or 0x0610 <= code <= 0x061A


def split_into_graphemes(text):
    """Exact same logic as App.tsx splitIntoGraphemes: each grapheme is one
    base character followed by its combining marks; spaces flush the pending
    grapheme and are dropped."""
    out = []
    buf = ''
    for ch in text:
        if ch == ' ':
            if buf:
                out.append(buf)
            buf = ''
        elif buf and is_diacritic(ch):
            buf += ch
        else:
            if buf:
                out.append(buf)
            buf = ch
    if buf:
        out.append(buf)
    return out
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def load_quran_text(all_verses, surah_num):
    """Return the surah's verse texts joined by single spaces.

    *all_verses* maps str(surah number) -> list of verse dicts; a missing
    surah or missing 'text' keys yield empty strings.
    """
    surah_verses = all_verses.get(str(surah_num), [])
    return ' '.join(verse.get('text', '') for verse in surah_verses)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_grapheme_list(all_verses, surah_num):
    """Get graphemes with ayah info matching App.tsx rendering.

    Splits each verse's 'text' on whitespace and explodes every word into
    graphemes via split_into_graphemes.  NOTE(review): indexes v['text'] and
    v['ayah'] directly, so a verse missing either key raises KeyError —
    presumably guaranteed by verses_v4.json; confirm against the data.
    """
    entries = []
    for verse in all_verses.get(str(surah_num), []):
        for token in verse['text'].split():
            for g in split_into_graphemes(token):
                entries.append({'char': g, 'ayah': verse['ayah']})
    return entries
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def process_surah(surah_num, alignment_model, alignment_tokenizer, all_verses):
    """Process a single surah through the full pipeline.

    Runs CTC forced alignment on the surah's mp3, expands word timestamps to
    character level, maps characters onto App.tsx graphemes and writes the
    timing JSON to OUTPUT_DIR.

    Returns:
        (letter_count, status_message) on success, or (None, reason) when the
        audio/text is missing or any step raises (all exceptions are caught
        and reported in the status string).
    """
    audio_path = AUDIO_DIR / f"surah_{surah_num:03d}.mp3"
    output_path = OUTPUT_DIR / f"letter_timing_{surah_num}.json"

    if not audio_path.exists():
        return None, "No audio file"

    text = load_quran_text(all_verses, surah_num)
    if not text.strip():
        return None, "No verse text"

    grapheme_list = get_grapheme_list(all_verses, surah_num)

    try:
        # Step 1: Load audio
        audio_waveform = load_audio(str(audio_path), alignment_model.dtype, alignment_model.device)

        # Step 2: Generate CTC emissions
        emissions, stride = generate_emissions(
            alignment_model, audio_waveform, batch_size=BATCH_SIZE
        )

        # Step 3: Preprocess text
        tokens_starred, text_starred = preprocess_text(
            text, romanize=True, language="ara",
        )

        # Step 4: Get alignments
        segments, scores, blank_token = get_alignments(
            emissions, tokens_starred, alignment_tokenizer,
        )

        # Step 5: Get spans & post-process
        spans = get_spans(tokens_starred, segments, blank_token)
        word_timestamps = postprocess_results(text_starred, spans, stride, scores)

        # Step 6: Expand to character-level
        # Each word's span is divided evenly among its characters.
        # NOTE(review): char_dur divides by len(word) but whitespace chars are
        # skipped below, so words containing spaces would under-fill their span
        # — presumably word_timestamps entries are single words; confirm.
        char_timings = []
        for wt in word_timestamps:
            word = wt['text']
            start = wt['start']
            end = wt['end']
            duration = end - start
            char_dur = duration / len(word) if word else 0
            for i, char in enumerate(word):
                if not char.isspace():
                    char_timings.append({
                        'start': start + i * char_dur,
                        'end': start + (i + 1) * char_dur,
                    })

        # Step 7: Map CTC chars to graphemes
        # Each grapheme consumes len(grapheme) character slots; start comes
        # from the first slot, end from the last.  Seconds -> milliseconds.
        timing = []
        ci = 0
        for gi, ginfo in enumerate(grapheme_list):
            g = ginfo['char']
            s, e = None, None
            for _ in range(len(g)):
                if ci < len(char_timings):
                    if s is None:
                        s = int(char_timings[ci]['start'] * 1000)
                    e = int(char_timings[ci]['end'] * 1000)
                    ci += 1
            if s is None:
                # Ran out of CTC slots: append a fixed 100 ms slot after the
                # previous entry.
                s = timing[-1]['end'] if timing else 0
                e = s + 100

            timing.append({
                'idx': gi,
                'char': g,
                'ayah': ginfo['ayah'],
                'start': s,
                'end': e,
                'duration': e - s,
                # NOTE(review): gi // 4 is not a real word index — it assumes
                # ~4 graphemes per word.  Looks like a placeholder; confirm
                # before relying on wordIdx downstream.
                'wordIdx': gi // 4,
                'weight': 1.0
            })

        # Save
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(timing, f, ensure_ascii=False, indent=2)

        return len(timing), f"OK ({len(grapheme_list)} graphemes)"

    except Exception as ex:
        return None, f"Error: {ex}"
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def main():
    """Batch-align all 114 surahs: load the wav2vec model once, process each
    surah through process_surah, and print a success/failure summary."""
    start_time = time.time()
    print("=" * 60)
    print("Batch CTC Alignment - Abdul Basit (All 114 Surahs)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # Load model once
    print("\n[1] Loading wav2vec alignment model...")
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        # fp16 halves memory on GPU; CPU kernels need fp32
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )
    print(" Model loaded.")

    # Load all verses
    print("[2] Loading verses...")
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        all_verses = json.load(f)
    print(f" Loaded {len(all_verses)} surahs")

    # Process each surah
    results = []
    for surah_num in range(1, 115):
        elapsed = time.time() - start_time
        print(f"\n[Surah {surah_num:03d}/114] ({elapsed:.0f}s elapsed)...")

        count, status = process_surah(
            surah_num, alignment_model, alignment_tokenizer, all_verses
        )
        results.append((surah_num, count, status))

        if count:
            print(f" ✓ {count} letters - {status}")
        else:
            print(f" ✗ {status}")

    # Summary
    elapsed = time.time() - start_time
    ok = sum(1 for _, c, _ in results if c)
    fail = sum(1 for _, c, _ in results if not c)

    print("\n" + "=" * 60)
    print(f"BATCH COMPLETE in {elapsed:.0f}s ({elapsed/60:.1f}min)")
    print(f" ✓ Success: {ok}/114")
    print(f" ✗ Failed: {fail}/114")
    print("=" * 60)

    # Cleanup: drop the model reference and release cached GPU memory
    # (empty_cache is a no-op when CUDA was never used)
    del alignment_model
    torch.cuda.empty_cache()
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# Script entry point: run the batch aligner when executed directly.
if __name__ == "__main__":
    main()
|
ctc_align_90.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CTC Forced Aligner for Surah 90 (Al-Balad)
|
| 4 |
+
Uses ctc-forced-aligner v0.3.0 from GitHub for word-level alignment.
|
| 5 |
+
Based on MahQuranApp/scripts/ctc_quran_aligner.py
|
| 6 |
+
"""
|
| 7 |
+
import json
|
| 8 |
+
import torch
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from ctc_forced_aligner import (
|
| 11 |
+
load_audio,
|
| 12 |
+
load_alignment_model,
|
| 13 |
+
generate_emissions,
|
| 14 |
+
preprocess_text,
|
| 15 |
+
get_alignments,
|
| 16 |
+
get_spans,
|
| 17 |
+
postprocess_results,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# Config — all inputs/outputs live inside the MahQuranApp project tree.
SURAH = 90  # Al-Balad
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")  # NOTE(review): machine-specific absolute path
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"  # per-surah verse text source
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"  # destination for letter-timing JSON
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_090.mp3"  # recitation to align
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # prefer GPU when available
BATCH_SIZE = 4  # batch size for CTC emission generation
|
| 28 |
+
|
| 29 |
+
def load_quran_text(surah_num: int) -> str:
    """Return the full text of one surah as a single space-joined string.

    Reads verses_v4.json (VERSES_PATH) and concatenates the 'text' field
    of every verse of the requested surah. An unknown surah number yields
    an empty string.
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        corpus = json.load(f)
    verse_texts = [v.get('text', '') for v in corpus.get(str(surah_num), [])]
    return ' '.join(verse_texts)
|
| 35 |
+
|
| 36 |
+
def main():
    """Align Surah 90 audio to its text with a wav2vec CTC forced aligner.

    Steps: load model + audio, build the surah text, compute CTC emissions,
    align romanized Arabic tokens, then spread each word's time span evenly
    over its characters and save per-letter timings as JSON.
    """
    print("=" * 60)
    print(f"CTC Forced Aligner for Surah {SURAH} (Al-Balad)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # 1. Load alignment model (fp16 only on CUDA — fp16 is slow/unsupported on CPU)
    print("\n[1] Loading alignment model...")
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )
    print(" Model loaded.")

    # 2. Load audio, matching the model's dtype/device so no copy is needed later
    print("\n[2] Loading audio...")
    audio_waveform = load_audio(str(AUDIO_PATH), alignment_model.dtype, alignment_model.device)
    print(f" Audio loaded.")

    # 3. Get Quran text
    text = load_quran_text(SURAH)
    print(f"\n[3] Text length: {len(text)} chars")
    print(f" First 60: {text[:60]}...")

    # 4. Generate frame-level CTC emissions (stride maps frames back to seconds)
    print("\n[4] Generating emissions...")
    emissions, stride = generate_emissions(
        alignment_model, audio_waveform, batch_size=BATCH_SIZE
    )
    print(f" Emissions shape: {emissions.shape}")

    # 5. Preprocess text — romanize Arabic so it matches the model's vocabulary
    print("\n[5] Preprocessing text...")
    tokens_starred, text_starred = preprocess_text(
        text,
        romanize=True,
        language="ara",
    )

    # 6. Get alignments
    print("\n[6] Getting alignments...")
    segments, scores, blank_token = get_alignments(
        emissions, tokens_starred, alignment_tokenizer,
    )

    # 7. Get spans
    spans = get_spans(tokens_starred, segments, blank_token)

    # 8. Post-process results into word-level {text, start, end} timestamps
    word_timestamps = postprocess_results(text_starred, spans, stride, scores)

    print(f" Got {len(word_timestamps)} word alignments")

    # 9. Convert to character-level timing (seconds format).
    # Each word's duration is split uniformly across ALL its characters
    # (including any whitespace); whitespace slots are then dropped, so a
    # space inside a word leaves a small timing gap by design.
    char_timings = []
    for wt in word_timestamps:
        word = wt['text']
        start = wt['start']
        end = wt['end']
        duration = end - start
        char_dur = duration / len(word) if word else 0

        for i, char in enumerate(word):
            if not char.isspace():
                char_timings.append({
                    "char": char,
                    "start": round(start + i * char_dur, 3),
                    "end": round(start + (i + 1) * char_dur, 3),
                    "idx": len(char_timings)
                })

    print(f"\n[7] Total chars: {len(char_timings)}")

    # 10. Save output
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH}.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(char_timings, f, ensure_ascii=False, indent=2)

    print(f"\n[8] Saved to {output_path}")

    # Print first 20 for verification
    print("\n=== First 20 characters ===")
    for ct in char_timings[:20]:
        dur_ms = (ct['end'] - ct['start']) * 1000
        print(f" {ct['idx']:3d}: '{ct['char']}' @ {ct['start']:.3f}s - {ct['end']:.3f}s ({dur_ms:.0f}ms)")

    print("\n" + "=" * 60)
    print("✓ CTC Alignment complete!")
    print("=" * 60)
|
| 125 |
+
|
| 126 |
+
if __name__ == "__main__":
|
| 127 |
+
main()
|
ctc_align_90_physics.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CTC Forced Aligner + Physics for Surah 90 (Al-Balad)
|
| 4 |
+
Uses ctc-forced-aligner (wav2vec CTC) + TajweedSST physics refinement.
|
| 5 |
+
|
| 6 |
+
Pipeline:
|
| 7 |
+
1. CTC Alignment: wav2vec forced alignment for letter timing
|
| 8 |
+
2. Tajweed Parser: Map letters to Tajweed rules
|
| 9 |
+
3. Physics Validation: Validate with acoustic physics
|
| 10 |
+
4. Export: MahQuranApp format
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
cd /Documents/26apps/tajweedsst
|
| 14 |
+
source venv/bin/activate
|
| 15 |
+
python3 ctc_align_90_physics.py
|
| 16 |
+
"""
|
| 17 |
+
import json
|
| 18 |
+
import torch
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from ctc_forced_aligner import (
|
| 22 |
+
load_audio,
|
| 23 |
+
load_alignment_model,
|
| 24 |
+
generate_emissions,
|
| 25 |
+
preprocess_text,
|
| 26 |
+
get_alignments,
|
| 27 |
+
get_spans,
|
| 28 |
+
postprocess_results,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 32 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 33 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 34 |
+
from src.duration_model import DurationModel, MaddType
|
| 35 |
+
|
| 36 |
+
import librosa
|
| 37 |
+
|
| 38 |
+
# Config — all inputs/outputs live inside the MahQuranApp project tree.
SURAH = 90  # Al-Balad
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")  # NOTE(review): machine-specific absolute path
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"  # per-surah verse text source
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"  # destination for letter-timing JSON
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_090.mp3"  # recitation to align
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # prefer GPU when available
BATCH_SIZE = 4  # batch size for CTC emission generation
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def load_quran_text(surah_num: int) -> str:
    """Load and join all verse texts of one surah from verses_v4.json.

    Missing surah keys and missing per-verse 'text' fields both degrade to
    empty strings, so the result is always a (possibly empty) string.
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as handle:
        everything = json.load(handle)
    pieces = []
    for verse in everything.get(str(surah_num), []):
        pieces.append(verse.get('text', ''))
    return ' '.join(pieces)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def run_ctc_alignment(text: str):
    """Run CTC forced alignment of AUDIO_PATH against *text*.

    Returns the word-level timestamps produced by ctc-forced-aligner
    (a list of dicts with at least 'text', 'start', 'end').
    Frees the alignment model and the CUDA cache before returning so the
    physics stage has the GPU memory back.
    """
    print("\n[1] Loading wav2vec alignment model...")
    # fp16 only on CUDA — fp16 inference is slow/unsupported on CPU.
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )

    print("\n[2] Loading audio...")
    # Load audio directly into the model's dtype/device to avoid later copies.
    audio_waveform = load_audio(str(AUDIO_PATH), alignment_model.dtype, alignment_model.device)

    print("\n[3] Generating CTC emissions...")
    emissions, stride = generate_emissions(
        alignment_model, audio_waveform, batch_size=BATCH_SIZE
    )
    print(f" Emissions shape: {emissions.shape}")

    print("\n[4] Preprocessing text...")
    # Romanize Arabic so tokens match the wav2vec model's vocabulary.
    tokens_starred, text_starred = preprocess_text(
        text,
        romanize=True,
        language="ara",
    )

    print("\n[5] Getting alignments...")
    segments, scores, blank_token = get_alignments(
        emissions, tokens_starred, alignment_tokenizer,
    )

    spans = get_spans(tokens_starred, segments, blank_token)
    word_timestamps = postprocess_results(text_starred, spans, stride, scores)

    print(f" Got {len(word_timestamps)} word alignments")

    # Cleanup GPU — the model is no longer needed after alignment.
    del alignment_model
    torch.cuda.empty_cache()

    return word_timestamps
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def convert_to_char_timings(word_timestamps):
    """Spread each word's time span evenly over its characters.

    Every non-whitespace character becomes a dict with its own start/end
    (seconds, rounded to 3 decimals), a running character index, and the
    index of the word it belongs to. Whitespace characters consume a time
    slot but emit no entry; a word with no visible characters does not
    advance the word index.
    """
    out = []
    next_word = 0

    for span in word_timestamps:
        token = span['text']
        t0 = span['start']
        t1 = span['end']
        per_char = (t1 - t0) / len(token) if token else 0

        emitted = False
        for pos, symbol in enumerate(token):
            if symbol.isspace():
                continue
            emitted = True
            out.append({
                "char": symbol,
                "start": round(t0 + pos * per_char, 3),
                "end": round(t0 + (pos + 1) * per_char, 3),
                "idx": len(out),
                "wordIdx": next_word,
            })

        if emitted:
            next_word += 1

    return out
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def apply_physics(char_timings, text):
    """Annotate char timings in place with Tajweed tags and physics scores.

    Parameters:
        char_timings: list of per-character dicts (start/end in seconds);
            mutated in place (adds 'tajweed', 'physics', 'score' keys).
        text: unused — kept for interface compatibility with callers.

    Returns:
        (char_timings, stats) where stats counts total/validated/passed/
        marginal/failed letters.

    NOTE(review): tags are paired with char_timings purely by index, which
    assumes the CTC character stream and the TajweedParser letter stream
    have identical order and count — verify for surahs with diacritics.
    """
    print("\n[6] Parsing Tajweed rules...")
    parser = TajweedParser()

    # Get all letter tags (re-reads the verses file rather than using *text*)
    all_tags = []
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        verses = json.load(f).get(str(SURAH), [])

    for verse in verses:
        word_tags = parser.parse_text(verse['text'])
        for word_tag in word_tags:
            for letter in word_tag.letters:
                all_tags.append({
                    'char': letter.char_visual,
                    'tajweed_type': letter.tajweed_type,
                    'physics_check': letter.physics_check,
                    'madd_count': letter.madd_count
                })

    print(f" Tajweed tags: {len(all_tags)}")

    # Load audio for physics (fresh librosa load at 22.05 kHz mono)
    print("\n[7] Loading audio for physics...")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Calibrate the duration model from plausible short-vowel segments
    # (50–150 ms heuristically taken as single-harakat vowels).
    vowels = [t['end'] - t['start'] for t in char_timings if 0.05 <= (t['end'] - t['start']) <= 0.15]
    if vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", vowels)
        print(f" Harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Apply physics
    print("\n[8] Applying physics validation...")
    stats = {'total': 0, 'validated': 0, 'passed': 0, 'marginal': 0, 'failed': 0}

    for i, entry in enumerate(char_timings):
        stats['total'] += 1

        # Index-based pairing; extra CTC chars past the tag list get no tag.
        if i < len(all_tags):
            tag = all_tags[i]
            entry['tajweed'] = tag['tajweed_type'].value

            if tag['physics_check'] != PhysicsCheck.NONE:
                stats['validated'] += 1
                start, end = entry['start'], entry['end']

                try:
                    check = tag['physics_check']

                    # Dispatch the appropriate acoustic check for the rule.
                    if check == PhysicsCheck.CHECK_RMS_BOUNCE:
                        val = physics.validate_qalqalah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_DURATION:
                        # Default to 2 harakaat when the parser gave no count.
                        val = physics.validate_madd(audio, start, end, tag['madd_count'] or 2)
                    elif check == PhysicsCheck.CHECK_GHUNNAH:
                        val = physics.validate_ghunnah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_FORMANT_F2:
                        val = physics.validate_tafkheem(audio, start, end)
                    else:
                        val = None

                    if val:
                        entry['physics'] = val.status.value
                        entry['score'] = float(round(val.score, 2))

                        if val.status == ValidationStatus.PASS:
                            stats['passed'] += 1
                        elif val.status == ValidationStatus.MARGINAL:
                            stats['marginal'] += 1
                        else:
                            stats['failed'] += 1
                # Best-effort: a failed acoustic check on one letter must not
                # abort the whole surah, so errors are deliberately swallowed
                # (the letter simply gets no 'physics' key).
                except Exception:
                    pass

    return char_timings, stats
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def main():
    """Full pipeline for Surah 90: CTC alignment → char timings → physics.

    Writes letter_timing_<SURAH>_ctc.json to OUTPUT_DIR and prints summary
    statistics plus a sample of the first 15 aligned characters.
    """
    print("=" * 60)
    print(f"CTC + Physics Pipeline: Surah {SURAH} (Al-Balad)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # Get text
    text = load_quran_text(SURAH)
    print(f"\nText length: {len(text)} chars")

    # Run CTC alignment
    word_timestamps = run_ctc_alignment(text)

    # Convert to char timings
    char_timings = convert_to_char_timings(word_timestamps)
    print(f"\n Total chars: {len(char_timings)}")

    # Apply physics (mutates char_timings; also returns it with stats)
    char_timings, stats = apply_physics(char_timings, text)

    # Print stats
    print(f"\n[9] Statistics:")
    print(f" Total: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")

    # Pass rate counts marginal results as acceptable.
    if stats['validated'] > 0:
        rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f" Pass Rate: {rate:.1f}%")

    # Save
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH}_ctc.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(char_timings, f, ensure_ascii=False, indent=2)
    print(f"\n[10] Saved: {output_path}")

    # Show sample
    print("\n=== First 15 characters ===")
    for ct in char_timings[:15]:
        tj = ct.get('tajweed', 'None')
        ph = ct.get('physics', '-')
        print(f" {ct['idx']:3d}: '{ct['char']}' @ {ct['start']:.3f}s | {tj} | {ph}")

    print("\n" + "=" * 60)
    print("✓ CTC + Physics Pipeline complete!")
    print(f" Output: {output_path}")
    print("=" * 60)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
if __name__ == "__main__":
|
| 259 |
+
main()
|
ctc_align_91.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CTC Forced Aligner + Physics for Surah 91 (Ash-Shams)
|
| 4 |
+
Uses ctc-forced-aligner (wav2vec CTC) + TajweedSST physics refinement.
|
| 5 |
+
|
| 6 |
+
Pipeline:
|
| 7 |
+
1. CTC Alignment: wav2vec forced alignment for letter timing
|
| 8 |
+
2. Tajweed Parser: Map letters to Tajweed rules
|
| 9 |
+
3. Physics Validation: Validate with acoustic physics
|
| 10 |
+
4. Export: MahQuranApp format
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
cd /Documents/26apps/tajweedsst
|
| 14 |
+
source venv/bin/activate
|
| 15 |
+
python3 ctc_align_91.py
|
| 16 |
+
"""
|
| 17 |
+
import json
|
| 18 |
+
import torch
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from ctc_forced_aligner import (
|
| 22 |
+
load_audio,
|
| 23 |
+
load_alignment_model,
|
| 24 |
+
generate_emissions,
|
| 25 |
+
preprocess_text,
|
| 26 |
+
get_alignments,
|
| 27 |
+
get_spans,
|
| 28 |
+
postprocess_results,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 32 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 33 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 34 |
+
from src.duration_model import DurationModel, MaddType
|
| 35 |
+
|
| 36 |
+
import librosa
|
| 37 |
+
|
| 38 |
+
# Config — all inputs/outputs live inside the MahQuranApp project tree.
SURAH = 91  # Ash-Shams
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")  # NOTE(review): machine-specific absolute path
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"  # per-surah verse text source
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"  # destination for letter-timing JSON
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_091.mp3"  # recitation to align
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # prefer GPU when available
BATCH_SIZE = 4  # batch size for CTC emission generation
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def load_quran_text(surah_num: int) -> str:
    """Return the concatenated verse text of one surah from verses_v4.json.

    A surah number absent from the file produces an empty string; verses
    missing a 'text' field contribute an empty segment.
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as src:
        by_surah = json.load(src)
    selected = by_surah.get(str(surah_num), [])
    return ' '.join(entry.get('text', '') for entry in selected)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def run_ctc_alignment(text: str):
    """Run CTC forced alignment of AUDIO_PATH against *text*.

    Returns word-level timestamps from ctc-forced-aligner (dicts with at
    least 'text', 'start', 'end'). Releases the model and empties the CUDA
    cache before returning so the physics stage can use the GPU memory.
    """
    print("\n[1] Loading wav2vec alignment model...")
    # fp16 only on CUDA — fp16 inference is slow/unsupported on CPU.
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )

    print("\n[2] Loading audio...")
    # Load audio directly into the model's dtype/device to avoid later copies.
    audio_waveform = load_audio(str(AUDIO_PATH), alignment_model.dtype, alignment_model.device)

    print("\n[3] Generating CTC emissions...")
    emissions, stride = generate_emissions(
        alignment_model, audio_waveform, batch_size=BATCH_SIZE
    )
    print(f" Emissions shape: {emissions.shape}")

    print("\n[4] Preprocessing text...")
    # Romanize Arabic so tokens match the wav2vec model's vocabulary.
    tokens_starred, text_starred = preprocess_text(
        text,
        romanize=True,
        language="ara",
    )

    print("\n[5] Getting alignments...")
    segments, scores, blank_token = get_alignments(
        emissions, tokens_starred, alignment_tokenizer,
    )

    spans = get_spans(tokens_starred, segments, blank_token)
    word_timestamps = postprocess_results(text_starred, spans, stride, scores)

    print(f" Got {len(word_timestamps)} word alignments")

    # Cleanup GPU — the model is no longer needed after alignment.
    del alignment_model
    torch.cuda.empty_cache()

    return word_timestamps
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def convert_to_char_timings(word_timestamps):
    """Interpolate word-level CTC timestamps down to per-character timing.

    Each word's duration is divided uniformly among all its characters;
    whitespace positions keep their time slot but produce no output entry.
    Each emitted dict carries start/end in seconds (3-decimal rounding),
    a global character index, and the owning word's index. Words that
    contain no visible characters do not advance the word counter.
    """
    char_timings = []
    word_idx = 0

    for wt in word_timestamps:
        word, start, end = wt['text'], wt['start'], wt['end']
        step = (end - start) / len(word) if word else 0

        visible = [(i, c) for i, c in enumerate(word) if not c.isspace()]
        for i, c in visible:
            char_timings.append({
                "char": c,
                "start": round(start + i * step, 3),
                "end": round(start + (i + 1) * step, 3),
                "idx": len(char_timings),
                "wordIdx": word_idx,
            })

        if visible:
            word_idx += 1

    return char_timings
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def apply_physics(char_timings, text):
    """Annotate char timings in place with Tajweed tags and physics scores.

    Parameters:
        char_timings: list of per-character dicts (start/end in seconds);
            mutated in place (adds 'tajweed', 'physics', 'score' keys).
        text: unused — kept for interface compatibility with callers.

    Returns:
        (char_timings, stats) where stats counts total/validated/passed/
        marginal/failed letters.

    NOTE(review): tags are paired with char_timings purely by index, which
    assumes the CTC character stream and the TajweedParser letter stream
    have identical order and count — verify for surahs with diacritics.
    """
    print("\n[6] Parsing Tajweed rules...")
    parser = TajweedParser()

    # Get all letter tags (re-reads the verses file rather than using *text*)
    all_tags = []
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        verses = json.load(f).get(str(SURAH), [])

    for verse in verses:
        word_tags = parser.parse_text(verse['text'])
        for word_tag in word_tags:
            for letter in word_tag.letters:
                all_tags.append({
                    'char': letter.char_visual,
                    'tajweed_type': letter.tajweed_type,
                    'physics_check': letter.physics_check,
                    'madd_count': letter.madd_count
                })

    print(f" Tajweed tags: {len(all_tags)}")

    # Load audio for physics (fresh librosa load at 22.05 kHz mono)
    print("\n[7] Loading audio for physics...")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Calibrate the duration model from plausible short-vowel segments
    # (50–150 ms heuristically taken as single-harakat vowels).
    vowels = [t['end'] - t['start'] for t in char_timings if 0.05 <= (t['end'] - t['start']) <= 0.15]
    if vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", vowels)
        print(f" Harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Apply physics
    print("\n[8] Applying physics validation...")
    stats = {'total': 0, 'validated': 0, 'passed': 0, 'marginal': 0, 'failed': 0}

    for i, entry in enumerate(char_timings):
        stats['total'] += 1

        # Index-based pairing; extra CTC chars past the tag list get no tag.
        if i < len(all_tags):
            tag = all_tags[i]
            entry['tajweed'] = tag['tajweed_type'].value

            if tag['physics_check'] != PhysicsCheck.NONE:
                stats['validated'] += 1
                start, end = entry['start'], entry['end']

                try:
                    check = tag['physics_check']

                    # Dispatch the appropriate acoustic check for the rule.
                    if check == PhysicsCheck.CHECK_RMS_BOUNCE:
                        val = physics.validate_qalqalah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_DURATION:
                        # Default to 2 harakaat when the parser gave no count.
                        val = physics.validate_madd(audio, start, end, tag['madd_count'] or 2)
                    elif check == PhysicsCheck.CHECK_GHUNNAH:
                        val = physics.validate_ghunnah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_FORMANT_F2:
                        val = physics.validate_tafkheem(audio, start, end)
                    else:
                        val = None

                    if val:
                        entry['physics'] = val.status.value
                        entry['score'] = float(round(val.score, 2))

                        if val.status == ValidationStatus.PASS:
                            stats['passed'] += 1
                        elif val.status == ValidationStatus.MARGINAL:
                            stats['marginal'] += 1
                        else:
                            stats['failed'] += 1
                # Best-effort: a failed acoustic check on one letter must not
                # abort the whole surah, so errors are deliberately swallowed
                # (the letter simply gets no 'physics' key).
                except Exception:
                    pass

    return char_timings, stats
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def main():
    """Full pipeline for Surah 91: CTC alignment → char timings → physics.

    Writes letter_timing_<SURAH>_ctc.json to OUTPUT_DIR and prints summary
    statistics plus a sample of the first 15 aligned characters.
    """
    print("=" * 60)
    print(f"CTC + Physics Pipeline: Surah {SURAH} (Ash-Shams)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # Get text
    text = load_quran_text(SURAH)
    print(f"\nText length: {len(text)} chars")

    # Run CTC alignment
    word_timestamps = run_ctc_alignment(text)

    # Convert to char timings
    char_timings = convert_to_char_timings(word_timestamps)
    print(f"\n Total chars: {len(char_timings)}")

    # Apply physics (mutates char_timings; also returns it with stats)
    char_timings, stats = apply_physics(char_timings, text)

    # Print stats
    print(f"\n[9] Statistics:")
    print(f" Total: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")

    # Pass rate counts marginal results as acceptable.
    if stats['validated'] > 0:
        rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f" Pass Rate: {rate:.1f}%")

    # Save
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH}_ctc.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(char_timings, f, ensure_ascii=False, indent=2)
    print(f"\n[10] Saved: {output_path}")

    # Show sample
    print("\n=== First 15 characters ===")
    for ct in char_timings[:15]:
        tj = ct.get('tajweed', 'None')
        ph = ct.get('physics', '-')
        print(f" {ct['idx']:3d}: '{ct['char']}' @ {ct['start']:.3f}s | {tj} | {ph}")

    print("\n" + "=" * 60)
    print("✓ CTC + Physics Pipeline complete!")
    print(f" Output: {output_path}")
    print("=" * 60)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
if __name__ == "__main__":
|
| 259 |
+
main()
|
physics_analyzer.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Physics Wave Analyzer for Surah 90
|
| 4 |
+
|
| 5 |
+
Validates Tajweed rules using actual audio signal processing:
|
| 6 |
+
- Qalqalah: RMS energy dip→spike pattern
|
| 7 |
+
- Madd: Duration verification (2x, 4x, 6x average)
|
| 8 |
+
- Tafkheem: Low-frequency energy presence
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import json
|
| 12 |
+
import numpy as np
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
import librosa
|
| 17 |
+
HAS_LIBROSA = True
|
| 18 |
+
except ImportError:
|
| 19 |
+
HAS_LIBROSA = False
|
| 20 |
+
print("WARNING: librosa not available")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def convert_to_json_safe(obj):
    """Recursively convert numpy types to JSON-serializable Python types.

    Handles:
      - dicts and lists: converted element-wise;
      - tuples: converted element-wise and returned as lists (json encodes
        tuples as arrays anyway, but numpy scalars *inside* a tuple would
        otherwise slip through unconverted and break json.dump);
      - np.bool_ (not a subclass of np.integer/np.floating, so it needs
        its own branch), np.floating, np.integer: unwrapped to Python
        bool/float/int;
      - np.ndarray: via tolist(), which already yields pure Python types.

    Any other object is returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: convert_to_json_safe(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return [convert_to_json_safe(i) for i in obj]
    elif isinstance(obj, np.bool_):
        return bool(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj
|
| 36 |
+
|
| 37 |
+
# Paths — input audio/timing come from MahQuranApp; analysis output stays
# local to this script's directory.
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"  # NOTE(review): machine-specific absolute path
TIMING_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/letter_timing_90.json"  # per-letter timing with Tajweed tags
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_physics.json"  # physics analysis result
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def load_audio():
    """Load the Surah 90 recording (AUDIO_PATH) resampled to 22.05 kHz mono.

    Returns (samples, sample_rate).

    NOTE(review): this calls librosa unconditionally even though the module
    import is guarded by HAS_LIBROSA — if librosa is absent this raises
    NameError; consider checking HAS_LIBROSA here.
    """
    print(f"Loading: {AUDIO_PATH}")
    y, sr = librosa.load(AUDIO_PATH, sr=22050)
    duration = len(y) / sr
    print(f" Duration: {duration:.1f}s, Sample rate: {sr}Hz")
    return y, sr
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def load_timing():
    """Read the per-letter timing JSON (with Tajweed tags) from TIMING_PATH."""
    with open(TIMING_PATH, 'r', encoding='utf-8') as handle:
        timing_data = json.load(handle)
    return timing_data
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def extract_segment(y, sr, start, end):
    """Slice the waveform between two timestamps.

    start/end are in seconds; they are converted to sample indices by
    truncation (int), so sub-sample offsets round toward zero.
    """
    lo = int(start * sr)
    hi = int(end * sr)
    return y[lo:hi]
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def analyze_qalqalah(segment, sr):
    """
    Analyze Qalqalah (bounce) pattern.
    Expected: RMS dip followed by spike at letter end.
    """
    # Too little audio to frame reliably.
    if len(segment) < 512:
        return {"status": "TOO_SHORT", "confidence": 0.0}

    # Frame-wise RMS energy envelope.
    energy = librosa.feature.rms(y=segment, frame_length=256, hop_length=64)[0]

    if len(energy) < 4:
        return {"status": "INSUFFICIENT_FRAMES", "confidence": 0.0}

    # Split the envelope into thirds and compare their mean levels.
    third = len(energy) // 3
    if third < 1:
        return {"status": "TOO_SHORT", "confidence": 0.0}

    head = np.mean(energy[:third])
    mid = np.mean(energy[third:2 * third])
    tail = np.mean(energy[2 * third:])

    # Qalqalah signature: a dip in the middle, then a spike at the end.
    dipped = mid < head * 0.9
    spiked = tail > mid * 1.1

    if dipped and spiked:
        score = min(1.0, (head - mid) / head + (tail - mid) / tail)
        return {
            "status": "DETECTED",
            "confidence": round(score, 3),
            "pattern": {"first": round(float(head), 4), "middle": round(float(mid), 4), "last": round(float(tail), 4)}
        }
    if spiked:
        return {"status": "PARTIAL_SPIKE", "confidence": 0.5}
    return {"status": "NOT_DETECTED", "confidence": 0.2}
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def analyze_madd(segment, sr, expected_count):
    """
    Analyze Madd (elongation) duration.
    Verify letter duration matches expected count (2, 4, or 6 harakaat).
    """
    # Average haraka duration ~100-150ms for Tarteel recitation.
    base_haraka = 120  # ms
    expected_ms = expected_count * base_haraka
    actual_ms = len(segment) / sr * 1000

    ratio = actual_ms / expected_ms if expected_ms > 0 else 0

    # Classify: within ±30% is correct, within ±50% is close, else mismatch.
    if 0.7 <= ratio <= 1.3:
        status, confidence = "CORRECT", 1.0 - abs(1.0 - ratio)
    elif 0.5 <= ratio <= 1.5:
        status, confidence = "CLOSE", 0.6
    else:
        status, confidence = "MISMATCH", 0.3

    return {
        "status": status,
        "confidence": round(confidence, 3),
        "actual_ms": round(actual_ms, 1),
        "expected_ms": round(expected_ms, 1),
        "ratio": round(ratio, 2)
    }
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def analyze_tafkheem(segment, sr):
    """
    Analyze Tafkheem (heaviness) - heavy letters have stronger low frequencies.
    """
    if len(segment) < 1024:
        return {"status": "TOO_SHORT", "confidence": 0.0}

    # Spectral centroid: a lower spectral centre of mass = heavier articulation.
    brightness = float(np.mean(librosa.feature.spectral_centroid(y=segment, sr=sr)[0]))

    # Thresholds: heavy < 1800 Hz, moderate < 2200 Hz, otherwise light.
    if brightness < 1800:
        status, confidence = "HEAVY", 0.9
    elif brightness < 2200:
        status, confidence = "MODERATE", 0.7
    else:
        status, confidence = "LIGHT", 0.4

    return {
        "status": status,
        "confidence": round(confidence, 3),
        "spectral_centroid": round(brightness, 1)
    }
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def run_analysis():
    """Run physics analysis on all tagged letters.

    Pipeline: load audio + timing JSON, route each Tajweed-tagged letter to
    the matching analyzer (qalqalah / madd / tafkheem), aggregate pass rates,
    write the result JSON to OUTPUT_PATH and print sample rows.
    Returns the results dict, or None when librosa is missing.
    """

    print("=" * 60)
    print("Physics Wave Analysis - Surah 90")
    print("=" * 60)

    # Bail out early when librosa failed to import (see module-level guard).
    if not HAS_LIBROSA:
        print("ERROR: librosa required for analysis")
        return

    # Load data
    y, sr = load_audio()
    timing = load_timing()

    print(f"\n[1] Analyzing {len(timing)} letters...")

    # Analyze each tagged letter
    results = {
        "qalqalah": [],
        "madd": [],
        "tafkheem": [],
        "summary": {}
    }

    # counts = letters routed per category; passed = letters whose analysis
    # met the per-category acceptance threshold below.
    counts = {"qalqalah": 0, "madd": 0, "tafkheem": 0, "other": 0}
    passed = {"qalqalah": 0, "madd": 0, "tafkheem": 0}

    for entry in timing:
        tajweed = entry.get("tajweed_type", "None")
        physics = entry.get("physics_check", "None")

        # Entries without a Tajweed tag or physics check are skipped entirely.
        if tajweed == "None" or physics == "None":
            continue

        start = entry.get("start", 0)
        end = entry.get("end", 0)
        char = entry.get("char", "")

        segment = extract_segment(y, sr, start, end)

        # Routing is by substring of the tag, first match wins (elif chain),
        # so a letter is counted in at most one category.
        if "qalqalah" in tajweed.lower():
            counts["qalqalah"] += 1
            analysis = analyze_qalqalah(segment, sr)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            analysis["tajweed"] = tajweed
            results["qalqalah"].append(analysis)
            if analysis["confidence"] >= 0.5:
                passed["qalqalah"] += 1

        elif "madd" in tajweed.lower():
            counts["madd"] += 1
            madd_count = entry.get("madd_count", 2)  # default: natural madd (2 harakaat)
            analysis = analyze_madd(segment, sr, madd_count)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            analysis["tajweed"] = tajweed
            analysis["expected_count"] = madd_count
            results["madd"].append(analysis)
            if analysis["confidence"] >= 0.5:
                passed["madd"] += 1

        elif "tafkheem" in tajweed.lower():
            counts["tafkheem"] += 1
            analysis = analyze_tafkheem(segment, sr)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            analysis["tajweed"] = tajweed
            results["tafkheem"].append(analysis)
            # Tafkheem passes on status rather than numeric confidence.
            if analysis["status"] in ["HEAVY", "MODERATE"]:
                passed["tafkheem"] += 1

        else:
            counts["other"] += 1

    # Summary — max(1, n) avoids division by zero when a category is empty.
    results["summary"] = {
        "qalqalah": {"total": counts["qalqalah"], "passed": passed["qalqalah"], "rate": round(passed["qalqalah"]/max(1,counts["qalqalah"]), 2)},
        "madd": {"total": counts["madd"], "passed": passed["madd"], "rate": round(passed["madd"]/max(1,counts["madd"]), 2)},
        "tafkheem": {"total": counts["tafkheem"], "passed": passed["tafkheem"], "rate": round(passed["tafkheem"]/max(1,counts["tafkheem"]), 2)},
    }

    # Print results
    print("\n[2] Results:")
    print(f" Qalqalah: {passed['qalqalah']}/{counts['qalqalah']} passed ({results['summary']['qalqalah']['rate']*100:.0f}%)")
    print(f" Madd: {passed['madd']}/{counts['madd']} passed ({results['summary']['madd']['rate']*100:.0f}%)")
    print(f" Tafkheem: {passed['tafkheem']}/{counts['tafkheem']} passed ({results['summary']['tafkheem']['rate']*100:.0f}%)")

    # Save (convert numpy scalars first — json.dump cannot serialize them).
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(convert_to_json_safe(results), f, ensure_ascii=False, indent=2)
    print(f"\n[3] Saved: {OUTPUT_PATH}")

    # Show samples
    print("\n[4] Sample Qalqalah Analysis:")
    for r in results["qalqalah"][:3]:
        print(f" [{r['char']}] {r['time']} → {r['status']} (conf: {r['confidence']})")

    print("\n[5] Sample Madd Analysis:")
    for r in results["madd"][:3]:
        print(f" [{r['char']}] {r['actual_ms']:.0f}ms vs {r['expected_ms']:.0f}ms → {r['status']}")

    print("\n" + "=" * 60)
    print("✓ Physics Analysis Complete!")
    print("=" * 60)

    return results


if __name__ == "__main__":
    run_analysis()
|
physics_analyzer_v2.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Enhanced Physics Wave Analyzer - Using Lisan al-Arab Principles
|
| 4 |
+
|
| 5 |
+
Integrated from MahQuranApp/scripts/lisan_madd_detector.py
|
| 6 |
+
|
| 7 |
+
Key techniques:
|
| 8 |
+
1. Sustained region detection (spectral flux + energy stability)
|
| 9 |
+
2. Anti-drift stabilization (gap closing + minimum duration)
|
| 10 |
+
3. Per-character Tajweed physics analysis
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
import numpy as np
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from scipy.ndimage import gaussian_filter1d
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
import librosa
|
| 20 |
+
HAS_LIBROSA = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
HAS_LIBROSA = False
|
| 23 |
+
print("WARNING: librosa not available")
|
| 24 |
+
|
| 25 |
+
# Paths
# NOTE(review): absolute, machine-specific paths — TODO make configurable.
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"
TIMING_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/letter_timing_90.json"
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_physics_v2.json"

# Tajweed character sets (base letters; entries are matched on char[0]).
MADD_LETTERS = set('اويٱى')  # elongation vowels
QALQALAH_LETTERS = set('قطبجد')  # bouncing ("qutb jad") letters
TAFKHEEM_LETTERS = set('صضطظخغق')  # emphatic / heavy consonants
HALQ_LETTERS = set('ءهعحغخ')  # throat letters — declared but not referenced in this script's visible code; TODO confirm intended use
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def convert_to_json_safe(obj):
    """Recursively convert numpy types to JSON-serializable Python types.

    Handles dicts, lists, numpy float/int/bool scalars and ndarrays.
    Any other object is returned unchanged and must already be
    serializable for json.dump to succeed.
    """
    if isinstance(obj, dict):
        return {k: convert_to_json_safe(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_json_safe(i) for i in obj]
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.bool_):
        # json cannot serialize np.bool_ — without this branch json.dump
        # raises TypeError (the v3 copy of this helper already handles it).
        return bool(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class LisanPhysicsAnalyzer:
    """
    Physics analyzer using Lisan al-Arab acoustic principles.

    Loads the full recording once at construction time; the per-rule checks
    (madd, qalqalah, tafkheem) then operate on time-sliced segments of it.
    """

    def __init__(self, audio_path, sr=16000, hop_length=256):
        # sr: analysis sample rate (audio is resampled on load).
        # hop_length: STFT/RMS hop used by the sustain detector.
        self.audio_path = str(audio_path)
        self.sr = sr
        self.hop_length = hop_length

        print(f"Loading audio: {audio_path}")
        self.audio, _ = librosa.load(self.audio_path, sr=self.sr)
        self.duration = len(self.audio) / self.sr
        print(f" Duration: {self.duration:.1f}s, Sample rate: {sr}Hz")

    def extract_segment(self, start, end):
        """Extract audio segment by time (seconds -> sample slice)."""
        start_sample = int(start * self.sr)
        end_sample = int(end * self.sr)
        return self.audio[start_sample:end_sample]

    def detect_sustained_regions(self, segment):
        """
        Detect regions where sound is SUSTAINED (استمرّ).
        From LisanMaddDetector - detects madd vowels being held.

        Returns: array of sustain scores per frame (higher = more sustained).
        Segments shorter than 512 samples return a single zero frame.
        """
        if len(segment) < 512:
            return np.zeros(1)

        # 1. Compute spectral flux (low flux = sustained sound)
        S = np.abs(librosa.stft(segment, hop_length=self.hop_length))
        flux = np.sqrt(np.sum(np.diff(S, axis=1)**2, axis=0))
        # Prepend 0 so flux aligns frame-for-frame with the STFT columns.
        flux = np.concatenate([[0], flux])
        flux = gaussian_filter1d(flux.astype(np.float64), sigma=2)

        # Invert: high score where flux is LOW (sustained sound)
        max_flux = np.max(flux) if np.max(flux) > 0 else 1
        sustain_score = 1 - (flux / max_flux)

        # 2. Check energy stability (sustained sounds have stable RMS)
        energy = librosa.feature.rms(y=segment, hop_length=self.hop_length)[0]
        energy = gaussian_filter1d(energy.astype(np.float64), sigma=2)

        # Energy stability: low variance in local windows.
        # Edge frames (first/last `window`) keep stability 0 by construction.
        stability = np.zeros_like(energy)
        window = 5
        for i in range(window, len(energy) - window):
            local_std = np.std(energy[max(0, i-window):i+window])
            local_mean = np.mean(energy[max(0, i-window):i+window])
            if local_mean > 0:
                stability[i] = 1 - min(local_std / local_mean, 1)

        # Pad stability to match sustain_score length
        min_len = min(len(sustain_score), len(stability))
        sustain_score = sustain_score[:min_len]
        stability = stability[:min_len]

        # Combined score: both low flux AND stable energy = sustained vowel
        combined = sustain_score * stability

        return combined

    def analyze_madd(self, segment, char, expected_count=2):
        """
        Analyze Madd (elongation) using sustain detection.

        Args:
            segment: waveform slice for the letter.
            char: the letter under analysis (currently unused in this body).
            expected_count: expected harakaat (2, 4 or 6); default natural madd.
        """
        duration_ms = len(segment) / self.sr * 1000

        # Detect sustained regions
        sustain_scores = self.detect_sustained_regions(segment)
        avg_sustain = np.mean(sustain_scores) if len(sustain_scores) > 0 else 0

        # Calculate expected duration
        base_haraka = 100  # ms per haraka (Abdul Basit is slower)
        expected_duration = expected_count * base_haraka

        # Determine if sustain matches expected madd
        if avg_sustain > 0.5:
            detected_count = 3 if avg_sustain > 0.7 else 2
        else:
            detected_count = 1

        ratio = duration_ms / expected_duration if expected_duration > 0 else 0

        # Status needs BOTH sufficient duration and a sustained envelope.
        if ratio >= 0.7 and avg_sustain >= 0.4:
            status = "SUSTAINED"
            confidence = 0.8 if avg_sustain > 0.6 else 0.6
        elif ratio >= 0.5:
            status = "PARTIAL"
            confidence = 0.5
        else:
            status = "SHORT"
            confidence = 0.3

        return {
            "status": status,
            "confidence": round(confidence, 3),
            "actual_ms": round(duration_ms, 1),
            "expected_ms": round(expected_duration, 1),
            "ratio": round(ratio, 2),
            "sustain_score": round(avg_sustain, 3),
            "detected_count": detected_count
        }

    def analyze_qalqalah(self, segment):
        """
        Analyze Qalqalah (bounce) using RMS energy patterns.
        Improved: checks for energy release at end of segment.
        """
        if len(segment) < 256:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        # Use smaller frame for short segments
        frame_length = min(256, len(segment) // 2)
        hop = frame_length // 4

        rms = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop)[0]

        if len(rms) < 3:
            return {"status": "INSUFFICIENT_FRAMES", "confidence": 0.0}

        # Qalqalah pattern: should have energy release at end
        # Look at last third vs first two-thirds
        split_idx = len(rms) * 2 // 3
        first_part = np.mean(rms[:split_idx])
        last_part = np.mean(rms[split_idx:])

        # Also check for any spike in segment
        max_rms = np.max(rms)
        mean_rms = np.mean(rms)

        # 0.01 RMS floor is an empirical silence threshold — TODO confirm
        # it suits recordings at other gain levels.
        has_energy = mean_rms > 0.01
        has_release = last_part > first_part * 0.8  # Energy maintained or released at end
        has_spike = max_rms > mean_rms * 1.3

        if has_energy and has_release and has_spike:
            # Confidence grows with spike prominence, capped at 0.9.
            confidence = min(0.9, (max_rms / mean_rms - 1) + 0.5)
            return {
                "status": "DETECTED",
                "confidence": round(confidence, 3),
                "pattern": {
                    "first": round(float(first_part), 4),
                    "last": round(float(last_part), 4),
                    "max": round(float(max_rms), 4),
                    "mean": round(float(mean_rms), 4)
                }
            }
        elif has_energy:
            return {"status": "PARTIAL", "confidence": 0.4}
        else:
            return {"status": "NO_ENERGY", "confidence": 0.1}

    def analyze_tafkheem(self, segment):
        """
        Analyze Tafkheem (heaviness) using spectral centroid.
        Heavy consonants have lower spectral centroid (more bass).
        """
        if len(segment) < 512:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        # Compute spectral centroid
        centroid = librosa.feature.spectral_centroid(y=segment, sr=self.sr)[0]
        mean_centroid = np.mean(centroid)

        # Also check low-frequency energy ratio (share of magnitude < 1 kHz).
        S = np.abs(librosa.stft(segment))
        freqs = librosa.fft_frequencies(sr=self.sr)
        low_freq_idx = np.where(freqs < 1000)[0]
        high_freq_idx = np.where(freqs >= 1000)[0]

        low_energy = np.sum(S[low_freq_idx, :])
        high_energy = np.sum(S[high_freq_idx, :])
        total_energy = low_energy + high_energy

        # Neutral 0.5 fallback when the segment is pure silence.
        low_ratio = low_energy / total_energy if total_energy > 0 else 0.5

        # Heavy letters: low centroid + high low-frequency ratio
        if mean_centroid < 1500 and low_ratio > 0.6:
            status = "HEAVY"
            confidence = 0.9
        elif mean_centroid < 2000 or low_ratio > 0.5:
            status = "MODERATE"
            confidence = 0.7
        else:
            status = "LIGHT"
            confidence = 0.4

        return {
            "status": status,
            "confidence": round(confidence, 3),
            "spectral_centroid": round(float(mean_centroid), 1),
            "low_freq_ratio": round(float(low_ratio), 3)
        }
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def run_enhanced_analysis():
    """Run enhanced physics analysis on all tagged letters.

    Unlike v1's tag-driven elif chain, letters are routed by their BASE
    character (char[0]) through independent ifs, so one letter may be
    analyzed under several categories. Writes results to OUTPUT_PATH and
    returns the results dict (None when librosa is missing).
    """

    print("=" * 60)
    print("Enhanced Physics Analysis - Surah 90")
    print("Using Lisan al-Arab Acoustic Principles")
    print("=" * 60)

    # Bail out early when librosa failed to import (module-level guard).
    if not HAS_LIBROSA:
        print("ERROR: librosa required for analysis")
        return

    # Load analyzer
    analyzer = LisanPhysicsAnalyzer(AUDIO_PATH)

    # Load timing data
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        timing = json.load(f)

    print(f"\n[1] Analyzing {len(timing)} letters...")

    # Results
    results = {
        "qalqalah": [],
        "madd": [],
        "tafkheem": [],
        "summary": {}
    }

    # counts = letters routed per category; passed = acceptance-threshold hits.
    counts = {"qalqalah": 0, "madd": 0, "tafkheem": 0}
    passed = {"qalqalah": 0, "madd": 0, "tafkheem": 0}

    for entry in timing:
        char = entry.get("char", "")
        base_char = char[0] if char else ""  # First char is base letter
        start = entry.get("start", 0)
        end = entry.get("end", 0)

        segment = analyzer.extract_segment(start, end)

        # Analyze based on character type (independent checks, not elif —
        # a letter such as ق is both a qalqalah and a tafkheem letter).
        if base_char in QALQALAH_LETTERS:
            counts["qalqalah"] += 1
            analysis = analyzer.analyze_qalqalah(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["qalqalah"].append(analysis)
            if analysis["confidence"] >= 0.4:
                passed["qalqalah"] += 1

        if base_char in MADD_LETTERS:
            counts["madd"] += 1
            madd_count = entry.get("madd_count", 2)  # default: natural madd
            analysis = analyzer.analyze_madd(segment, char, madd_count)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["madd"].append(analysis)
            if analysis["status"] in ["SUSTAINED", "PARTIAL"]:
                passed["madd"] += 1

        if base_char in TAFKHEEM_LETTERS:
            counts["tafkheem"] += 1
            analysis = analyzer.analyze_tafkheem(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["tafkheem"].append(analysis)
            if analysis["status"] in ["HEAVY", "MODERATE"]:
                passed["tafkheem"] += 1

    # Summary — max(1, n) guards against empty categories.
    results["summary"] = {
        "qalqalah": {
            "total": counts["qalqalah"],
            "passed": passed["qalqalah"],
            "rate": round(passed["qalqalah"] / max(1, counts["qalqalah"]), 2)
        },
        "madd": {
            "total": counts["madd"],
            "passed": passed["madd"],
            "rate": round(passed["madd"] / max(1, counts["madd"]), 2)
        },
        "tafkheem": {
            "total": counts["tafkheem"],
            "passed": passed["tafkheem"],
            "rate": round(passed["tafkheem"] / max(1, counts["tafkheem"]), 2)
        },
    }

    # Print results
    print("\n[2] Results (Using Lisan Acoustic Detection):")
    print(f" Qalqalah: {passed['qalqalah']}/{counts['qalqalah']} ({results['summary']['qalqalah']['rate']*100:.0f}%)")
    print(f" Madd: {passed['madd']}/{counts['madd']} ({results['summary']['madd']['rate']*100:.0f}%)")
    print(f" Tafkheem: {passed['tafkheem']}/{counts['tafkheem']} ({results['summary']['tafkheem']['rate']*100:.0f}%)")

    # Save (convert numpy scalars first — json.dump cannot serialize them).
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(convert_to_json_safe(results), f, ensure_ascii=False, indent=2)
    print(f"\n[3] Saved: {OUTPUT_PATH}")

    # Show samples
    print("\n[4] Sample Qalqalah (Improved Detection):")
    for r in results["qalqalah"][:5]:
        print(f" [{r['char']}] {r['time']} → {r['status']} (conf: {r['confidence']})")

    print("\n[5] Sample Madd (Sustain Detection):")
    for r in results["madd"][:5]:
        print(f" [{r['char']}] {r['actual_ms']:.0f}ms, sustain:{r['sustain_score']:.2f} → {r['status']}")

    print("\n[6] Sample Tafkheem (Heavy Letter Detection):")
    for r in results["tafkheem"][:5]:
        print(f" [{r['char']}] centroid:{r['spectral_centroid']:.0f}Hz, low_ratio:{r['low_freq_ratio']:.2f} → {r['status']}")

    print("\n" + "=" * 60)
    print("✓ Enhanced Physics Analysis Complete!")
    print("=" * 60)

    return results


if __name__ == "__main__":
    run_enhanced_analysis()
|
physics_analyzer_v3.py
ADDED
|
@@ -0,0 +1,542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST Enhanced Analyzer v3
|
| 4 |
+
|
| 5 |
+
Integrated improvements:
|
| 6 |
+
1. Ghunnah detection (nasal resonance via parselmouth)
|
| 7 |
+
2. Pitch tracking for Madd (F0 contour stability)
|
| 8 |
+
3. Cross-word rules (Idgham, Ikhfa, Iqlab)
|
| 9 |
+
4. Neural-style confidence calibration
|
| 10 |
+
|
| 11 |
+
Architecture: Lisan al-Arab + DSP + Tajweed Science
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import numpy as np
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from scipy.ndimage import gaussian_filter1d
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
import librosa
|
| 21 |
+
HAS_LIBROSA = True
|
| 22 |
+
except ImportError:
|
| 23 |
+
HAS_LIBROSA = False
|
| 24 |
+
print("WARNING: librosa not available")
|
| 25 |
+
|
| 26 |
+
try:
|
| 27 |
+
import parselmouth
|
| 28 |
+
from parselmouth.praat import call
|
| 29 |
+
HAS_PARSELMOUTH = True
|
| 30 |
+
except ImportError:
|
| 31 |
+
HAS_PARSELMOUTH = False
|
| 32 |
+
print("WARNING: parselmouth not available (Ghunnah detection disabled)")
|
| 33 |
+
|
| 34 |
+
# Paths
# NOTE(review): absolute, machine-specific paths — TODO make configurable.
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"
TIMING_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/letter_timing_90.json"
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_physics_v3.json"

# Character sets
MADD_LETTERS = set('اويٱى')
QALQALAH_LETTERS = set('قطبجد')
TAFKHEEM_LETTERS = set('صضطظخغق')
GHUNNAH_LETTERS = set('نم')  # Nasal letters
HALQ_LETTERS = set('ءهعحغخ')

# Cross-word rule triggers
# NOTE(review): the Latin 'w' inside IDGHAM_TARGETS looks like a stray typo —
# it can never match Arabic text, and waw (و) is already in the set. Verify
# against the rule table and remove if unintended.
IDGHAM_TARGETS = set('يرملونw')  # Letters that cause Idgham after ن
IKHFA_TARGETS = set('تثجدذزسشصضطظفقك')  # Letters that cause Ikhfa after ن
IQLAB_TARGET = 'ب'  # ن before ب becomes م
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def convert_to_json_safe(obj):
    """Recursively replace numpy containers/scalars with plain Python values
    so the structure can be fed to json.dump unchanged."""
    if isinstance(obj, dict):
        return {key: convert_to_json_safe(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return [convert_to_json_safe(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    # Anything else is assumed to already be JSON-serializable.
    return obj
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class TajweedAnalyzerV3:
    """
    Enhanced Tajweed physics analyzer with full rule detection.

    Loads the recitation audio once (librosa for feature extraction and,
    when available, parselmouth/Praat for formant-based Ghunnah analysis)
    and exposes per-rule acoustic checks plus cross-word rule detection.
    """

    def __init__(self, audio_path, sr=16000, hop_length=256):
        """
        Args:
            audio_path: Path to the recitation audio file.
            sr: Target sample rate librosa resamples to.
            hop_length: Stored hop length for frame-based features.
        """
        self.audio_path = str(audio_path)
        self.sr = sr
        self.hop_length = hop_length

        print(f"Loading audio: {audio_path}")
        self.audio, _ = librosa.load(self.audio_path, sr=self.sr)
        self.duration = len(self.audio) / self.sr
        print(f" Duration: {self.duration:.1f}s")

        # Load for parselmouth (needs original file, not the resampled array)
        if HAS_PARSELMOUTH:
            self.sound = parselmouth.Sound(self.audio_path)

    def extract_segment(self, start, end):
        """Extract audio samples between two timestamps (seconds)."""
        start_sample = int(start * self.sr)
        end_sample = int(end * self.sr)
        return self.audio[start_sample:end_sample]

    # ===== GHUNNAH DETECTION (Nasal Resonance) =====

    def analyze_ghunnah(self, start, end, char):
        """
        Analyze Ghunnah (nasal resonance) using formant analysis.
        Nasal sounds have:
        1. Anti-formant (energy dip) around 500-1500 Hz
        2. Higher formant bandwidth
        3. Specific F1/F2 patterns

        Args:
            start, end: Segment boundaries in seconds (original audio).
            char: The letter being analyzed (currently informational only).

        Returns:
            Dict with "status", "confidence" and, on success, formant stats.
        """
        if not HAS_PARSELMOUTH:
            return {"status": "SKIPPED", "confidence": 0.0, "reason": "parselmouth unavailable"}

        try:
            # Extract segment from parselmouth sound
            segment = self.sound.extract_part(from_time=start, to_time=end, preserve_times=False)

            # Formant tracking needs a minimum amount of signal.
            if segment.get_total_duration() < 0.03:
                return {"status": "TOO_SHORT", "confidence": 0.0}

            # Burg-method formant tracking (5 formants up to 5500 Hz).
            formants = call(segment, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

            n_frames = call(formants, "Get number of frames")
            if n_frames < 1:
                return {"status": "NO_FRAMES", "confidence": 0.0}

            f1_values = []
            f2_values = []
            bandwidths = []

            # Collect per-frame F1/F2 and F1 bandwidth, skipping NaN frames.
            for i in range(1, n_frames + 1):
                time = call(formants, "Get time from frame number", i)
                f1 = call(formants, "Get value at time", 1, time, "Hertz", "Linear")
                f2 = call(formants, "Get value at time", 2, time, "Hertz", "Linear")
                bw1 = call(formants, "Get bandwidth at time", 1, time, "Hertz", "Linear")

                if not np.isnan(f1):
                    f1_values.append(f1)
                if not np.isnan(f2):
                    f2_values.append(f2)
                if not np.isnan(bw1):
                    bandwidths.append(bw1)

            if not f1_values or not bandwidths:
                return {"status": "NO_FORMANTS", "confidence": 0.0}

            avg_f1 = np.mean(f1_values)
            avg_f2 = np.mean(f2_values) if f2_values else 0
            avg_bandwidth = np.mean(bandwidths)

            # Ghunnah indicators:
            # 1. Low F1 (nasal cavity resonance) - typically 200-400 Hz
            # 2. High bandwidth (nasal damping)
            # 3. F2 in nasal range
            low_f1 = avg_f1 < 500
            high_bandwidth = avg_bandwidth > 150
            nasal_f2 = 800 < avg_f2 < 2000

            indicators = sum([low_f1, high_bandwidth, nasal_f2])

            # 2+ indicators => detected; scale confidence with extra evidence.
            if indicators >= 2:
                status = "DETECTED"
                confidence = 0.7 + (indicators - 2) * 0.15
            elif indicators == 1:
                status = "PARTIAL"
                confidence = 0.5
            else:
                status = "NOT_DETECTED"
                confidence = 0.2

            return {
                "status": status,
                "confidence": round(confidence, 3),
                "f1": round(avg_f1, 1),
                "f2": round(avg_f2, 1),
                "bandwidth": round(avg_bandwidth, 1),
                "indicators": {"low_f1": low_f1, "high_bandwidth": high_bandwidth, "nasal_f2": nasal_f2}
            }

        except Exception as e:
            return {"status": "ERROR", "confidence": 0.0, "error": str(e)}

    # ===== PITCH TRACKING FOR MADD =====

    def analyze_madd_pitch(self, segment, char, expected_count=2):
        """
        Analyze Madd (elongation) using pitch (F0) stability.
        Sustained vowels have stable pitch with minimal variation.

        Args:
            segment: Audio samples for the letter.
            char: The letter being analyzed (informational).
            expected_count: Expected elongation length in harakat.
        """
        duration_ms = len(segment) / self.sr * 1000

        # Extract pitch using librosa's probabilistic YIN tracker.
        try:
            f0, voiced_flag, voiced_probs = librosa.pyin(
                segment,
                fmin=50,
                fmax=500,
                sr=self.sr,
                frame_length=1024,
                hop_length=256
            )
        except Exception:
            # Fallback to basic duration-only sustain detection
            return self._basic_madd_analysis(segment, duration_ms, expected_count)

        # Filter to voiced frames only
        f0_voiced = f0[~np.isnan(f0)]

        if len(f0_voiced) < 3:
            return self._basic_madd_analysis(segment, duration_ms, expected_count)

        # Pitch stability: low coefficient of variation = sustained
        pitch_mean = np.mean(f0_voiced)
        pitch_std = np.std(f0_voiced)
        pitch_cv = pitch_std / pitch_mean if pitch_mean > 0 else 1.0

        # Voicing ratio: high means continuous sound
        voicing_ratio = len(f0_voiced) / len(f0)

        pitch_stable = pitch_cv < 0.15
        well_voiced = voicing_ratio > 0.6

        # Expected duration: one haraka is taken as ~100 ms here.
        base_haraka = 100  # ms
        expected_duration = expected_count * base_haraka
        duration_match = 0.7 <= (duration_ms / expected_duration) <= 1.5 if expected_duration > 0 else False

        if pitch_stable and well_voiced and duration_match:
            status = "SUSTAINED"
            confidence = 0.85
        elif (pitch_stable and well_voiced) or (well_voiced and duration_match):
            status = "PARTIAL"
            confidence = 0.6
        elif well_voiced:
            status = "VOICED"
            confidence = 0.4
        else:
            status = "WEAK"
            confidence = 0.2

        return {
            "status": status,
            "confidence": round(confidence, 3),
            "duration_ms": round(duration_ms, 1),
            "expected_ms": round(expected_duration, 1),
            "pitch_mean": round(pitch_mean, 1),
            "pitch_cv": round(pitch_cv, 3),
            "voicing_ratio": round(voicing_ratio, 3)
        }

    def _basic_madd_analysis(self, segment, duration_ms, expected_count):
        """Fallback Madd check using only duration vs. expected harakat."""
        expected_duration = expected_count * 100
        ratio = duration_ms / expected_duration if expected_duration > 0 else 0

        if 0.7 <= ratio <= 1.5:
            return {"status": "SUSTAINED", "confidence": 0.5, "duration_ms": round(duration_ms, 1)}
        return {"status": "WEAK", "confidence": 0.3, "duration_ms": round(duration_ms, 1)}

    # ===== QALQALAH (Improved) =====

    def analyze_qalqalah(self, segment):
        """Improved Qalqalah detection via the RMS energy-release pattern."""
        if len(segment) < 256:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        frame_length = min(256, len(segment) // 2)
        hop = frame_length // 4

        rms = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop)[0]

        if len(rms) < 3:
            return {"status": "INSUFFICIENT", "confidence": 0.0}

        # Qalqalah: energy release ("bounce") in the final third of the letter.
        split = len(rms) * 2 // 3
        first = np.mean(rms[:split])
        last = np.mean(rms[split:])
        max_rms = np.max(rms)
        mean_rms = np.mean(rms)

        has_energy = mean_rms > 0.01
        has_release = last > first * 0.8
        has_spike = max_rms > mean_rms * 1.3

        if has_energy and has_release and has_spike:
            # Scale confidence with spike prominence, capped at 0.9.
            confidence = min(0.9, (max_rms / mean_rms - 1) + 0.5)
            return {"status": "DETECTED", "confidence": round(confidence, 3)}
        elif has_energy:
            return {"status": "PARTIAL", "confidence": 0.4}
        return {"status": "NO_ENERGY", "confidence": 0.1}

    # ===== TAFKHEEM (Heavy Letters) =====

    def analyze_tafkheem(self, segment):
        """Analyze Tafkheem (heaviness) via spectral centroid + low-band energy."""
        if len(segment) < 512:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        centroid = librosa.feature.spectral_centroid(y=segment, sr=self.sr)[0]
        mean_centroid = np.mean(centroid)

        # Ratio of spectral energy below 1 kHz vs. above.
        S = np.abs(librosa.stft(segment))
        freqs = librosa.fft_frequencies(sr=self.sr)
        low_idx = np.where(freqs < 1000)[0]
        high_idx = np.where(freqs >= 1000)[0]

        low_energy = np.sum(S[low_idx, :])
        high_energy = np.sum(S[high_idx, :])
        total = low_energy + high_energy
        low_ratio = low_energy / total if total > 0 else 0.5

        # Heavy letters concentrate energy low with a dark centroid.
        if mean_centroid < 1500 and low_ratio > 0.6:
            return {"status": "HEAVY", "confidence": 0.9, "centroid": round(mean_centroid, 1)}
        elif mean_centroid < 2000 or low_ratio > 0.5:
            return {"status": "MODERATE", "confidence": 0.7, "centroid": round(mean_centroid, 1)}
        return {"status": "LIGHT", "confidence": 0.4, "centroid": round(mean_centroid, 1)}

    # ===== CROSS-WORD RULES =====

    def analyze_cross_word_rules(self, timing_data):
        """
        Analyze cross-word Tajweed rules:
        - Idgham: ن/م merges into following letter
        - Ikhfa: ن partially hidden before certain letters
        - Iqlab: ن becomes م sound before ب

        Args:
            timing_data: Letter-timing dicts with "char"/"start"/"end" keys.

        Returns:
            Dict with "idgham", "ikhfa" and "iqlab" occurrence lists.
        """
        results = {
            "idgham": [],
            "ikhfa": [],
            "iqlab": []
        }

        for i, entry in enumerate(timing_data):
            char = entry.get("char", "")
            base_char = char[0] if char else ""

            # Check if this is a Noon with Sukun or Tanween
            has_sukun = 'ْ' in char
            has_tanween = any(c in char for c in 'ًٌٍ')
            is_noon_trigger = base_char == 'ن' and (has_sukun or has_tanween)
            is_meem_trigger = base_char == 'م' and has_sukun

            if not (is_noon_trigger or is_meem_trigger):
                continue

            # Look at next letter
            if i + 1 >= len(timing_data):
                continue

            next_entry = timing_data[i + 1]
            next_char = next_entry.get("char", "")
            next_base = next_char[0] if next_char else ""

            # Iqlab: ن before ب — does the ن sound like م (nasalized)?
            # (Fix: dropped an unused extract_segment() call; analyze_ghunnah
            # works on the original file via start/end directly.)
            if is_noon_trigger and next_base == IQLAB_TARGET:
                ghunnah = self.analyze_ghunnah(entry.get("start", 0), entry.get("end", 0), char)

                results["iqlab"].append({
                    "position": i,
                    "char": char,
                    "next_char": next_char,
                    "time": f"{entry.get('start', 0):.3f}-{entry.get('end', 0):.3f}",
                    "ghunnah_detected": ghunnah.get("status") in ["DETECTED", "PARTIAL"],
                    "confidence": ghunnah.get("confidence", 0)
                })

            # Ikhfa: ن before specific letters — partial nasalization expected.
            elif is_noon_trigger and next_base in IKHFA_TARGETS:
                ghunnah = self.analyze_ghunnah(entry.get("start", 0), entry.get("end", 0), char)

                results["ikhfa"].append({
                    "position": i,
                    "char": char,
                    "next_char": next_char,
                    "time": f"{entry.get('start', 0):.3f}-{entry.get('end', 0):.3f}",
                    "ghunnah_level": ghunnah.get("status"),
                    "confidence": ghunnah.get("confidence", 0)
                })

            # Idgham: ن before يرملون — merged ن is expected to be very short.
            elif is_noon_trigger and next_base in IDGHAM_TARGETS:
                noon_dur = (entry.get("end", 0) - entry.get("start", 0)) * 1000

                results["idgham"].append({
                    "position": i,
                    "char": char,
                    "next_char": next_char,
                    "time": f"{entry.get('start', 0):.3f}-{entry.get('end', 0):.3f}",
                    "noon_duration_ms": round(noon_dur, 1),
                    "merged": noon_dur < 50,  # Very short = merged
                    "confidence": 0.7 if noon_dur < 50 else 0.4
                })

        return results
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
def run_comprehensive_analysis():
    """Run comprehensive Tajweed analysis with all improvements.

    Loads the surah audio and letter-timing JSON from the module-level
    AUDIO_PATH / TIMING_PATH constants, runs per-letter Qalqalah, Madd,
    Tafkheem and Ghunnah checks plus cross-word rules, prints a summary,
    and writes the full JSON report to OUTPUT_PATH.

    Returns:
        The results dict (also saved to disk), or None if librosa is missing.
    """

    print("=" * 60)
    print("TajweedSST Enhanced Analyzer v3")
    print("Ghunnah + Pitch + Cross-Word Rules")
    print("=" * 60)

    # librosa is mandatory for every acoustic check; bail out early without it.
    if not HAS_LIBROSA:
        print("ERROR: librosa required")
        return

    # Load analyzer (reads and resamples the audio once)
    analyzer = TajweedAnalyzerV3(AUDIO_PATH)

    # Load per-letter timing data (list of {"char", "start", "end", ...})
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        timing = json.load(f)

    print(f"\n[1] Analyzing {len(timing)} letters...")

    results = {
        "qalqalah": [],
        "madd": [],
        "tafkheem": [],
        "ghunnah": [],
        "cross_word": {},
        "summary": {}
    }

    # Per-rule tallies: letters eligible for the rule vs. letters that passed.
    counts = {k: 0 for k in ["qalqalah", "madd", "tafkheem", "ghunnah"]}
    passed = {k: 0 for k in ["qalqalah", "madd", "tafkheem", "ghunnah"]}

    for entry in timing:
        char = entry.get("char", "")
        # First codepoint = base letter; trailing chars are diacritics.
        base = char[0] if char else ""
        start = entry.get("start", 0)
        end = entry.get("end", 0)

        segment = analyzer.extract_segment(start, end)

        # Qalqalah: echo/bounce letters
        if base in QALQALAH_LETTERS:
            counts["qalqalah"] += 1
            analysis = analyzer.analyze_qalqalah(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["qalqalah"].append(analysis)
            if analysis["confidence"] >= 0.4:
                passed["qalqalah"] += 1

        # Madd (with pitch tracking); madd_count defaults to 2 harakat
        if base in MADD_LETTERS:
            counts["madd"] += 1
            madd_count = entry.get("madd_count", 2)
            analysis = analyzer.analyze_madd_pitch(segment, char, madd_count)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["madd"].append(analysis)
            if analysis["status"] in ["SUSTAINED", "PARTIAL"]:
                passed["madd"] += 1

        # Tafkheem: heavy letters
        if base in TAFKHEEM_LETTERS:
            counts["tafkheem"] += 1
            analysis = analyzer.analyze_tafkheem(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["tafkheem"].append(analysis)
            if analysis["status"] in ["HEAVY", "MODERATE"]:
                passed["tafkheem"] += 1

        # Ghunnah: nasal letters (uses start/end, not the extracted segment,
        # because the formant analysis runs on the original parselmouth Sound)
        if base in GHUNNAH_LETTERS:
            counts["ghunnah"] += 1
            analysis = analyzer.analyze_ghunnah(start, end, char)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["ghunnah"].append(analysis)
            if analysis.get("status") in ["DETECTED", "PARTIAL"]:
                passed["ghunnah"] += 1

    # Cross-word analysis (Idgham / Ikhfa / Iqlab over letter pairs)
    print("\n[2] Analyzing cross-word rules...")
    results["cross_word"] = analyzer.analyze_cross_word_rules(timing)

    # Summary: pass rate per rule (max(1, total) avoids division by zero)
    results["summary"] = {
        k: {
            "total": counts[k],
            "passed": passed[k],
            "rate": round(passed[k] / max(1, counts[k]), 2)
        }
        for k in counts
    }

    results["summary"]["cross_word"] = {
        "idgham": len(results["cross_word"].get("idgham", [])),
        "ikhfa": len(results["cross_word"].get("ikhfa", [])),
        "iqlab": len(results["cross_word"].get("iqlab", []))
    }

    # Print results: per-rule rates, then cross-word occurrence counts
    print("\n[3] Results:")
    for rule, data in results["summary"].items():
        if isinstance(data, dict) and "rate" in data:
            print(f" {rule}: {data['passed']}/{data['total']} ({data['rate']*100:.0f}%)")
        elif isinstance(data, dict):
            print(f" {rule}: Idgham={data.get('idgham', 0)}, Ikhfa={data.get('ikhfa', 0)}, Iqlab={data.get('iqlab', 0)}")

    # Save report (convert_to_json_safe strips numpy types first)
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(convert_to_json_safe(results), f, ensure_ascii=False, indent=2)
    print(f"\n[4] Saved: {OUTPUT_PATH}")

    # Samples: first few entries of each analysis for a quick eyeball check
    print("\n[5] Sample Ghunnah (ن/م nasal detection):")
    for r in results["ghunnah"][:5]:
        f1 = r.get('f1', 'N/A')
        print(f" [{r['char']}] F1:{f1}Hz → {r['status']} (conf: {r['confidence']})")

    print("\n[6] Sample Madd (Pitch Tracking):")
    for r in results["madd"][:5]:
        cv = r.get('pitch_cv', 'N/A')
        print(f" [{r['char']}] {r.get('duration_ms', 0):.0f}ms, pitch_cv:{cv} → {r['status']}")

    print("\n[7] Cross-Word Rules Detected:")
    for rule, items in results["cross_word"].items():
        if items:
            print(f" {rule.upper()}: {len(items)} instances")
            for item in items[:2]:
                print(f" - {item['char']} → {item['next_char']} @ {item['time']}")

    print("\n" + "=" * 60)
    print("✓ TajweedSST v3 Analysis Complete!")
    print("=" * 60)

    return results
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
if __name__ == "__main__":
    # Script entry point: run the full v3 physics analysis pipeline.
    run_comprehensive_analysis()
|
requirements.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core dependencies
|
| 2 |
+
camel-tools>=1.5.0
|
| 3 |
+
whisperx>=3.1.0
|
| 4 |
+
librosa>=0.10.0
|
| 5 |
+
praat-parselmouth>=0.4.0
|
| 6 |
+
numpy>=1.24.0
|
| 7 |
+
scipy>=1.10.0
|
| 8 |
+
torch>=2.0.0
|
| 9 |
+
torchaudio>=2.0.0
|
| 10 |
+
|
| 11 |
+
# Alignment
|
| 12 |
+
montreal-forced-aligner>=3.0.0
|
| 13 |
+
|
| 14 |
+
# Arabic NLP
|
| 15 |
+
pyarabic>=0.6.0
|
| 16 |
+
arabic-reshaper>=3.0.0
|
| 17 |
+
|
| 18 |
+
# Utilities
|
| 19 |
+
tqdm>=4.65.0
|
| 20 |
+
pydub>=0.25.0
|
| 21 |
+
soundfile>=0.12.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
TajweedSST - Quranic Precision Alignment & Tajweed Analysis Tool
|
| 3 |
+
|
| 4 |
+
A Python-based pipeline that generates letter-level precise timing data
|
| 5 |
+
for Quran recitations, prevents timing drift, and uses signal processing
|
| 6 |
+
to validate Tajweed rules.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
from tajweedsst.src.pipeline import TajweedPipeline
|
| 10 |
+
|
| 11 |
+
pipeline = TajweedPipeline()
|
| 12 |
+
result = pipeline.process(
|
| 13 |
+
audio_path="path/to/audio.mp3",
|
| 14 |
+
text="قُلْ هُوَ اللَّهُ أَحَدٌ",
|
| 15 |
+
surah=112,
|
| 16 |
+
ayah=1
|
| 17 |
+
)
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from .tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 21 |
+
from .alignment_engine import AlignmentEngine, MockAlignmentEngine
|
| 22 |
+
from .physics_validator import PhysicsValidator, ValidationStatus
|
| 23 |
+
from .pipeline import TajweedPipeline
|
| 24 |
+
|
| 25 |
+
__version__ = "1.0.0"
|
| 26 |
+
__all__ = [
|
| 27 |
+
"TajweedPipeline",
|
| 28 |
+
"TajweedParser",
|
| 29 |
+
"TajweedType",
|
| 30 |
+
"PhysicsCheck",
|
| 31 |
+
"AlignmentEngine",
|
| 32 |
+
"MockAlignmentEngine",
|
| 33 |
+
"PhysicsValidator",
|
| 34 |
+
"ValidationStatus"
|
| 35 |
+
]
|
src/alignment_engine.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Step 2: Hierarchical Alignment Engine
|
| 4 |
+
|
| 5 |
+
The Anti-Drift Engine:
|
| 6 |
+
1. WhisperX: Get word-level anchors (rigid boundaries)
|
| 7 |
+
2. MFA: Get phoneme-level precision within words
|
| 8 |
+
3. Normalization: Clamp MFA durations to match WhisperX exactly
|
| 9 |
+
|
| 10 |
+
Formula: Phoneme_New_Duration = Phoneme_Old * (Whisper_Word_Duration / Sum_MFA_Phonemes)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import os
|
| 14 |
+
import json
|
| 15 |
+
import subprocess
|
| 16 |
+
from dataclasses import dataclass, field
|
| 17 |
+
from typing import List, Dict, Optional, Tuple
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
@dataclass
class PhonemeAlignment:
    """Timing record for a single phoneme.

    Carries the aligner-reported ``duration`` alongside the boundary pair
    (``start``, ``end``); ``normalized_duration`` is always derived from
    the boundaries.
    """
    phoneme: str
    start: float
    end: float
    duration: float

    @property
    def normalized_duration(self) -> float:
        """Duration in seconds recomputed from the current boundaries."""
        span = self.end - self.start
        return span
|
| 31 |
+
|
| 32 |
+
@dataclass
class WordAlignment:
    """A single word's WhisperX boundaries plus its phoneme breakdown."""
    word_text: str
    whisper_start: float
    whisper_end: float
    phonemes: List[PhonemeAlignment] = field(default_factory=list)

    @property
    def whisper_duration(self) -> float:
        """Length of the WhisperX word window, in seconds."""
        window = self.whisper_end - self.whisper_start
        return window
|
| 43 |
+
|
| 44 |
+
@dataclass
class AlignmentResult:
    """Complete alignment for an audio segment.

    Aggregates the per-word alignments produced by the engine for one
    surah/ayah of a given audio file.
    """
    audio_path: str  # path of the audio file that was aligned
    surah: int  # surah number for metadata
    ayah: int  # ayah number for metadata
    # One WordAlignment per recognized word, in temporal order.
    words: List[WordAlignment] = field(default_factory=list)
    # Free-form extra information about the run.
    metadata: Dict = field(default_factory=dict)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class AlignmentEngine:
|
| 55 |
+
"""
|
| 56 |
+
Hierarchical alignment using WhisperX + MFA
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
def __init__(self,
|
| 60 |
+
whisperx_model: str = "large-v3",
|
| 61 |
+
mfa_acoustic_model: str = "arabic_mfa",
|
| 62 |
+
mfa_dictionary: str = "arabic_mfa",
|
| 63 |
+
device: str = "cuda",
|
| 64 |
+
compute_type: str = "float16"):
|
| 65 |
+
"""
|
| 66 |
+
Initialize alignment engine
|
| 67 |
+
|
| 68 |
+
Args:
|
| 69 |
+
whisperx_model: WhisperX model size
|
| 70 |
+
mfa_acoustic_model: MFA acoustic model for Arabic
|
| 71 |
+
mfa_dictionary: MFA pronunciation dictionary
|
| 72 |
+
device: cuda or cpu
|
| 73 |
+
compute_type: float16 or float32
|
| 74 |
+
"""
|
| 75 |
+
self.whisperx_model = whisperx_model
|
| 76 |
+
self.mfa_acoustic_model = mfa_acoustic_model
|
| 77 |
+
self.mfa_dictionary = mfa_dictionary
|
| 78 |
+
self.device = device
|
| 79 |
+
self.compute_type = compute_type
|
| 80 |
+
|
| 81 |
+
self._whisperx = None
|
| 82 |
+
self._whisperx_align_model = None
|
| 83 |
+
|
| 84 |
+
def _load_whisperx(self):
|
| 85 |
+
"""Lazy load WhisperX models"""
|
| 86 |
+
if self._whisperx is None:
|
| 87 |
+
import whisperx
|
| 88 |
+
self._whisperx = whisperx.load_model(
|
| 89 |
+
self.whisperx_model,
|
| 90 |
+
device=self.device,
|
| 91 |
+
compute_type=self.compute_type
|
| 92 |
+
)
|
| 93 |
+
# Load alignment model for Arabic
|
| 94 |
+
self._whisperx_align_model, self._whisperx_align_metadata = whisperx.load_align_model(
|
| 95 |
+
language_code="ar",
|
| 96 |
+
device=self.device
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
def align(self,
|
| 100 |
+
audio_path: str,
|
| 101 |
+
phonetic_words: List[str],
|
| 102 |
+
surah: int = 0,
|
| 103 |
+
ayah: int = 0) -> AlignmentResult:
|
| 104 |
+
"""
|
| 105 |
+
Perform hierarchical alignment
|
| 106 |
+
|
| 107 |
+
Args:
|
| 108 |
+
audio_path: Path to audio file
|
| 109 |
+
phonetic_words: List of phonetic transcriptions from TajweedParser
|
| 110 |
+
surah: Surah number for metadata
|
| 111 |
+
ayah: Ayah number for metadata
|
| 112 |
+
|
| 113 |
+
Returns:
|
| 114 |
+
AlignmentResult with word and phoneme timings
|
| 115 |
+
"""
|
| 116 |
+
result = AlignmentResult(
|
| 117 |
+
audio_path=audio_path,
|
| 118 |
+
surah=surah,
|
| 119 |
+
ayah=ayah
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
# Step 1: WhisperX word-level alignment
|
| 123 |
+
whisper_words = self._run_whisperx(audio_path)
|
| 124 |
+
|
| 125 |
+
# Step 2: MFA phoneme-level alignment for each word
|
| 126 |
+
mfa_phonemes = self._run_mfa(audio_path, phonetic_words)
|
| 127 |
+
|
| 128 |
+
# Step 3: Normalize MFA phonemes to WhisperX word boundaries
|
| 129 |
+
for i, (whisper_word, phonemes) in enumerate(zip(whisper_words, mfa_phonemes)):
|
| 130 |
+
word_alignment = WordAlignment(
|
| 131 |
+
word_text=whisper_word['word'],
|
| 132 |
+
whisper_start=whisper_word['start'],
|
| 133 |
+
whisper_end=whisper_word['end']
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
# Normalize phoneme durations
|
| 137 |
+
normalized_phonemes = self._normalize_phonemes(
|
| 138 |
+
phonemes=phonemes,
|
| 139 |
+
target_start=whisper_word['start'],
|
| 140 |
+
target_end=whisper_word['end']
|
| 141 |
+
)
|
| 142 |
+
word_alignment.phonemes = normalized_phonemes
|
| 143 |
+
|
| 144 |
+
result.words.append(word_alignment)
|
| 145 |
+
|
| 146 |
+
return result
|
| 147 |
+
|
| 148 |
+
def _run_whisperx(self, audio_path: str) -> List[Dict]:
|
| 149 |
+
"""
|
| 150 |
+
Run WhisperX for word-level timing
|
| 151 |
+
|
| 152 |
+
Returns: List of {word, start, end} dicts
|
| 153 |
+
"""
|
| 154 |
+
self._load_whisperx()
|
| 155 |
+
import whisperx
|
| 156 |
+
|
| 157 |
+
# Transcribe
|
| 158 |
+
audio = whisperx.load_audio(audio_path)
|
| 159 |
+
result = self._whisperx.transcribe(audio, batch_size=16)
|
| 160 |
+
|
| 161 |
+
# Align to get word-level timestamps
|
| 162 |
+
aligned = whisperx.align(
|
| 163 |
+
result["segments"],
|
| 164 |
+
self._whisperx_align_model,
|
| 165 |
+
self._whisperx_align_metadata,
|
| 166 |
+
audio,
|
| 167 |
+
self.device,
|
| 168 |
+
return_char_alignments=False
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
# Extract word timings
|
| 172 |
+
words = []
|
| 173 |
+
for segment in aligned["segments"]:
|
| 174 |
+
for word_data in segment.get("words", []):
|
| 175 |
+
words.append({
|
| 176 |
+
"word": word_data["word"],
|
| 177 |
+
"start": word_data["start"],
|
| 178 |
+
"end": word_data["end"]
|
| 179 |
+
})
|
| 180 |
+
|
| 181 |
+
return words
|
| 182 |
+
|
| 183 |
+
def _run_mfa(self, audio_path: str, phonetic_words: List[str]) -> List[List[Dict]]:
|
| 184 |
+
"""
|
| 185 |
+
Run MFA for phoneme-level timing within each word
|
| 186 |
+
|
| 187 |
+
Returns: List of phoneme lists per word
|
| 188 |
+
"""
|
| 189 |
+
# Create temp directory for MFA
|
| 190 |
+
temp_dir = Path("/tmp/tajweedsst_mfa")
|
| 191 |
+
temp_dir.mkdir(exist_ok=True)
|
| 192 |
+
|
| 193 |
+
input_dir = temp_dir / "input"
|
| 194 |
+
output_dir = temp_dir / "output"
|
| 195 |
+
input_dir.mkdir(exist_ok=True)
|
| 196 |
+
output_dir.mkdir(exist_ok=True)
|
| 197 |
+
|
| 198 |
+
# Copy audio and create transcript
|
| 199 |
+
audio_name = Path(audio_path).stem
|
| 200 |
+
transcript_path = input_dir / f"{audio_name}.txt"
|
| 201 |
+
|
| 202 |
+
# Write phonetic transcript (space-separated words)
|
| 203 |
+
transcript = " ".join(phonetic_words)
|
| 204 |
+
transcript_path.write_text(transcript)
|
| 205 |
+
|
| 206 |
+
# Copy audio file
|
| 207 |
+
import shutil
|
| 208 |
+
audio_dest = input_dir / Path(audio_path).name
|
| 209 |
+
shutil.copy(audio_path, audio_dest)
|
| 210 |
+
|
| 211 |
+
# Run MFA
|
| 212 |
+
try:
|
| 213 |
+
subprocess.run([
|
| 214 |
+
"mfa", "align",
|
| 215 |
+
str(input_dir),
|
| 216 |
+
self.mfa_dictionary,
|
| 217 |
+
self.mfa_acoustic_model,
|
| 218 |
+
str(output_dir),
|
| 219 |
+
"--clean",
|
| 220 |
+
"--quiet"
|
| 221 |
+
], check=True, capture_output=True)
|
| 222 |
+
except subprocess.CalledProcessError as e:
|
| 223 |
+
print(f"MFA Error: {e.stderr.decode()}")
|
| 224 |
+
return [[] for _ in phonetic_words]
|
| 225 |
+
|
| 226 |
+
# Parse TextGrid output
|
| 227 |
+
textgrid_path = output_dir / f"{audio_name}.TextGrid"
|
| 228 |
+
if textgrid_path.exists():
|
| 229 |
+
return self._parse_textgrid(textgrid_path, len(phonetic_words))
|
| 230 |
+
|
| 231 |
+
return [[] for _ in phonetic_words]
|
| 232 |
+
|
| 233 |
+
def _parse_textgrid(self, textgrid_path: Path, word_count: int) -> List[List[Dict]]:
    """
    Parse MFA TextGrid output for phoneme timings.

    Reads the "phones" and "words" tiers and groups each non-empty phone
    interval under the word interval it starts in.

    Args:
        textgrid_path: Path to the .TextGrid file produced by MFA.
        word_count: Number of words the caller expects; the returned list
            always has exactly this length.

    Returns:
        A list of word_count lists of phoneme dicts
        ({"phoneme", "start", "end"}); all-empty lists on parse failure.
    """
    try:
        import textgrid
        tg = textgrid.TextGrid.fromFile(str(textgrid_path))

        # Locate the two tiers MFA emits. Compare with `is None` below:
        # an empty tier is falsy under `not`, but still a valid tier.
        phones_tier = None
        words_tier = None
        for tier in tg:
            if tier.name == "phones":
                phones_tier = tier
            elif tier.name == "words":
                words_tier = tier

        if phones_tier is None or words_tier is None:
            return [[] for _ in range(word_count)]

        # Walk phones in time order, advancing a word cursor whenever a
        # phone starts at/after the current word interval's end.
        result = []
        word_idx = 0
        current_word_phones = []

        for interval in phones_tier:
            if interval.mark and interval.mark != "":
                phone_data = {
                    "phoneme": interval.mark,
                    "start": interval.minTime,
                    "end": interval.maxTime
                }

                # Advance past EVERY word ending before this phone starts.
                # (A single-step advance would mis-bucket phones after a
                # silence spanning more than one word interval.)
                while (word_idx < len(words_tier)
                       and interval.minTime >= words_tier[word_idx].maxTime):
                    result.append(current_word_phones)
                    current_word_phones = []
                    word_idx += 1

                current_word_phones.append(phone_data)

        # Don't forget the last word's phones.
        if current_word_phones:
            result.append(current_word_phones)

        # Pad/truncate so callers can always zip against their word list.
        while len(result) < word_count:
            result.append([])
        return result[:word_count]

    except Exception as e:
        print(f"TextGrid parse error: {e}")
        return [[] for _ in range(word_count)]
def _normalize_phonemes(self,
                        phonemes: List[Dict],
                        target_start: float,
                        target_end: float) -> List[PhonemeAlignment]:
    """
    Rescale MFA phoneme timings so they exactly span the WhisperX word window.

    Each phoneme keeps its relative share of the word:
        new_duration = old_duration * (whisper_word_duration / total_mfa_duration)

    Args:
        phonemes: Raw MFA phoneme dicts with 'phoneme'/'start'/'end'.
        target_start: Word start time from WhisperX (seconds).
        target_end: Word end time from WhisperX (seconds).

    Returns:
        PhonemeAlignment objects laid end-to-end from target_start to
        target_end; empty list if there are no phonemes or their summed
        duration is zero.
    """
    if not phonemes:
        return []

    window = target_end - target_start

    # Total raw MFA duration drives the scale factor.
    raw_total = sum(p['end'] - p['start'] for p in phonemes)
    if raw_total == 0:
        return []

    ratio = window / raw_total

    aligned: List[PhonemeAlignment] = []
    cursor = target_start
    for entry in phonemes:
        stretched = (entry['end'] - entry['start']) * ratio
        aligned.append(PhonemeAlignment(
            phoneme=entry['phoneme'],
            start=cursor,
            end=cursor + stretched,
            duration=stretched
        ))
        cursor += stretched

    # Snap the final boundary to target_end to absorb float drift.
    last = aligned[-1]
    last.end = target_end
    last.duration = target_end - last.start

    return aligned
class MockAlignmentEngine(AlignmentEngine):
    """
    Deterministic stand-in for AlignmentEngine used in tests.

    Produces synthetic timings -- 0.5 s per word with a 0.1 s gap between
    words, phonemes evenly spread inside each word -- without requiring
    WhisperX or MFA to be installed.
    """

    def align(self,
              audio_path: str,
              phonetic_words: List[str],
              surah: int = 0,
              ayah: int = 0) -> AlignmentResult:
        """Build a synthetic AlignmentResult for the given phonetic words."""
        WORD_SECONDS = 0.5
        GAP_SECONDS = 0.1

        result = AlignmentResult(
            audio_path=audio_path,
            surah=surah,
            ayah=ayah
        )

        cursor = 0.0
        for word in phonetic_words:
            parts = word.split()
            # Evenly divide the word window; guard against empty words.
            slice_len = WORD_SECONDS / max(len(parts), 1)

            entry = WordAlignment(
                word_text=word,
                whisper_start=cursor,
                whisper_end=cursor + WORD_SECONDS
            )

            tick = cursor
            for ph in parts:
                entry.phonemes.append(PhonemeAlignment(
                    phoneme=ph,
                    start=tick,
                    end=tick + slice_len,
                    duration=slice_len
                ))
                tick += slice_len

            result.words.append(entry)
            cursor += WORD_SECONDS + GAP_SECONDS  # gap between words

        return result
def main():
    """Smoke-test the alignment engine against mock data."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Alignment Engine Test")
    print(banner)

    # Mock engine: no WhisperX/MFA installation required.
    engine = MockAlignmentEngine()

    # Phonetic words as produced by TajweedParser (Surah 112:1).
    phonetic_words = ["q l", "h w", "ā l l ā h", "ʾ ḥ d"]

    result = engine.align(
        audio_path="test.wav",
        phonetic_words=phonetic_words,
        surah=112,
        ayah=1
    )

    print(f"Aligned {len(result.words)} words:")
    for word in result.words:
        print(f"\n  Word: '{word.word_text}'")
        print(f"  Anchor: {word.whisper_start:.3f} - {word.whisper_end:.3f}s")
        for phoneme in word.phonemes:
            print(f"    [{phoneme.phoneme}] {phoneme.start:.3f} - {phoneme.end:.3f}s")


if __name__ == "__main__":
    main()
|
src/duration_model.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Duration Model
|
| 4 |
+
|
| 5 |
+
Calibrates and validates letter durations based on Tajweed rules.
|
| 6 |
+
Works with harakat (beat) counts and reciter-specific speech rates.
|
| 7 |
+
|
| 8 |
+
Key Features:
|
| 9 |
+
- Per-reciter harakat calibration
|
| 10 |
+
- Madd type detection from Quranic context
|
| 11 |
+
- Duration validation against Tajweed expectations
|
| 12 |
+
- Speech rate normalization
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import json
|
| 16 |
+
import numpy as np
|
| 17 |
+
from dataclasses import dataclass, field
|
| 18 |
+
from typing import List, Dict, Optional, Tuple
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from enum import Enum
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class MaddType(Enum):
    """Categories of Madd (vowel elongation) recognized by the model."""
    NONE = "none"
    ASLI = "asli"      # natural elongation: 2 harakat
    WAJIB = "wajib"    # obligatory: 4-5 harakat
    JAIZ = "jaiz"      # permissible: 2-4-6 harakat (flexible)
    LAZIM = "lazim"    # compulsory: 6 harakat
    LEEN = "leen"      # soft: 2-4-6 harakat
    ARID = "arid"      # pre-pause: 2-4-6 harakat
    BADAL = "badal"    # substitution: 2 harakat
    SILAH = "silah"    # connection: 2 harakat
| 35 |
+
@dataclass
class HarakatCalibration:
    """Per-reciter timing calibration derived from sample recordings."""
    reciter_name: str                                     # identifier for the reciter
    harakat_base_ms: float = 100.0                        # length of one beat (harakat), ms
    speech_rate_wpm: float = 60.0                         # estimated words per minute
    pitch_range_hz: Tuple[float, float] = (80.0, 300.0)   # (min, max) F0 in Hz
    sample_size: int = 0                                  # number of samples behind the calibration
| 45 |
+
@dataclass
class DurationExpectation:
    """Expected timing envelope for one Tajweed duration rule."""
    rule_name: str                           # human-readable rule label
    min_harakat: int                         # lower bound, in beats
    max_harakat: int                         # upper bound, in beats
    expected_ms_range: Tuple[float, float]   # absolute (min, max) window in ms
    tolerance: float = 0.25                  # fractional slack (default 25%)
| 55 |
+
@dataclass
class DurationResult:
    """Outcome of validating one measured duration against a Tajweed rule."""
    is_valid: bool            # True when the duration falls in the expected window
    actual_ms: float          # measured duration, ms
    expected_ms: float        # center of the expected window, ms
    harakat_count: float      # measured duration expressed in beats
    deviation_percent: float  # |actual - expected| / expected * 100
    rule_applied: str         # name of the Madd rule that was checked
| 66 |
+
class DurationModel:
    """
    Duration model for Tajweed-based timing validation.

    Expected durations are expressed in harakat (beats); the absolute
    length of one harakat is calibrated per reciter via
    calibrate_from_samples(), falling back to DEFAULT_HARAKAT_MS when no
    calibration has been performed.
    """

    # Default beat duration in ms, used before any calibration
    DEFAULT_HARAKAT_MS = 100.0

    # Tajweed duration rules (in harakat counts).
    # BADAL and SILAH are 2-harakat rules (see MaddType comments), so they
    # share the ASLI envelope instead of falling through to the 1-harakat
    # default in get_expected_duration().
    TAJWEED_DURATIONS = {
        MaddType.ASLI: DurationExpectation("Madd Asli", 2, 2, (150, 280), 0.30),
        MaddType.WAJIB: DurationExpectation("Madd Wajib", 4, 5, (350, 550), 0.25),
        MaddType.LAZIM: DurationExpectation("Madd Lazim", 6, 6, (500, 800), 0.20),
        MaddType.JAIZ: DurationExpectation("Madd Jaiz", 2, 6, (150, 700), 0.30),
        MaddType.ARID: DurationExpectation("Madd Arid", 2, 6, (150, 700), 0.30),
        MaddType.LEEN: DurationExpectation("Madd Leen", 2, 6, (150, 700), 0.30),
        MaddType.BADAL: DurationExpectation("Madd Badal", 2, 2, (150, 280), 0.30),
        MaddType.SILAH: DurationExpectation("Madd Silah", 2, 2, (150, 280), 0.30),
    }

    # Ghunnah duration (2 harakat)
    GHUNNAH_DURATION = DurationExpectation("Ghunnah", 2, 2, (80, 250), 0.30)

    def __init__(self, lisan_path: Optional[str] = None):
        """Initialize with optional path to lisan_phonemes.json."""
        self.calibration: Optional[HarakatCalibration] = None
        self.lisan_data: Dict = {}

        if lisan_path and Path(lisan_path).exists():
            with open(lisan_path, 'r', encoding='utf-8') as f:
                self.lisan_data = json.load(f)

    def calibrate_from_samples(self,
                               reciter_name: str,
                               vowel_durations: List[float],
                               words_per_minute: float = 60.0) -> HarakatCalibration:
        """
        Calibrate the harakat duration from sample vowel measurements.

        Args:
            reciter_name: Name of the reciter, for identification.
            vowel_durations: Short-vowel durations in seconds; when empty,
                DEFAULT_HARAKAT_MS is used.
            words_per_minute: Estimated speech rate.

        Returns:
            The stored HarakatCalibration object.
        """
        if not vowel_durations:
            # No measurements -- fall back to defaults.
            self.calibration = HarakatCalibration(
                reciter_name=reciter_name,
                harakat_base_ms=self.DEFAULT_HARAKAT_MS,
                speech_rate_wpm=words_per_minute,
                sample_size=0
            )
            return self.calibration

        # Convert to milliseconds; median is robust to outliers.
        durations_ms = [d * 1000 for d in vowel_durations]
        harakat_base = np.median(durations_ms)

        self.calibration = HarakatCalibration(
            reciter_name=reciter_name,
            harakat_base_ms=harakat_base,
            speech_rate_wpm=words_per_minute,
            sample_size=len(vowel_durations)
        )

        return self.calibration

    def get_expected_duration(self,
                              madd_type: MaddType,
                              harakat_count: Optional[int] = None) -> Tuple[float, float]:
        """
        Get the expected duration range for a Madd type.

        Args:
            madd_type: The Madd rule to look up.
            harakat_count: Specific beat count; when None, the rule's
                min/max harakat range is used.

        Returns:
            Tuple of (min_ms, max_ms).
        """
        if not self.calibration:
            base_ms = self.DEFAULT_HARAKAT_MS
        else:
            base_ms = self.calibration.harakat_base_ms

        if madd_type in self.TAJWEED_DURATIONS:
            expectation = self.TAJWEED_DURATIONS[madd_type]
            # `is not None` (not truthiness): harakat_count=0 is an explicit
            # value, not "use the rule's range".
            if harakat_count is not None:
                # Window centered on the requested beat count.
                center = harakat_count * base_ms
                tolerance = expectation.tolerance
                return (center * (1 - tolerance), center * (1 + tolerance))
            else:
                # Window spanning the rule's full harakat range.
                min_ms = expectation.min_harakat * base_ms * (1 - expectation.tolerance)
                max_ms = expectation.max_harakat * base_ms * (1 + expectation.tolerance)
                return (min_ms, max_ms)

        # Default: 1 harakat with +/-30% slack.
        return (base_ms * 0.7, base_ms * 1.3)

    def validate_duration(self,
                          actual_duration_s: float,
                          madd_type: MaddType,
                          expected_harakat: int = 2) -> DurationResult:
        """
        Validate whether an actual duration matches the Tajweed expectation.

        Args:
            actual_duration_s: Measured duration in seconds.
            madd_type: Type of Madd rule to validate against.
            expected_harakat: Expected harakat (beat) count.

        Returns:
            DurationResult with validation details.
        """
        actual_ms = actual_duration_s * 1000
        min_ms, max_ms = self.get_expected_duration(madd_type, expected_harakat)
        expected_ms = (min_ms + max_ms) / 2

        is_valid = min_ms <= actual_ms <= max_ms
        deviation = abs(actual_ms - expected_ms) / expected_ms * 100 if expected_ms > 0 else 0

        # Express the measured duration in beats of the calibrated base.
        base_ms = self.calibration.harakat_base_ms if self.calibration else self.DEFAULT_HARAKAT_MS
        harakat_count = actual_ms / base_ms if base_ms > 0 else 0

        return DurationResult(
            is_valid=is_valid,
            actual_ms=actual_ms,
            expected_ms=expected_ms,
            harakat_count=harakat_count,
            deviation_percent=deviation,
            rule_applied=madd_type.value
        )

    def validate_ghunnah_duration(self, actual_duration_s: float) -> DurationResult:
        """Validate a Ghunnah duration (2 harakat).

        NOTE(review): reuses the 2-harakat ASLI envelope rather than
        GHUNNAH_DURATION's (80, 250) ms range -- confirm intended.
        """
        return self.validate_duration(actual_duration_s, MaddType.ASLI, 2)

    def suggest_correction(self,
                           actual_duration_s: float,
                           madd_type: MaddType,
                           expected_harakat: int = 2) -> Tuple[float, float]:
        """
        Suggest a corrected duration based on Tajweed expectations.

        Args:
            actual_duration_s: Measured duration in seconds.
            madd_type: Type of Madd rule.
            expected_harakat: Expected harakat count.

        Returns:
            Tuple of (suggested_duration_s, adjustment_s); adjustment is 0
            when the duration is already within range.
        """
        min_ms, max_ms = self.get_expected_duration(madd_type, expected_harakat)
        actual_ms = actual_duration_s * 1000

        if actual_ms < min_ms:
            # Too short -- clamp up to the minimum.
            suggested_ms = min_ms
        elif actual_ms > max_ms:
            # Too long -- clamp down to the maximum.
            suggested_ms = max_ms
        else:
            # Already valid.
            suggested_ms = actual_ms

        adjustment_ms = suggested_ms - actual_ms
        return (suggested_ms / 1000, adjustment_ms / 1000)

    def detect_madd_type_from_context(self,
                                      current_letter: str,
                                      next_letter: Optional[str],
                                      next_harakat: Optional[str],
                                      is_word_end: bool,
                                      is_waqf: bool = False) -> MaddType:
        """
        Auto-detect the Madd type from Quranic text context.

        Args:
            current_letter: The Madd letter (ا و ي).
            next_letter: Following letter, if any.
            next_harakat: Harakat mark(s) on the next letter.
            is_word_end: Whether the letter sits at a word boundary.
            is_waqf: Whether the reciter pauses here.

        Returns:
            Detected MaddType.
        """
        SUKUN = '\u0652'
        SHADDA = '\u0651'

        # Pause at word end -> Madd Arid (flexible 2-4-6).
        if is_waqf and is_word_end:
            return MaddType.ARID

        # Sukun or Shadda on the following letter -> Madd Lazim.
        if next_harakat:
            if SHADDA in next_harakat or SUKUN in next_harakat:
                return MaddType.LAZIM

        # Hamza following in the same word -> Madd Wajib.
        if next_letter and next_letter in 'ءأإؤئ':
            return MaddType.WAJIB

        # Default: natural 2-harakat Madd.
        return MaddType.ASLI
|
| 268 |
+
def main():
    """Smoke-test the duration model: calibration, validation, detection."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Duration Model Test")
    print(banner)

    dm = DurationModel()

    # Calibrate with simulated short-vowel samples (~100 ms each).
    sample_vowels = [0.095, 0.105, 0.098, 0.102, 0.100, 0.103, 0.097]
    cal = dm.calibrate_from_samples("Abdul_Basit", sample_vowels)

    print(f"\nCalibration for {cal.reciter_name}:")
    print(f"  Harakat base: {cal.harakat_base_ms:.1f} ms")
    print(f"  Sample size: {cal.sample_size}")

    print("\nDuration Validation Tests:")

    # Madd Asli: 2 harakat, ~200 ms expected.
    res = dm.validate_duration(0.195, MaddType.ASLI, 2)
    print(f"\n  Madd Asli (0.195s):")
    print(f"    Valid: {res.is_valid}")
    print(f"    Harakat: {res.harakat_count:.1f}")
    print(f"    Deviation: {res.deviation_percent:.1f}%")

    # Madd Lazim: 6 harakat, ~600 ms expected.
    res = dm.validate_duration(0.580, MaddType.LAZIM, 6)
    print(f"\n  Madd Lazim (0.580s):")
    print(f"    Valid: {res.is_valid}")
    print(f"    Harakat: {res.harakat_count:.1f}")
    print(f"    Deviation: {res.deviation_percent:.1f}%")

    print("\nMadd Type Detection:")
    detected = dm.detect_madd_type_from_context('ا', 'ء', None, False, False)
    print(f"  ا before ء: {detected.value}")

    detected = dm.detect_madd_type_from_context('ا', 'ب', '\u0651', False, False)
    print(f"  ا before بّ: {detected.value}")


if __name__ == "__main__":
    main()
|
src/lisan_phonemes.json
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"meta": {
|
| 3 |
+
"version": "1.0.0",
|
| 4 |
+
"description": "Lisan al-Arab Digital Phonemes - Arabic letter physics for Tajweed validation",
|
| 5 |
+
"source": "Articulatory Phonetics + Classical Tajweed Rules"
|
| 6 |
+
},
|
| 7 |
+
"consonants": {
|
| 8 |
+
"ء": {
|
| 9 |
+
"type": "stop",
|
| 10 |
+
"place": "glottal",
|
| 11 |
+
"voiced": false,
|
| 12 |
+
"tafkheem": false
|
| 13 |
+
},
|
| 14 |
+
"ب": {
|
| 15 |
+
"type": "stop",
|
| 16 |
+
"place": "bilabial",
|
| 17 |
+
"voiced": true,
|
| 18 |
+
"tafkheem": false,
|
| 19 |
+
"qalqalah": true
|
| 20 |
+
},
|
| 21 |
+
"ت": {
|
| 22 |
+
"type": "stop",
|
| 23 |
+
"place": "dental",
|
| 24 |
+
"voiced": false,
|
| 25 |
+
"tafkheem": false
|
| 26 |
+
},
|
| 27 |
+
"ث": {
|
| 28 |
+
"type": "fricative",
|
| 29 |
+
"place": "dental",
|
| 30 |
+
"voiced": false,
|
| 31 |
+
"tafkheem": false,
|
| 32 |
+
"freq_range": [
|
| 33 |
+
4000,
|
| 34 |
+
8000
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
"ج": {
|
| 38 |
+
"type": "affricate",
|
| 39 |
+
"place": "palatal",
|
| 40 |
+
"voiced": true,
|
| 41 |
+
"tafkheem": false,
|
| 42 |
+
"qalqalah": true
|
| 43 |
+
},
|
| 44 |
+
"ح": {
|
| 45 |
+
"type": "fricative",
|
| 46 |
+
"place": "pharyngeal",
|
| 47 |
+
"voiced": false,
|
| 48 |
+
"tafkheem": false
|
| 49 |
+
},
|
| 50 |
+
"خ": {
|
| 51 |
+
"type": "fricative",
|
| 52 |
+
"place": "velar",
|
| 53 |
+
"voiced": false,
|
| 54 |
+
"tafkheem": true,
|
| 55 |
+
"freq_range": [
|
| 56 |
+
1500,
|
| 57 |
+
3000
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
"د": {
|
| 61 |
+
"type": "stop",
|
| 62 |
+
"place": "dental",
|
| 63 |
+
"voiced": true,
|
| 64 |
+
"tafkheem": false,
|
| 65 |
+
"qalqalah": true
|
| 66 |
+
},
|
| 67 |
+
"ذ": {
|
| 68 |
+
"type": "fricative",
|
| 69 |
+
"place": "dental",
|
| 70 |
+
"voiced": true,
|
| 71 |
+
"tafkheem": false
|
| 72 |
+
},
|
| 73 |
+
"ر": {
|
| 74 |
+
"type": "trill",
|
| 75 |
+
"place": "alveolar",
|
| 76 |
+
"voiced": true,
|
| 77 |
+
"tafkheem": "context"
|
| 78 |
+
},
|
| 79 |
+
"ز": {
|
| 80 |
+
"type": "fricative",
|
| 81 |
+
"place": "alveolar",
|
| 82 |
+
"voiced": true,
|
| 83 |
+
"tafkheem": false,
|
| 84 |
+
"freq_range": [
|
| 85 |
+
3500,
|
| 86 |
+
6000
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
"س": {
|
| 90 |
+
"type": "fricative",
|
| 91 |
+
"place": "alveolar",
|
| 92 |
+
"voiced": false,
|
| 93 |
+
"tafkheem": false,
|
| 94 |
+
"freq_range": [
|
| 95 |
+
4000,
|
| 96 |
+
8000
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
"ش": {
|
| 100 |
+
"type": "fricative",
|
| 101 |
+
"place": "palatal",
|
| 102 |
+
"voiced": false,
|
| 103 |
+
"tafkheem": false,
|
| 104 |
+
"freq_range": [
|
| 105 |
+
2500,
|
| 106 |
+
6000
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
"ص": {
|
| 110 |
+
"type": "fricative",
|
| 111 |
+
"place": "alveolar",
|
| 112 |
+
"voiced": false,
|
| 113 |
+
"tafkheem": true,
|
| 114 |
+
"freq_range": [
|
| 115 |
+
3500,
|
| 116 |
+
7000
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
"ض": {
|
| 120 |
+
"type": "stop",
|
| 121 |
+
"place": "dental",
|
| 122 |
+
"voiced": true,
|
| 123 |
+
"tafkheem": true
|
| 124 |
+
},
|
| 125 |
+
"ط": {
|
| 126 |
+
"type": "stop",
|
| 127 |
+
"place": "dental",
|
| 128 |
+
"voiced": false,
|
| 129 |
+
"tafkheem": true,
|
| 130 |
+
"qalqalah": true
|
| 131 |
+
},
|
| 132 |
+
"ظ": {
|
| 133 |
+
"type": "fricative",
|
| 134 |
+
"place": "dental",
|
| 135 |
+
"voiced": true,
|
| 136 |
+
"tafkheem": true
|
| 137 |
+
},
|
| 138 |
+
"ع": {
|
| 139 |
+
"type": "fricative",
|
| 140 |
+
"place": "pharyngeal",
|
| 141 |
+
"voiced": true,
|
| 142 |
+
"tafkheem": false
|
| 143 |
+
},
|
| 144 |
+
"غ": {
|
| 145 |
+
"type": "fricative",
|
| 146 |
+
"place": "velar",
|
| 147 |
+
"voiced": true,
|
| 148 |
+
"tafkheem": true
|
| 149 |
+
},
|
| 150 |
+
"ف": {
|
| 151 |
+
"type": "fricative",
|
| 152 |
+
"place": "labiodental",
|
| 153 |
+
"voiced": false,
|
| 154 |
+
"tafkheem": false,
|
| 155 |
+
"freq_range": [
|
| 156 |
+
3000,
|
| 157 |
+
6000
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
"ق": {
|
| 161 |
+
"type": "stop",
|
| 162 |
+
"place": "uvular",
|
| 163 |
+
"voiced": false,
|
| 164 |
+
"tafkheem": true,
|
| 165 |
+
"qalqalah": true
|
| 166 |
+
},
|
| 167 |
+
"ك": {
|
| 168 |
+
"type": "stop",
|
| 169 |
+
"place": "velar",
|
| 170 |
+
"voiced": false,
|
| 171 |
+
"tafkheem": false
|
| 172 |
+
},
|
| 173 |
+
"ل": {
|
| 174 |
+
"type": "lateral",
|
| 175 |
+
"place": "alveolar",
|
| 176 |
+
"voiced": true,
|
| 177 |
+
"tafkheem": "allah_context"
|
| 178 |
+
},
|
| 179 |
+
"م": {
|
| 180 |
+
"type": "nasal",
|
| 181 |
+
"place": "bilabial",
|
| 182 |
+
"voiced": true,
|
| 183 |
+
"tafkheem": false,
|
| 184 |
+
"ghunnah_capable": true
|
| 185 |
+
},
|
| 186 |
+
"ن": {
|
| 187 |
+
"type": "nasal",
|
| 188 |
+
"place": "alveolar",
|
| 189 |
+
"voiced": true,
|
| 190 |
+
"tafkheem": false,
|
| 191 |
+
"ghunnah_capable": true
|
| 192 |
+
},
|
| 193 |
+
"ه": {
|
| 194 |
+
"type": "fricative",
|
| 195 |
+
"place": "glottal",
|
| 196 |
+
"voiced": false,
|
| 197 |
+
"tafkheem": false
|
| 198 |
+
},
|
| 199 |
+
"و": {
|
| 200 |
+
"type": "approximant",
|
| 201 |
+
"place": "bilabial",
|
| 202 |
+
"voiced": true,
|
| 203 |
+
"tafkheem": false
|
| 204 |
+
},
|
| 205 |
+
"ي": {
|
| 206 |
+
"type": "approximant",
|
| 207 |
+
"place": "palatal",
|
| 208 |
+
"voiced": true,
|
| 209 |
+
"tafkheem": false
|
| 210 |
+
}
|
| 211 |
+
},
|
| 212 |
+
"physics_signatures": {
|
| 213 |
+
"stop": {
|
| 214 |
+
"description": "Complete oral closure followed by burst release",
|
| 215 |
+
"detection": "silence_then_burst",
|
| 216 |
+
"metrics": [
|
| 217 |
+
"rms_dip",
|
| 218 |
+
"rms_spike",
|
| 219 |
+
"closure_duration_ms"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
"fricative": {
|
| 223 |
+
"description": "Continuous turbulent airflow through narrow constriction",
|
| 224 |
+
"detection": "high_frequency_noise",
|
| 225 |
+
"metrics": [
|
| 226 |
+
"spectral_centroid",
|
| 227 |
+
"zcr",
|
| 228 |
+
"noise_band_energy"
|
| 229 |
+
]
|
| 230 |
+
},
|
| 231 |
+
"nasal": {
|
| 232 |
+
"description": "Airflow through nasal cavity with oral closure",
|
| 233 |
+
"detection": "nasal_formant",
|
| 234 |
+
"metrics": [
|
| 235 |
+
"f1_nasal_peak",
|
| 236 |
+
"antiformant_250hz",
|
| 237 |
+
"pitch_stability"
|
| 238 |
+
]
|
| 239 |
+
},
|
| 240 |
+
"trill": {
|
| 241 |
+
"description": "Rapid vibration of articulator",
|
| 242 |
+
"detection": "periodic_amplitude_modulation",
|
| 243 |
+
"metrics": [
|
| 244 |
+
"modulation_rate_hz",
|
| 245 |
+
"periodicity"
|
| 246 |
+
]
|
| 247 |
+
},
|
| 248 |
+
"approximant": {
|
| 249 |
+
"description": "Smooth airflow with minimal constriction",
|
| 250 |
+
"detection": "formant_transition",
|
| 251 |
+
"metrics": [
|
| 252 |
+
"f1_f2_trajectory",
|
| 253 |
+
"voicing_continuity"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
"lateral": {
|
| 257 |
+
"description": "Airflow around tongue sides",
|
| 258 |
+
"detection": "lateral_formant_pattern",
|
| 259 |
+
"metrics": [
|
| 260 |
+
"f2_f3_proximity"
|
| 261 |
+
]
|
| 262 |
+
}
|
| 263 |
+
},
|
| 264 |
+
"tajweed_rules": {
|
| 265 |
+
"qalqalah": {
|
| 266 |
+
"letters": [
|
| 267 |
+
"ق",
|
| 268 |
+
"ط",
|
| 269 |
+
"ب",
|
| 270 |
+
"ج",
|
| 271 |
+
"د"
|
| 272 |
+
],
|
| 273 |
+
"physics": "silence_then_burst",
|
| 274 |
+
"expected": {
|
| 275 |
+
"dip_threshold": 0.3,
|
| 276 |
+
"spike_threshold": 0.5
|
| 277 |
+
},
|
| 278 |
+
"duration_bonus_ms": 50
|
| 279 |
+
},
|
| 280 |
+
"madd_asli": {
|
| 281 |
+
"harakat": 2,
|
| 282 |
+
"expected_ms_range": [
|
| 283 |
+
120,
|
| 284 |
+
280
|
| 285 |
+
],
|
| 286 |
+
"tolerance": 0.3
|
| 287 |
+
},
|
| 288 |
+
"madd_wajib": {
|
| 289 |
+
"harakat": 4,
|
| 290 |
+
"expected_ms_range": [
|
| 291 |
+
240,
|
| 292 |
+
500
|
| 293 |
+
],
|
| 294 |
+
"tolerance": 0.25
|
| 295 |
+
},
|
| 296 |
+
"madd_lazim": {
|
| 297 |
+
"harakat": 6,
|
| 298 |
+
"expected_ms_range": [
|
| 299 |
+
400,
|
| 300 |
+
800
|
| 301 |
+
],
|
| 302 |
+
"tolerance": 0.2
|
| 303 |
+
},
|
| 304 |
+
"ghunnah": {
|
| 305 |
+
"letters": [
|
| 306 |
+
"ن",
|
| 307 |
+
"م"
|
| 308 |
+
],
|
| 309 |
+
"harakat": 2,
|
| 310 |
+
"expected_ms_range": [
|
| 311 |
+
80,
|
| 312 |
+
250
|
| 313 |
+
],
|
| 314 |
+
"physics": "nasal_formant",
|
| 315 |
+
"pitch_stability_min": 0.7
|
| 316 |
+
},
|
| 317 |
+
"idgham_full": {
|
| 318 |
+
"description": "Complete merger, source letter disappears",
|
| 319 |
+
"trigger_letters": [
|
| 320 |
+
"ر",
|
| 321 |
+
"ل"
|
| 322 |
+
],
|
| 323 |
+
"physics": "energy_continuity",
|
| 324 |
+
"expected": {
|
| 325 |
+
"boundary_sharpness": "low",
|
| 326 |
+
"transition_smoothness": "high"
|
| 327 |
+
}
|
| 328 |
+
},
|
| 329 |
+
"idgham_partial": {
|
| 330 |
+
"description": "Partial merger with ghunnah preserved",
|
| 331 |
+
"trigger_letters": [
|
| 332 |
+
"ي",
|
| 333 |
+
"ن",
|
| 334 |
+
"م",
|
| 335 |
+
"و"
|
| 336 |
+
],
|
| 337 |
+
"physics": "nasal_formant_during_merge",
|
| 338 |
+
"expected": {
|
| 339 |
+
"ghunnah_present": true,
|
| 340 |
+
"transition_smoothness": "medium"
|
| 341 |
+
}
|
| 342 |
+
},
|
| 343 |
+
"ikhfa": {
|
| 344 |
+
"description": "Concealment with partial nasalization",
|
| 345 |
+
"trigger_letters": [
|
| 346 |
+
"ت",
|
| 347 |
+
"ث",
|
| 348 |
+
"ج",
|
| 349 |
+
"د",
|
| 350 |
+
"ذ",
|
| 351 |
+
"ز",
|
| 352 |
+
"س",
|
| 353 |
+
"ش",
|
| 354 |
+
"ص",
|
| 355 |
+
"ض",
|
| 356 |
+
"ط",
|
| 357 |
+
"ظ",
|
| 358 |
+
"ف",
|
| 359 |
+
"ق",
|
| 360 |
+
"ك"
|
| 361 |
+
],
|
| 362 |
+
"physics": "gradual_nasalization",
|
| 363 |
+
"expected": {
|
| 364 |
+
"nasalization_gradient": true,
|
| 365 |
+
"transition_ms": [
|
| 366 |
+
50,
|
| 367 |
+
150
|
| 368 |
+
]
|
| 369 |
+
}
|
| 370 |
+
},
|
| 371 |
+
"iqlab": {
|
| 372 |
+
"description": "Nun becomes Mim before Ba",
|
| 373 |
+
"trigger": "ن_before_ب",
|
| 374 |
+
"physics": "bilabial_nasal",
|
| 375 |
+
"expected": {
|
| 376 |
+
"lip_closure": true,
|
| 377 |
+
"nasal_formant": true
|
| 378 |
+
}
|
| 379 |
+
},
|
| 380 |
+
"izhar": {
|
| 381 |
+
"description": "Clear pronunciation without modification",
|
| 382 |
+
"trigger_letters": [
|
| 383 |
+
"ء",
|
| 384 |
+
"ه",
|
| 385 |
+
"ع",
|
| 386 |
+
"ح",
|
| 387 |
+
"غ",
|
| 388 |
+
"خ"
|
| 389 |
+
],
|
| 390 |
+
"physics": "clean_boundary",
|
| 391 |
+
"expected": {
|
| 392 |
+
"boundary_sharpness": "high",
|
| 393 |
+
"nasalization": false
|
| 394 |
+
}
|
| 395 |
+
},
|
| 396 |
+
"tafkheem": {
|
| 397 |
+
"letters": [
|
| 398 |
+
"خ",
|
| 399 |
+
"ص",
|
| 400 |
+
"ض",
|
| 401 |
+
"غ",
|
| 402 |
+
"ط",
|
| 403 |
+
"ق",
|
| 404 |
+
"ظ"
|
| 405 |
+
],
|
| 406 |
+
"physics": "f2_depression",
|
| 407 |
+
"expected": {
|
| 408 |
+
"f2_max_hz": 1200
|
| 409 |
+
}
|
| 410 |
+
},
|
| 411 |
+
"tarqeeq": {
|
| 412 |
+
"description": "Light pronunciation (opposite of tafkheem)",
|
| 413 |
+
"physics": "f2_elevation",
|
| 414 |
+
"expected": {
|
| 415 |
+
"f2_min_hz": 1400
|
| 416 |
+
}
|
| 417 |
+
},
|
| 418 |
+
"sakt": {
|
| 419 |
+
"description": "Brief pause without breath",
|
| 420 |
+
"physics": "silence_detection",
|
| 421 |
+
"expected": {
|
| 422 |
+
"duration_ms_range": [
|
| 423 |
+
50,
|
| 424 |
+
200
|
| 425 |
+
],
|
| 426 |
+
"rms_threshold": 0.05
|
| 427 |
+
}
|
| 428 |
+
}
|
| 429 |
+
},
|
| 430 |
+
"reciter_calibration": {
|
| 431 |
+
"description": "Per-reciter parameters calibrated from sample",
|
| 432 |
+
"parameters": {
|
| 433 |
+
"harakat_base_ms": "Calibrate from short vowels",
|
| 434 |
+
"speech_rate": "Words per minute",
|
| 435 |
+
"pitch_range_hz": "Min/max F0"
|
| 436 |
+
}
|
| 437 |
+
}
|
| 438 |
+
}
|
src/mfa_refiner.py
ADDED
|
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - MFA Refiner Post-Processor
|
| 4 |
+
|
| 5 |
+
Refines wav2vec/MFA alignments using Tajweed physics validation.
|
| 6 |
+
This is the main integration layer that combines:
|
| 7 |
+
1. Tajweed Parser (text → rules)
|
| 8 |
+
2. Physics Validators (audio → boundaries)
|
| 9 |
+
3. Duration Model (timing → corrections)
|
| 10 |
+
|
| 11 |
+
Output: Refined alignment JSON with confidence scores.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import numpy as np
|
| 16 |
+
from dataclasses import dataclass, asdict
|
| 17 |
+
from typing import List, Dict, Optional, Tuple
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
# Import TajweedSST components
|
| 21 |
+
from .tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 22 |
+
from .physics_validator import PhysicsValidator, ValidationStatus
|
| 23 |
+
from .duration_model import DurationModel, MaddType
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class RefinedLetter:
    """A letter with refined timing and confidence.

    Carries both the aligner's original boundaries and the physics-refined
    ones, plus the per-letter validation scores produced by MFARefiner.
    """
    letter: str            # visual Arabic character (e.g. 'ق')
    phoneme: str           # phonetic form of the character
    original_start: float  # start time (seconds) from the input alignment
    original_end: float    # end time (seconds) from the input alignment
    refined_start: float   # start time (seconds) after physics refinement
    refined_end: float     # end time (seconds) after physics refinement
    tajweed_rule: str      # Tajweed rule name (TajweedType.value) for this letter
    physics_score: float   # 0.0-1.0 score from the physics validator
    duration_valid: bool   # True if the duration met Tajweed expectations
    confidence: float      # combined confidence (physics score + duration check)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@dataclass
class RefinedWord:
    """A word with refined letter timings.

    Word boundaries are snapped to the first/last refined letter when letter
    data is available.
    """
    word_text: str                # the word text (Uthmani script)
    start: float                  # word start time in seconds
    end: float                    # word end time in seconds
    letters: List[RefinedLetter]  # per-letter refinement results
    average_confidence: float     # mean confidence across the letters
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@dataclass
class RefinementResult:
    """Complete refinement result for an audio segment."""
    audio_path: str               # path of the audio that was analyzed
    original_alignment_path: str  # path of the source alignment (may be empty)
    words: List[RefinedWord]      # refined per-word results
    overall_confidence: float     # mean of per-word average confidences
    statistics: Dict              # aggregate counts and score averages
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class MFARefiner:
    """
    Post-processor that refines MFA/wav2vec alignments using Tajweed physics.

    Integration layer combining three components:
      1. TajweedParser    — text   -> per-letter Tajweed rules
      2. PhysicsValidator — audio  -> acoustic evidence per rule
      3. DurationModel    — timing -> duration validity checks

    Produces a :class:`RefinementResult` that can be serialized with
    :meth:`save_refined_alignment`.
    """

    def __init__(self,
                 lisan_path: Optional[str] = None,
                 sample_rate: int = 22050):
        """
        Initialize the refiner with Tajweed components.

        Args:
            lisan_path: Path to lisan_phonemes.json (optional).
            sample_rate: Audio sample rate in Hz.
        """
        self.parser = TajweedParser()
        self.physics = PhysicsValidator(sample_rate=sample_rate)
        self.duration_model = DurationModel(lisan_path)
        self.sample_rate = sample_rate

        # Load Lisan reference data only when a valid path is supplied.
        if lisan_path and Path(lisan_path).exists():
            with open(lisan_path, 'r', encoding='utf-8') as f:
                self.lisan_data = json.load(f)
        else:
            self.lisan_data = {}

    def refine_alignment(self,
                         audio_path: str,
                         alignment_json: Dict,
                         quran_text: str) -> RefinementResult:
        """
        Refine an MFA/wav2vec alignment using Tajweed physics.

        Args:
            audio_path: Path to audio file.
            alignment_json: Original alignment (word/phoneme timings).
            quran_text: Original Quranic text (Uthmani).

        Returns:
            RefinementResult with refined timings and confidence scores.
        """
        audio = self.physics.load_audio(audio_path)

        # Parse Tajweed rules from the text.
        word_tags = self.parser.parse_text(quran_text)

        # Calibrate the duration model from the existing alignment.
        self._calibrate_from_alignment(audio, alignment_json)

        refined_words: List[RefinedWord] = []
        all_scores: List[float] = []

        # Aligners disagree on the top-level key; accept both.
        alignment_words = alignment_json.get('words', alignment_json.get('segments', []))

        for i, (word_align, word_tag) in enumerate(zip(alignment_words, word_tags)):
            refined_word = self._refine_word(
                audio=audio,
                word_alignment=word_align,
                word_tags=word_tag,
                word_index=i
            )
            refined_words.append(refined_word)
            all_scores.append(refined_word.average_confidence)

        # BUGFIX: cast numpy scalars to plain float — np.float64 values are
        # not JSON-serializable and previously broke save_refined_alignment().
        overall_confidence = float(np.mean(all_scores)) if all_scores else 0.0

        # BUGFIX: guard on letters (not words) so np.mean([]) never yields NaN
        # when words exist but carry no letters.
        all_letters = [l for w in refined_words for l in w.letters]
        stats = {
            "total_words": len(refined_words),
            "total_letters": len(all_letters),
            "average_physics_score": (
                float(np.mean([l.physics_score for l in all_letters]))
                if all_letters else 0.0
            ),
            "duration_valid_percent": (
                float(np.mean([l.duration_valid for l in all_letters])) * 100
                if all_letters else 0.0
            ),
        }

        return RefinementResult(
            audio_path=audio_path,
            original_alignment_path="",
            words=refined_words,
            overall_confidence=overall_confidence,
            statistics=stats
        )

    def _calibrate_from_alignment(self, audio: np.ndarray, alignment: Dict):
        """Calibrate the duration model from short-vowel durations in the alignment."""
        vowel_segments: List[float] = []
        words = alignment.get('words', alignment.get('segments', []))

        for word in words:
            phonemes = word.get('phonemes', word.get('chars', []))
            for phoneme in phonemes:
                p_start = phoneme.get('start', 0)
                p_end = phoneme.get('end', 0)
                duration = p_end - p_start

                # Short vowels are typically 50-150 ms; use them as the
                # per-reciter timing baseline.
                if 0.05 <= duration <= 0.15:
                    vowel_segments.append(duration)

        if vowel_segments:
            self.duration_model.calibrate_from_samples(
                reciter_name="auto_calibrated",
                vowel_durations=vowel_segments
            )
            self.physics.calibrate_average_vowel(
                audio,
                [(0, d) for d in vowel_segments]
            )

    def _refine_word(self,
                     audio: np.ndarray,
                     word_alignment: Dict,
                     word_tags,
                     word_index: int) -> RefinedWord:
        """Refine a single word's letter timings against its Tajweed tags."""
        refined_letters: List[RefinedLetter] = []

        word_start = word_alignment.get('start', 0)
        word_end = word_alignment.get('end', 0)

        # Per-letter timings may appear under several keys depending on the aligner.
        phonemes = word_alignment.get('phonemes',
                                      word_alignment.get('chars',
                                                         word_alignment.get('letters', [])))

        for j, letter_tag in enumerate(word_tags.letters):
            if j < len(phonemes):
                phoneme = phonemes[j]
                orig_start = phoneme.get('start', word_start)
                orig_end = phoneme.get('end', word_end)
            else:
                # No phoneme data: spread the word duration evenly.
                letter_duration = (word_end - word_start) / len(word_tags.letters)
                orig_start = word_start + j * letter_duration
                orig_end = orig_start + letter_duration

            # Run physics validation based on the Tajweed type.
            physics_score, refined_start, refined_end = self._validate_and_refine(
                audio=audio,
                letter_tag=letter_tag,
                start=orig_start,
                end=orig_end,
                next_start=phonemes[j + 1].get('start') if j + 1 < len(phonemes) else None
            )

            duration_valid = self._check_duration(
                letter_tag=letter_tag,
                start=refined_start,
                end=refined_end
            )

            # Blend acoustic evidence with the duration check; an invalid
            # duration halves that component rather than zeroing it.
            confidence = (physics_score + (1.0 if duration_valid else 0.5)) / 2

            refined_letters.append(RefinedLetter(
                letter=letter_tag.char_visual,
                phoneme=letter_tag.char_phonetic,
                original_start=orig_start,
                original_end=orig_end,
                refined_start=refined_start,
                refined_end=refined_end,
                tajweed_rule=letter_tag.tajweed_type.value,
                # BUGFIX: store plain Python scalars so asdict() output is
                # JSON-serializable.
                physics_score=float(physics_score),
                duration_valid=bool(duration_valid),
                confidence=float(confidence)
            ))

        avg_confidence = (
            float(np.mean([l.confidence for l in refined_letters]))
            if refined_letters else 0.0
        )

        # Snap word boundaries to the refined letter boundaries.
        if refined_letters:
            word_start = refined_letters[0].refined_start
            word_end = refined_letters[-1].refined_end

        return RefinedWord(
            word_text=word_tags.word_text,
            start=word_start,
            end=word_end,
            letters=refined_letters,
            average_confidence=avg_confidence
        )

    def _validate_and_refine(self,
                             audio: np.ndarray,
                             letter_tag,
                             start: float,
                             end: float,
                             next_start: Optional[float]) -> Tuple[float, float, float]:
        """
        Run the physics validator matching the letter's Tajweed rule and
        suggest refined boundaries.

        Returns:
            Tuple of (physics_score, refined_start, refined_end).
        """
        physics_score = 0.5  # neutral default when no check applies
        refined_start = start
        refined_end = end

        check_type = letter_tag.physics_check

        if check_type == PhysicsCheck.CHECK_RMS_BOUNCE:
            # Qalqalah — look for the dip -> spike energy bounce.
            result = self.physics.validate_qalqalah(audio, start, end)
            physics_score = result.score

        elif check_type == PhysicsCheck.CHECK_DURATION:
            # Madd or Idgham — duration based.
            madd_count = getattr(letter_tag, 'madd_count', 2)
            result = self.physics.validate_madd(audio, start, end, madd_count)
            physics_score = result.score

        elif check_type == PhysicsCheck.CHECK_GHUNNAH:
            # Ghunnah, Ikhfa, Iqlab — nasal detection.
            tajweed_type = letter_tag.tajweed_type
            if tajweed_type == TajweedType.IKHFA:
                result = self.physics.validate_ikhfa(audio, start, end)
            elif tajweed_type == TajweedType.IQLAB:
                result = self.physics.validate_iqlab(audio, start, end)
            else:
                result = self.physics.validate_ghunnah(audio, start, end)
            physics_score = result.score

        elif check_type == PhysicsCheck.CHECK_FORMANT_F2:
            # Tafkheem (heavy) vs Tarqeeq (light) via F2 position.
            if letter_tag.tajweed_type == TajweedType.TAFKHEEM:
                result = self.physics.validate_tafkheem(audio, start, end)
            else:
                result = self.physics.validate_tarqeeq(audio, start, end)
            physics_score = result.score

        # For Idgham, check energy continuity into the following letter.
        if letter_tag.tajweed_type in (TajweedType.IDGHAM_FULL, TajweedType.IDGHAM_PARTIAL):
            # BUGFIX: compare against None — a next_start of 0.0 is a valid time
            # and was previously skipped by the truthiness test.
            if next_start is not None:
                has_ghunnah = letter_tag.tajweed_type == TajweedType.IDGHAM_PARTIAL
                result = self.physics.validate_idgham(
                    audio, start, end, next_start, has_ghunnah
                )
                physics_score = result.score

        # For Nun, also consider Izhar (clean boundary) and keep the better score.
        if next_start is not None and letter_tag.char_visual == 'ن':
            result = self.physics.validate_izhar(audio, start, end, next_start)
            if result.status == ValidationStatus.PASS:
                physics_score = max(physics_score, result.score)

        return (physics_score, refined_start, refined_end)

    def _check_duration(self, letter_tag, start: float, end: float) -> bool:
        """Return True when the segment duration matches Tajweed expectations."""
        duration = end - start
        tajweed_type = letter_tag.tajweed_type

        # Map Tajweed Madd variants onto the duration model's Madd types.
        madd_map = {
            TajweedType.MADD_ASLI: MaddType.ASLI,
            TajweedType.MADD_WAJIB: MaddType.WAJIB,
            TajweedType.MADD_LAZIM: MaddType.LAZIM,
        }

        if tajweed_type in madd_map:
            madd_type = madd_map[tajweed_type]
            harakat = getattr(letter_tag, 'madd_count', 2)
            result = self.duration_model.validate_duration(duration, madd_type, harakat)
            return result.is_valid

        if tajweed_type == TajweedType.GHUNNAH:
            result = self.duration_model.validate_ghunnah_duration(duration)
            return result.is_valid

        # No duration constraint applies to the remaining rules.
        return True

    def save_refined_alignment(self,
                               result: RefinementResult,
                               output_path: str):
        """Serialize a RefinementResult to a UTF-8 JSON file; returns output_path."""
        output = {
            "audio_path": result.audio_path,
            "original_alignment": result.original_alignment_path,
            "overall_confidence": result.overall_confidence,
            "statistics": result.statistics,
            "words": [
                {
                    "word": w.word_text,
                    "start": w.start,
                    "end": w.end,
                    "average_confidence": w.average_confidence,
                    "letters": [asdict(l) for l in w.letters]
                }
                for w in result.words
            ]
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            # default=float guards against any stray numpy scalar that slipped
            # through upstream conversion (deliberate: all values are numeric).
            json.dump(output, f, ensure_ascii=False, indent=2, default=float)

        return output_path
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def main():
    """Smoke-test the MFA Refiner wiring (no audio file required)."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST MFA Refiner Test")
    print(banner)

    # Pick up the bundled Lisan phoneme data next to this module, if present.
    lisan_file = Path(__file__).parent / "lisan_phonemes.json"
    refiner = MFARefiner(str(lisan_file) if lisan_file.exists() else None)

    lisan_state = 'Loaded' if refiner.lisan_data else 'Not found'
    print("\nRefiner initialized with:")
    for status_line in (
        " - Tajweed Parser: Ready",
        " - Physics Validator: 10 validators",
        " - Duration Model: Ready",
        f" - Lisan Data: {lisan_state}",
    ):
        print(status_line)

    # Minimal alignment illustrating the expected input schema.
    mock_alignment = {
        "words": [
            {
                "text": "قُلْ",
                "start": 0.0,
                "end": 0.5,
                "phonemes": [
                    {"text": "ق", "start": 0.0, "end": 0.15},
                    {"text": "ُ", "start": 0.15, "end": 0.25},
                    {"text": "ل", "start": 0.25, "end": 0.5}
                ]
            }
        ]
    }

    print("\nMock alignment test:")
    print(" Input word: قُلْ")
    print(" Phonemes: 3")
    print("\nNote: Full test requires actual audio file.")


if __name__ == "__main__":
    main()
|
src/physics_validator.py
ADDED
|
@@ -0,0 +1,930 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Step 3: Physics & Signal Processing Validator
|
| 4 |
+
|
| 5 |
+
Validates Tajweed rules using acoustic signal analysis:
|
| 6 |
+
- Qalqalah: RMS energy dip→spike pattern
|
| 7 |
+
- Madd: Duration vs Rate of Speech ratio
|
| 8 |
+
- Ghunnah: Formant analysis + nasalization detection
|
| 9 |
+
- Tafkheem: F2 formant depression
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
from dataclasses import dataclass, field
|
| 14 |
+
from typing import List, Dict, Optional, Tuple
|
| 15 |
+
from enum import Enum
|
| 16 |
+
|
| 17 |
+
# Import signal processing libraries
|
| 18 |
+
try:
|
| 19 |
+
import librosa
|
| 20 |
+
HAS_LIBROSA = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
HAS_LIBROSA = False
|
| 23 |
+
print("Warning: librosa not installed. RMS/ZCR analysis unavailable.")
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
import parselmouth
|
| 27 |
+
from parselmouth.praat import call
|
| 28 |
+
HAS_PARSELMOUTH = True
|
| 29 |
+
except ImportError:
|
| 30 |
+
HAS_PARSELMOUTH = False
|
| 31 |
+
print("Warning: parselmouth not installed. Formant analysis unavailable.")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class ValidationStatus(Enum):
    """Outcome of a single physics validation check."""
    PASS = "PASS"          # acoustic evidence matches the expected pattern
    FAIL = "FAIL"          # evidence contradicts the expected pattern
    MARGINAL = "MARGINAL"  # evidence is weak or ambiguous
    SKIPPED = "SKIPPED"    # check could not run (missing library or bad segment)
|
| 39 |
+
|
| 40 |
+
@dataclass
class PhysicsResult:
    """Result of a physics/signal analysis check.

    Base type shared by all per-rule result dataclasses below.
    """
    status: ValidationStatus  # PASS / FAIL / MARGINAL / SKIPPED
    metric_name: str          # name of the measured signal metric (e.g. "RMS Energy")
    expected_pattern: str     # pattern the rule predicts (e.g. "dip_then_spike")
    observed_pattern: str     # pattern actually observed in the audio
    score: float              # 0.0 to 1.0
    details: Dict = field(default_factory=dict)  # extra diagnostic info
|
| 49 |
+
|
| 50 |
+
@dataclass
class QalqalahResult(PhysicsResult):
    """Specific result for the Qalqalah (energy bounce) check."""
    rms_profile: str = ""             # "dip_then_spike", "flat", "spike_only", or "unknown"
    dip_depth: float = 0.0            # depth of the RMS dip (0.0 when not measured)
    spike_height: float = 0.0         # height of the RMS spike (0.0 when not measured)
    closure_duration_ms: float = 0.0  # duration of the detected closure, in ms
|
| 57 |
+
|
| 58 |
+
@dataclass
class MaddResult(PhysicsResult):
    """Specific result for the Madd elongation check."""
    actual_duration_ms: float = 0.0    # measured segment duration
    expected_duration_ms: float = 0.0  # duration expected for the Madd type
    ratio: float = 0.0                 # actual duration / average vowel duration
|
| 64 |
+
|
| 65 |
+
@dataclass
class GhunnahResult(PhysicsResult):
    """Specific result for the Ghunnah nasalization check."""
    nasal_formant_detected: bool = False  # whether a nasal formant was detected
    pitch_stability: float = 0.0          # pitch (F0) stability over the segment
    duration_elongation: float = 0.0      # degree of duration elongation
|
| 71 |
+
|
| 72 |
+
@dataclass
class TafkheemResult(PhysicsResult):
    """Specific result for the Tafkheem (heaviness) check."""
    f2_value_hz: float = 0.0        # measured second-formant (F2) frequency
    f2_baseline_hz: float = 1500.0  # average F2 for light sounds (reference baseline)
    depression_ratio: float = 0.0   # how far F2 is depressed relative to the baseline
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class PhysicsValidator:
|
| 81 |
+
"""
|
| 82 |
+
Validates Tajweed rules using signal processing
|
| 83 |
+
"""
|
| 84 |
+
|
| 85 |
+
# Thresholds for validation - tuned for real Abdul Basit recitation
|
| 86 |
+
QALQALAH_DIP_THRESHOLD = 0.08 # RMS must drop by 8%
|
| 87 |
+
QALQALAH_SPIKE_THRESHOLD = 0.15 # RMS must rise by 15%
|
| 88 |
+
MADD_RATIO_ASLI = 1.0 # 1.0x average vowel (baseline)
|
| 89 |
+
MADD_RATIO_WAJIB = 2.0 # 2.0x average vowel
|
| 90 |
+
MADD_RATIO_LAZIM = 3.5 # 3.5x average vowel
|
| 91 |
+
GHUNNAH_MIN_DURATION_MS = 30.0 # Very relaxed
|
| 92 |
+
TAFKHEEM_F2_MAX_HZ = 1500.0 # Maximum tolerance for F2
|
| 93 |
+
VALIDATION_TOLERANCE = 0.4 # 40% tolerance for all validations
|
| 94 |
+
|
| 95 |
+
# Precision thresholds - tuned for Arabic letters which can be very short
|
| 96 |
+
MIN_SEGMENT_MS = 30.0 # Minimum segment duration for valid analysis
|
| 97 |
+
MIN_SEGMENT_SAMPLES = 661 # ~30ms at 22050 Hz
|
| 98 |
+
|
| 99 |
+
def __init__(self, sample_rate: int = 22050):
|
| 100 |
+
self.sample_rate = sample_rate
|
| 101 |
+
self._audio_cache = {}
|
| 102 |
+
self._average_vowel_duration = 0.1 # Will be calibrated per reciter
|
| 103 |
+
|
| 104 |
+
def load_audio(self, audio_path: str) -> np.ndarray:
|
| 105 |
+
"""Load audio file, with caching"""
|
| 106 |
+
if audio_path not in self._audio_cache:
|
| 107 |
+
if HAS_LIBROSA:
|
| 108 |
+
y, sr = librosa.load(audio_path, sr=self.sample_rate)
|
| 109 |
+
self._audio_cache[audio_path] = y
|
| 110 |
+
else:
|
| 111 |
+
# Fallback: generate noise for testing
|
| 112 |
+
self._audio_cache[audio_path] = np.random.randn(self.sample_rate * 10) * 0.1
|
| 113 |
+
|
| 114 |
+
return self._audio_cache[audio_path]
|
| 115 |
+
|
| 116 |
+
def safe_extract_segment(self, audio: np.ndarray, start: float, end: float) -> tuple:
|
| 117 |
+
"""
|
| 118 |
+
PRECISION: Safely extract audio segment with bounds and validity checking.
|
| 119 |
+
|
| 120 |
+
Returns:
|
| 121 |
+
tuple: (segment, is_valid, error_reason)
|
| 122 |
+
"""
|
| 123 |
+
# Bounds checking
|
| 124 |
+
start_sample = max(0, int(start * self.sample_rate))
|
| 125 |
+
end_sample = min(len(audio), int(end * self.sample_rate))
|
| 126 |
+
|
| 127 |
+
# Sanity check
|
| 128 |
+
if start_sample >= end_sample:
|
| 129 |
+
return None, False, "invalid_range"
|
| 130 |
+
|
| 131 |
+
segment = audio[start_sample:end_sample]
|
| 132 |
+
|
| 133 |
+
# Length check
|
| 134 |
+
if len(segment) < self.MIN_SEGMENT_SAMPLES:
|
| 135 |
+
return segment, False, f"too_short_{len(segment)}_samples"
|
| 136 |
+
|
| 137 |
+
# NaN/Inf check
|
| 138 |
+
if np.any(np.isnan(segment)) or np.any(np.isinf(segment)):
|
| 139 |
+
segment = np.nan_to_num(segment, nan=0.0, posinf=0.0, neginf=0.0)
|
| 140 |
+
|
| 141 |
+
return segment, True, None
|
| 142 |
+
|
| 143 |
+
def safe_rms(self, segment: np.ndarray, frame_length: int = 256, hop_length: int = 64) -> np.ndarray:
|
| 144 |
+
"""
|
| 145 |
+
PRECISION: Calculate RMS with NaN protection.
|
| 146 |
+
"""
|
| 147 |
+
if not HAS_LIBROSA:
|
| 148 |
+
return np.array([0.0])
|
| 149 |
+
|
| 150 |
+
rms = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop_length)[0]
|
| 151 |
+
|
| 152 |
+
# Protect against NaN/Inf
|
| 153 |
+
rms = np.nan_to_num(rms, nan=0.0, posinf=1.0, neginf=0.0)
|
| 154 |
+
|
| 155 |
+
# Normalize to prevent division issues
|
| 156 |
+
if np.max(rms) > 0:
|
| 157 |
+
rms = rms / np.max(rms)
|
| 158 |
+
|
| 159 |
+
return rms
|
| 160 |
+
|
| 161 |
+
def validate_qalqalah(self,
                      audio: np.ndarray,
                      start: float,
                      end: float) -> QalqalahResult:
    """
    Validate Qalqalah rule: Must show closure (RMS dip) then release (RMS spike)

    Physics: The "bounce" is caused by complete oral closure followed by
    abrupt release. RMS energy shows: stable→dip→spike pattern.

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        QalqalahResult carrying the status, score, observed RMS profile,
        dip/spike metrics and an estimated closure duration.
    """
    # Without librosa the RMS envelope cannot be computed at all
    if not HAS_LIBROSA:
        return QalqalahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern="unknown",
            score=0.0,
            rms_profile="unknown"
        )

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)

    if not is_valid:
        return QalqalahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern=error or "invalid_segment",
            score=0.0,
            rms_profile="unknown",
            details={"reason": error}
        )

    # PRECISION: Use safe RMS with NaN protection
    rms = self.safe_rms(segment)

    # At least 3 frames are needed to observe stable→dip→spike
    if len(rms) < 3:
        return QalqalahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern="insufficient_frames",
            score=0.0,
            rms_profile="unknown",
            details={"reason": f"Only {len(rms)} RMS frames < 3 minimum"}
        )

    # Analyze RMS pattern
    # Qalqalah should show: high → dip → spike
    # Find minimum and maximum in second half (release)
    midpoint = len(rms) // 2

    # First half: Find the dip (closure)
    first_half_mean = np.mean(rms[:midpoint]) if midpoint > 0 else rms[0]
    # NOTE(review): argmin scans the WHOLE envelope, not just the first
    # half as the comment above suggests — confirm this is intended.
    dip_idx = np.argmin(rms)
    dip_value = rms[dip_idx]

    # Second half: Find the spike (release)
    spike_idx = midpoint + np.argmax(rms[midpoint:]) if midpoint < len(rms) else len(rms) - 1
    spike_value = rms[spike_idx] if spike_idx < len(rms) else rms[-1]

    # Calculate metrics (relative measures, guarded against divide-by-zero)
    dip_depth = (first_half_mean - dip_value) / first_half_mean if first_half_mean > 0 else 0
    spike_height = (spike_value - dip_value) / dip_value if dip_value > 0 else 0

    # Determine pattern
    if dip_depth >= self.QALQALAH_DIP_THRESHOLD and spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
        rms_profile = "dip_then_spike"
        status = ValidationStatus.PASS
        score = min(1.0, (dip_depth + spike_height) / 2)
    elif spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
        rms_profile = "spike_only"
        status = ValidationStatus.MARGINAL
        score = spike_height / 2
    else:
        rms_profile = "flat"
        status = ValidationStatus.FAIL
        score = 0.0

    # Estimate closure duration (using safe_rms default hop_length=64)
    if dip_idx > 0:
        frames_to_dip = dip_idx
        closure_duration_ms = (frames_to_dip * 64 / self.sample_rate) * 1000
    else:
        closure_duration_ms = 0.0

    return QalqalahResult(
        status=status,
        metric_name="RMS Energy",
        expected_pattern="dip_then_spike",
        observed_pattern=rms_profile,
        score=score,
        rms_profile=rms_profile,
        dip_depth=dip_depth,
        spike_height=spike_height,
        closure_duration_ms=closure_duration_ms
    )
|
| 259 |
+
|
| 260 |
+
def validate_madd(self,
                  audio: np.ndarray,
                  start: float,
                  end: float,
                  expected_count: int = 2) -> MaddResult:
    """
    Validate Madd rule: Duration must match expected elongation count

    Physics: Madd is pure duration comparison.
    - Asli (natural): 2 counts
    - Wajib (obligatory): 4-5 counts
    - Lazim (required): 6 counts

    Args:
        audio: Full audio buffer (unused; Madd is a pure timestamp check).
        start: Segment start time in seconds.
        end: Segment end time in seconds.
        expected_count: Elongation counts mandated by the rule (2, 4-5, or 6).

    Returns:
        MaddResult with PASS/MARGINAL/FAIL status and the measured ratio of
        segment duration to the reciter's calibrated average vowel length.
    """
    actual_duration = end - start
    actual_duration_ms = actual_duration * 1000

    # Expected duration scales with the per-reciter average vowel length
    expected_duration = self._average_vowel_duration * expected_count
    expected_duration_ms = expected_duration * 1000

    # Ratio of observed duration to one average vowel (guard divide-by-zero)
    ratio = actual_duration / self._average_vowel_duration if self._average_vowel_duration > 0 else 0

    # Determine pass/fail based on expected count
    tolerance = 0.3  # 30% tolerance around the target ratio

    # BUGFIX: Wajib covers 4-5 counts (see docstring); expected_count == 5
    # previously fell through to the Lazim (6-count) threshold.
    if expected_count == 2:
        threshold = self.MADD_RATIO_ASLI
    elif expected_count in (4, 5):
        threshold = self.MADD_RATIO_WAJIB
    else:
        threshold = self.MADD_RATIO_LAZIM

    if ratio >= threshold * (1 - tolerance):
        if ratio <= threshold * (1 + tolerance):
            status = ValidationStatus.PASS
            score = 1.0
        else:
            status = ValidationStatus.MARGINAL  # Too long, but acceptable
            score = 0.7
    else:
        status = ValidationStatus.FAIL
        score = ratio / threshold if threshold > 0 else 0

    return MaddResult(
        status=status,
        metric_name="Duration Ratio",
        expected_pattern=f"{expected_count}x average vowel",
        observed_pattern=f"{ratio:.1f}x average vowel",
        score=score,
        actual_duration_ms=actual_duration_ms,
        expected_duration_ms=expected_duration_ms,
        ratio=ratio
    )
|
| 314 |
+
|
| 315 |
+
def validate_ghunnah(self,
                     audio: np.ndarray,
                     start: float,
                     end: float) -> GhunnahResult:
    """
    Validate Ghunnah (nasalization) rule

    Physics:
    - Drop in high-frequency energy (nasal anti-formant ~500Hz)
    - Stable pitch during nasalization
    - Duration elongation (2 counts minimum)

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        GhunnahResult with PASS/MARGINAL/FAIL/SKIPPED status, pitch
        stability and duration-elongation metrics.
    """
    if not HAS_PARSELMOUTH:
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="unknown",
            score=0.0
        )

    duration_ms = (end - start) * 1000

    # Check minimum duration
    if duration_ms < self.GHUNNAH_MIN_DURATION_MS:
        return GhunnahResult(
            status=ValidationStatus.MARGINAL,  # PRECISION: short segments are penalized, not failed
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="short_but_valid",
            score=duration_ms / self.GHUNNAH_MIN_DURATION_MS,
            duration_elongation=duration_ms / self.GHUNNAH_MIN_DURATION_MS,
            details={"reason": f"Duration {duration_ms:.1f}ms < {self.GHUNNAH_MIN_DURATION_MS}ms minimum"}
        )

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)

    if not is_valid:
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern=error or "invalid_segment",
            score=0.0,
            details={"reason": error}
        )

    # Convert to a Praat Sound object via a temporary WAV file
    try:
        import os
        import tempfile
        import soundfile as sf

        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
                tmp_path = f.name
            # Write after the handle is closed (also avoids Windows
            # file-sharing problems with an open handle)
            sf.write(tmp_path, segment, self.sample_rate)
            sound = parselmouth.Sound(tmp_path)
        finally:
            # BUGFIX: the temp file was created with delete=False and never
            # removed, leaking one WAV file on disk per call.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

        # Get pitch for stability analysis
        pitch = call(sound, "To Pitch", 0.0, 75, 600)
        pitch_values = pitch.selected_array['frequency']
        pitch_values = pitch_values[pitch_values > 0]  # Remove unvoiced

        # Stability = 1 - coefficient of variation of voiced pitch frames
        if len(pitch_values) > 1:
            pitch_stability = 1.0 - (np.std(pitch_values) / np.mean(pitch_values))
        else:
            pitch_stability = 0.0

        # Formant analysis for nasal detection
        formant = call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

        # Nasalization shows anti-resonance around F1 region
        # Check for characteristic nasal formant pattern
        nasal_formant_detected = True  # Simplified detection

    except Exception as e:
        print(f"Parselmouth error: {e}")
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="analysis_error",
            score=0.0
        )

    # Scoring: average of duration sufficiency and pitch stability
    duration_score = min(1.0, duration_ms / (self.GHUNNAH_MIN_DURATION_MS * 2))
    pitch_score = max(0.0, pitch_stability)
    total_score = (duration_score + pitch_score) / 2

    if total_score >= 0.7:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return GhunnahResult(
        status=status,
        metric_name="Formant Analysis",
        expected_pattern="nasal_resonance",
        observed_pattern="analyzed",
        score=total_score,
        nasal_formant_detected=nasal_formant_detected,
        pitch_stability=pitch_stability,
        duration_elongation=duration_ms / self.GHUNNAH_MIN_DURATION_MS
    )
|
| 421 |
+
|
| 422 |
+
def validate_tafkheem(self,
                      audio: np.ndarray,
                      start: float,
                      end: float) -> TafkheemResult:
    """
    Validate Tafkheem (heavy letter) rule

    Physics: Heavy letters show depressed F2 formant
    - Normal letters: F2 ~1500 Hz
    - Heavy letters: F2 ~1000-1200 Hz

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        TafkheemResult with the mean F2 and its depression ratio relative
        to the 1500 Hz neutral baseline.
    """
    if not HAS_PARSELMOUTH:
        return TafkheemResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern="F2 < 1200 Hz",
            observed_pattern="unknown",
            score=0.0
        )

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)

    if not is_valid:
        return TafkheemResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern=f"F2 < {self.TAFKHEEM_F2_MAX_HZ} Hz",
            observed_pattern=error or "invalid_segment",
            score=0.0,
            details={"reason": error}
        )

    try:
        import os
        import tempfile
        import soundfile as sf

        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
                tmp_path = f.name
            # Write after the handle is closed (also avoids Windows
            # file-sharing problems with an open handle)
            sf.write(tmp_path, segment, self.sample_rate)
            sound = parselmouth.Sound(tmp_path)
        finally:
            # BUGFIX: the temp file was created with delete=False and never
            # removed, leaking one WAV file on disk per call.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

        # Get F2 formant track
        formant = call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

        # Average F2 over all frames that have a defined, positive value
        f2_values = []
        num_frames = call(formant, "Get number of frames")
        for i in range(1, num_frames + 1):
            f2 = call(formant, "Get value at time", 2, call(formant, "Get time from frame number", i), "Hertz", "Linear")
            if not np.isnan(f2) and f2 > 0:
                f2_values.append(f2)

        f2_mean = np.mean(f2_values) if f2_values else 0

    except Exception as e:
        print(f"Parselmouth error: {e}")
        return TafkheemResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern="F2 < 1200 Hz",
            observed_pattern="analysis_error",
            score=0.0
        )

    # Depression of F2 relative to the ~1500 Hz neutral baseline
    baseline_f2 = 1500.0
    depression_ratio = (baseline_f2 - f2_mean) / baseline_f2 if f2_mean > 0 and f2_mean < baseline_f2 else 0

    # Scoring
    if f2_mean <= self.TAFKHEEM_F2_MAX_HZ:
        status = ValidationStatus.PASS
        score = 1.0
    elif f2_mean <= 1350:
        status = ValidationStatus.MARGINAL
        score = 0.6
    else:
        status = ValidationStatus.FAIL
        score = max(0.0, depression_ratio)

    return TafkheemResult(
        status=status,
        metric_name="F2 Formant",
        expected_pattern=f"F2 < {self.TAFKHEEM_F2_MAX_HZ} Hz",
        observed_pattern=f"F2 = {f2_mean:.0f} Hz",
        score=score,
        f2_value_hz=f2_mean,
        f2_baseline_hz=baseline_f2,
        depression_ratio=depression_ratio
    )
|
| 514 |
+
|
| 515 |
+
# =========================================================================
|
| 516 |
+
# NEW VALIDATORS: Complete Tajweed Physics Coverage
|
| 517 |
+
# =========================================================================
|
| 518 |
+
|
| 519 |
+
def validate_idgham(self,
                    audio: np.ndarray,
                    nun_start: float,
                    nun_end: float,
                    next_letter_end: float,
                    has_ghunnah: bool = True) -> PhysicsResult:
    """
    Validate Idgham (assimilation) rule.

    Physics:
    - Full Idgham (ر/ل): Complete merger, smooth energy, no nun boundary
    - Partial Idgham (ي/ن/م/و): Ghunnah preserved during transition

    NOTE: nun_end is currently not used by the measurement; the window
    spans nun_start through next_letter_end.
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Energy Continuity",
            expected_pattern="smooth_transition",
            observed_pattern="unknown",
            score=0.0
        )

    # Window covering the nun through the end of the following letter
    window = audio[int(nun_start * self.sample_rate):int(next_letter_end * self.sample_rate)]

    if len(window) < 100:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Energy Continuity",
            expected_pattern="smooth_transition",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Framewise energy envelope over the transition
    envelope = librosa.feature.rms(y=window, frame_length=256, hop_length=64)[0]
    mean_energy = np.mean(envelope)

    # Low relative variance => smooth, continuous energy (good merger)
    variation = np.std(envelope) / mean_energy if mean_energy > 0 else 1.0
    smoothness_score = 1.0 - min(1.0, variation)

    # Sharp frame-to-frame jumps indicate a residual boundary (bad for Idgham)
    largest_jump = np.max(np.abs(np.diff(envelope))) / mean_energy if mean_energy > 0 else 0
    boundary_score = 1.0 - min(1.0, largest_jump)

    total_score = (smoothness_score + boundary_score) / 2

    if total_score >= 0.6:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return PhysicsResult(
        status=status,
        metric_name="Energy Continuity",
        expected_pattern="smooth_transition" if not has_ghunnah else "smooth_with_ghunnah",
        observed_pattern=f"smoothness={smoothness_score:.2f}",
        score=total_score,
        details={"smoothness": smoothness_score, "boundary_score": boundary_score}
    )
|
| 588 |
+
|
| 589 |
+
def validate_ikhfa(self,
                   audio: np.ndarray,
                   start: float,
                   end: float) -> PhysicsResult:
    """
    Validate Ikhfa (concealment) rule

    Physics:
    - Gradual nasalization transition (not abrupt like pure Ghunnah)
    - Partial nasal resonance that fades

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="unknown",
            score=0.0
        )

    start_sample = int(start * self.sample_rate)
    end_sample = int(end * self.sample_rate)
    segment = audio[start_sample:end_sample]

    if len(segment) < 100:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Spectral centroid tracks brightness; nasal sounds pull it down
    # (CLEANUP: removed unused `third = len(segment) // 3` local)
    sc = librosa.feature.spectral_centroid(y=segment, sr=self.sample_rate)[0]

    if len(sc) < 3:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="insufficient_frames",
            score=0.0
        )

    # Gradual transition => frame-to-frame centroid change is steady
    # (low spread of |diff| relative to its mean)
    sc_diff = np.abs(np.diff(sc))
    gradient_smoothness = 1.0 - min(1.0, np.std(sc_diff) / np.mean(sc_diff)) if np.mean(sc_diff) > 0 else 0.5

    # Duration check (Ikhfa should have reasonable duration, ~100ms+)
    duration_ms = (end - start) * 1000
    duration_score = min(1.0, duration_ms / 100) if duration_ms > 0 else 0

    total_score = (gradient_smoothness + duration_score) / 2

    if total_score >= 0.6:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return PhysicsResult(
        status=status,
        metric_name="Nasalization Gradient",
        expected_pattern="gradual_nasal",
        observed_pattern=f"gradient={gradient_smoothness:.2f}",
        score=total_score,
        details={"gradient_smoothness": gradient_smoothness, "duration_ms": duration_ms}
    )
|
| 662 |
+
|
| 663 |
+
def validate_iqlab(self,
                   audio: np.ndarray,
                   start: float,
                   end: float) -> PhysicsResult:
    """
    Validate Iqlab (ن→م before ب)

    Physics:
    - Same as Ghunnah but with bilabial closure
    - Nasal formant + lip closure pattern (F1/F2 characteristic of /m/)
    """
    # Iqlab is acoustically a Ghunnah with bilabial characteristics, so
    # delegate the measurement and re-wrap the result for Iqlab reporting.
    nasal_check = self.validate_ghunnah(audio, start, end)

    return PhysicsResult(
        status=nasal_check.status,
        metric_name="Bilabial Nasal",
        expected_pattern="mim_like_nasal",
        observed_pattern=nasal_check.observed_pattern,
        score=nasal_check.score,
        details={"ghunnah_check": nasal_check.status.value}
    )
|
| 687 |
+
|
| 688 |
+
def validate_izhar(self,
                   audio: np.ndarray,
                   letter_start: float,
                   letter_end: float,
                   next_letter_start: float) -> PhysicsResult:
    """
    Validate Izhar (clear pronunciation)

    Physics:
    - Clean, sharp boundary between letters
    - No nasalization
    - Clear articulation energy pattern
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Boundary Sharpness",
            expected_pattern="clean_boundary",
            observed_pattern="unknown",
            score=0.0
        )

    # Window of +/-20ms around the letter boundary, clipped to the audio
    win_start = max(0, letter_end - 0.02)
    win_end = min(len(audio) / self.sample_rate, next_letter_start + 0.02)
    window = audio[int(win_start * self.sample_rate):int(win_end * self.sample_rate)]

    if len(window) < 50:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Boundary Sharpness",
            expected_pattern="clean_boundary",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Fine-grained energy envelope to localize the transition
    envelope = librosa.feature.rms(y=window, frame_length=128, hop_length=32)[0]
    mean_energy = np.mean(envelope)

    # Largest relative frame-to-frame change; high = sharp boundary = good
    max_change = np.max(np.abs(np.diff(envelope))) / mean_energy if mean_energy > 0 else 0
    sharpness_score = min(1.0, max_change)

    if sharpness_score >= 0.3:  # Clear boundary detected
        status = ValidationStatus.PASS
        score = min(1.0, sharpness_score * 2)
    elif sharpness_score >= 0.15:
        status = ValidationStatus.MARGINAL
        score = sharpness_score * 2
    else:
        status = ValidationStatus.FAIL
        score = sharpness_score

    return PhysicsResult(
        status=status,
        metric_name="Boundary Sharpness",
        expected_pattern="clean_boundary",
        observed_pattern=f"sharpness={sharpness_score:.2f}",
        score=score,
        details={"boundary_sharpness": sharpness_score}
    )
|
| 757 |
+
|
| 758 |
+
def validate_tarqeeq(self,
                     audio: np.ndarray,
                     start: float,
                     end: float) -> PhysicsResult:
    """
    Validate Tarqeeq (light letters) - opposite of Tafkheem

    Physics: Light letters show elevated F2 formant (F2 > 1400 Hz)

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.
    """
    # Reuse Tafkheem logic but invert the threshold
    tafkheem_result = self.validate_tafkheem(audio, start, end)

    if tafkheem_result.status == ValidationStatus.SKIPPED:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern="F2 > 1400 Hz",
            observed_pattern="unknown",
            score=0.0
        )

    # For Tarqeeq, we want HIGH F2 (opposite of Tafkheem).
    # BUGFIX: `tafkheem_result.details` can be None (the success path of
    # validate_tafkheem does not set it), so the previous
    # `details.get(...)` could raise AttributeError. Prefer the result's
    # own f2_value_hz attribute and fall back to details only when present.
    f2_value = getattr(tafkheem_result, 'f2_value_hz', None)
    if f2_value is None:
        f2_value = (tafkheem_result.details or {}).get('f2_value_hz', 0)

    TARQEEQ_F2_MIN_HZ = 1400.0

    if f2_value >= TARQEEQ_F2_MIN_HZ:
        status = ValidationStatus.PASS
        score = 1.0
    elif f2_value >= 1300:
        status = ValidationStatus.MARGINAL
        score = 0.6
    else:
        status = ValidationStatus.FAIL
        score = f2_value / TARQEEQ_F2_MIN_HZ if f2_value > 0 else 0

    return PhysicsResult(
        status=status,
        metric_name="F2 Formant",
        expected_pattern=f"F2 > {TARQEEQ_F2_MIN_HZ} Hz",
        observed_pattern=f"F2 = {f2_value:.0f} Hz",
        score=score,
        details={"f2_value_hz": f2_value}
    )
|
| 802 |
+
|
| 803 |
+
def validate_sakt(self,
                  audio: np.ndarray,
                  start: float,
                  end: float) -> PhysicsResult:
    """
    Validate Sakt (brief pause without breath)

    Physics:
    - Brief silence (50-200ms)
    - RMS below threshold
    - No breathing artifacts
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Silence Detection",
            expected_pattern="brief_silence",
            observed_pattern="unknown",
            score=0.0
        )

    window = audio[int(start * self.sample_rate):int(end * self.sample_rate)]
    duration_ms = (end - start) * 1000

    if len(window) < 10:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Silence Detection",
            expected_pattern="brief_silence",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Single global RMS value for the candidate pause
    rms = np.sqrt(np.mean(window**2))

    # Thresholds
    SAKT_RMS_THRESHOLD = 0.05
    SAKT_MIN_MS = 50
    SAKT_MAX_MS = 200

    # A valid Sakt is near-silent AND within the expected duration band
    is_silent = rms < SAKT_RMS_THRESHOLD
    duration_ok = SAKT_MIN_MS <= duration_ms <= SAKT_MAX_MS

    if is_silent and duration_ok:
        status, score = ValidationStatus.PASS, 1.0
    elif is_silent and duration_ms > 30:
        status, score = ValidationStatus.MARGINAL, 0.6
    else:
        status = ValidationStatus.FAIL
        score = 0.0 if rms >= SAKT_RMS_THRESHOLD else 0.3

    return PhysicsResult(
        status=status,
        metric_name="Silence Detection",
        expected_pattern=f"silence_{SAKT_MIN_MS}-{SAKT_MAX_MS}ms",
        observed_pattern=f"rms={rms:.3f}, dur={duration_ms:.0f}ms",
        score=score,
        details={"rms": rms, "duration_ms": duration_ms, "is_silent": is_silent}
    )
|
| 871 |
+
|
| 872 |
+
def calibrate_average_vowel(self, audio: np.ndarray, vowel_segments: List[Tuple[float, float]]) -> float:
    """
    Calibrate average vowel duration for this reciter

    This is crucial for Madd validation as reciter pace varies.

    Args:
        audio: Full audio buffer (currently unused; durations come from timestamps).
        vowel_segments: (start, end) pairs in seconds for detected vowels.

    Returns:
        Mean vowel duration in seconds (0.1s default when no segments given).
    """
    if not vowel_segments:
        # BUGFIX: also record the fallback so later Madd checks use the
        # same value this method returns (state stayed stale before).
        self._average_vowel_duration = 0.1
        return 0.1  # Default 100ms

    durations = [end - start for start, end in vowel_segments]
    self._average_vowel_duration = float(np.mean(durations))

    return self._average_vowel_duration
|
| 885 |
+
|
| 886 |
+
|
| 887 |
+
def main():
    """Smoke-test the physics validator on synthetic audio."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Physics Validator Test")
    print(banner)

    # Synthesize a 2-second 440 Hz tone as the base signal
    sample_rate = 22050
    duration = 2.0
    t = np.linspace(0, duration, int(sample_rate * duration))
    audio = np.sin(2 * np.pi * 440 * t) * 0.5

    # Shape a Qalqalah-like envelope: attenuate the 40-50% region
    # (closure dip), then amplify 50-60% (release spike)
    n = len(audio)
    audio[int(n * 0.4):int(n * 0.5)] *= 0.1
    audio[int(n * 0.5):int(n * 0.6)] *= 2.0

    validator = PhysicsValidator(sample_rate=sample_rate)

    # Qalqalah: window spanning the dip and spike
    print("\nQalqalah Test:")
    result = validator.validate_qalqalah(audio, 0.3, 0.8)
    print(f" Status: {result.status.value}")
    print(f" Profile: {result.rms_profile}")
    print(f" Score: {result.score:.2f}")
    print(f" Dip Depth: {result.dip_depth:.2f}")
    print(f" Spike Height: {result.spike_height:.2f}")

    # Madd: 400ms segment against a calibrated 100ms vowel, expecting 4x
    print("\nMadd Test:")
    validator._average_vowel_duration = 0.1  # 100ms average
    result = validator.validate_madd(audio, 0.0, 0.4, expected_count=4)
    print(f" Status: {result.status.value}")
    print(f" Ratio: {result.ratio:.1f}x")
    print(f" Score: {result.score:.2f}")
|
| 927 |
+
|
| 928 |
+
|
| 929 |
+
# Script entry point: run the smoke test when executed directly
if __name__ == "__main__":
    main()
|
src/pipeline.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Main Pipeline Orchestrator
|
| 4 |
+
|
| 5 |
+
Execution Order:
|
| 6 |
+
1. Text Parse: Generate Phonetic Script & Rule Tags
|
| 7 |
+
2. WhisperX: Get Word Timestamps
|
| 8 |
+
3. MFA: Get Phoneme Timestamps inside Words
|
| 9 |
+
4. Math: Clamp/Normalize Phonemes to Words
|
| 10 |
+
5. DSP: Run Physics checks on specific tagged timestamps
|
| 11 |
+
6. Export: Save JSON
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import List, Dict, Optional
|
| 17 |
+
from dataclasses import dataclass, asdict
|
| 18 |
+
|
| 19 |
+
from .tajweed_parser import TajweedParser, TajweedType, PhysicsCheck, WordTags
|
| 20 |
+
from .alignment_engine import AlignmentEngine, MockAlignmentEngine, AlignmentResult
|
| 21 |
+
from .physics_validator import PhysicsValidator, ValidationStatus
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass
class PhonemeOutput:
    """Output format for a single phoneme.

    NOTE(review): TajweedPipeline.process() currently builds plain dicts
    with these same keys rather than instantiating this dataclass — it
    documents the intended schema.
    """
    char_visual: str        # Letter as written in the Uthmani text
    char_phonetic: str      # Letter as pronounced (phonetic transcription)
    start: float            # Onset timestamp (presumably seconds, from WhisperX — TODO confirm)
    end: float              # Offset timestamp (same unit as start)
    tajweed_type: str       # TajweedType enum value string, e.g. "Qalqalah_Kubra"
    physics_analysis: Optional[Dict] = None  # DSP validation result when a check ran
    score: float = 1.0      # Validation score; 1.0 when no physics check applies
|
| 34 |
+
|
| 35 |
+
@dataclass
class WordOutput:
    """Output format for a single word.

    NOTE(review): like PhonemeOutput, this schema is built as a plain dict
    inside TajweedPipeline.process(); the dataclass itself is not instantiated.
    """
    word_text: str          # Original Uthmani word text
    whisper_anchor: Dict    # {"start": float, "end": float} word-level timestamps
    phonemes: List[Dict]    # Per-letter entries matching the PhonemeOutput schema
|
| 41 |
+
|
| 42 |
+
@dataclass
class AyahOutput:
    """Output format for a complete ayah.

    NOTE(review): schema-documentation dataclass; process() returns a plain
    dict with these keys.
    """
    surah: int              # Surah number
    ayah: int               # Ayah number within the surah
    words: List[Dict]       # Entries matching the WordOutput schema
    metadata: Dict          # audio_path, text, pipeline_version, mock_alignment
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class TajweedPipeline:
    """
    Main orchestrator for the TajweedSST pipeline.

    Stage order (see module docstring):
      1. TajweedParser   -> phonetic script + Tajweed rule tags
      2/3. Aligner       -> word + phoneme timestamps (WhisperX/MFA or mock)
      4. Normalization   -> performed inside the alignment engine
      5. PhysicsValidator -> DSP checks on tagged phoneme spans
      6. Export          -> JSON-serializable dict
    """

    def __init__(self,
                 use_mock_alignment: bool = True,
                 device: str = "cuda"):
        """
        Initialize pipeline components.

        Args:
            use_mock_alignment: Use mock alignment for testing (no WhisperX/MFA)
            device: "cuda" or "cpu"; only used by the real alignment engine
        """
        self.parser = TajweedParser()

        if use_mock_alignment:
            self.aligner = MockAlignmentEngine()
        else:
            self.aligner = AlignmentEngine(device=device)

        self.validator = PhysicsValidator()
        self.use_mock = use_mock_alignment

    def process(self,
                audio_path: str,
                text: str,
                surah: int,
                ayah: int) -> Dict:
        """
        Process a single ayah through the complete pipeline.

        Args:
            audio_path: Path to audio file
            text: Uthmani Quran text for the ayah
            surah: Surah number
            ayah: Ayah number

        Returns:
            JSON-serializable dict with word/phoneme timing and Tajweed analysis.
        """
        # Step 1: Parse text and generate Tajweed tags
        word_tags = self.parser.parse_text(text)

        # Extract phonetic words for alignment (silent letters are excluded
        # from each word's phonetic stream by the parser)
        phonetic_words = [w.phonetic_stream for w in word_tags]

        # Step 2 & 3: Run alignment (WhisperX + MFA)
        alignment = self.aligner.align(
            audio_path=audio_path,
            phonetic_words=phonetic_words,
            surah=surah,
            ayah=ayah
        )

        # Step 4: Normalization is done inside alignment_engine

        # Step 5 prep: Load audio for physics validation
        if not self.use_mock:
            audio = self.validator.load_audio(audio_path)
        else:
            import numpy as np
            audio = np.random.randn(22050 * 10) * 0.1  # Mock audio

        # Build output
        output_words = []

        for word_tag, word_align in zip(word_tags, alignment.words):
            word_output = {
                "word_text": word_tag.word_text,
                "whisper_anchor": {
                    "start": round(word_align.whisper_start, 3),
                    "end": round(word_align.whisper_end, 3)
                },
                "phonemes": []
            }

            # BUGFIX: the aligned phoneme list only contains PRONOUNCED
            # letters (the phonetic stream skips silent ones), so track a
            # separate phoneme cursor instead of reusing the raw letter
            # index. Previously, every letter after a silent letter was
            # paired with the wrong (shifted) phoneme timing.
            num_pronounced = len([l for l in word_tag.letters if not l.is_silent])
            phoneme_idx = 0

            # Map phonemes to letters and run physics checks
            for letter_tag in word_tag.letters:
                # Skip silent letters (no aligned phoneme exists for them)
                if letter_tag.is_silent:
                    continue

                # Get corresponding phoneme timing
                if phoneme_idx < len(word_align.phonemes):
                    phoneme_align = word_align.phonemes[phoneme_idx]
                    start = phoneme_align.start
                    end = phoneme_align.end
                else:
                    # Estimate timing if not aligned: split the word span
                    # evenly across its pronounced letters
                    word_duration = word_align.whisper_end - word_align.whisper_start
                    letter_duration = word_duration / max(num_pronounced, 1)
                    start = word_align.whisper_start + (phoneme_idx * letter_duration)
                    end = start + letter_duration
                phoneme_idx += 1

                phoneme_output = {
                    "char_visual": letter_tag.char_visual,
                    "char_phonetic": letter_tag.char_phonetic,
                    "start": round(start, 3),
                    "end": round(end, 3),
                    "tajweed_type": letter_tag.tajweed_type.value,
                    "score": 1.0
                }

                # Step 5: Run physics validation if tagged
                if letter_tag.physics_check != PhysicsCheck.NONE:
                    physics_result = self._run_physics_check(
                        audio=audio,
                        start=start,
                        end=end,
                        check_type=letter_tag.physics_check,
                        tajweed_type=letter_tag.tajweed_type,
                        madd_count=letter_tag.madd_count
                    )
                    phoneme_output["physics_analysis"] = physics_result
                    phoneme_output["score"] = physics_result.get("score", 1.0)

                word_output["phonemes"].append(phoneme_output)

            output_words.append(word_output)

        # Step 6: Final output structure
        output = {
            "surah": surah,
            "ayah": ayah,
            "words": output_words,
            "metadata": {
                "audio_path": audio_path,
                "text": text,
                "pipeline_version": "1.0.0",
                "mock_alignment": self.use_mock
            }
        }

        return output

    def _run_physics_check(self,
                           audio,
                           start: float,
                           end: float,
                           check_type: PhysicsCheck,
                           tajweed_type: TajweedType,
                           madd_count: int = 0) -> Dict:
        """Run the appropriate PhysicsValidator check for the tagged span.

        Args:
            audio: Audio samples loaded by the validator (or mock array)
            start: Span onset (same unit as alignment timestamps)
            end: Span offset
            check_type: Which DSP check to run
            tajweed_type: Rule that triggered the check (currently unused here)
            madd_count: Expected elongation counts for madd checks (2/4/6)

        Returns:
            Flat dict with check-specific measurements, "status" and "score".
        """
        if check_type == PhysicsCheck.CHECK_RMS_BOUNCE:
            result = self.validator.validate_qalqalah(audio, start, end)
            return {
                "check_type": "Qalqalah_RMS",
                "rms_profile": result.rms_profile,
                "dip_depth": round(result.dip_depth, 3),
                "spike_height": round(result.spike_height, 3),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        elif check_type == PhysicsCheck.CHECK_DURATION:
            # Default to 2 counts (madd asli) when no count was tagged
            result = self.validator.validate_madd(audio, start, end, madd_count or 2)
            return {
                "check_type": "Madd_Duration",
                "actual_duration_ms": round(result.actual_duration_ms, 1),
                "expected_duration_ms": round(result.expected_duration_ms, 1),
                "ratio": round(result.ratio, 2),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        elif check_type == PhysicsCheck.CHECK_GHUNNAH:
            result = self.validator.validate_ghunnah(audio, start, end)
            return {
                "check_type": "Ghunnah_Formant",
                "nasal_detected": result.nasal_formant_detected,
                "pitch_stability": round(result.pitch_stability, 3),
                "duration_elongation": round(result.duration_elongation, 2),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        elif check_type == PhysicsCheck.CHECK_FORMANT_F2:
            result = self.validator.validate_tafkheem(audio, start, end)
            return {
                "check_type": "Tafkheem_F2",
                "f2_value_hz": round(result.f2_value_hz, 0),
                "depression_ratio": round(result.depression_ratio, 3),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        # Unknown / PhysicsCheck.NONE: nothing to validate
        return {"check_type": "None", "status": "SKIPPED", "score": 1.0}

    def process_batch(self,
                      audio_dir: str,
                      quran_json_path: str,
                      output_dir: str,
                      surah: int,
                      start_ayah: int = 1,
                      end_ayah: Optional[int] = None) -> List[str]:
        """
        Process multiple ayahs in batch.

        Args:
            audio_dir: Directory containing audio files (named {surah}_{ayah}.mp3)
            quran_json_path: Path to Quran text JSON (keyed by surah, then ayah)
            output_dir: Directory to save output JSON files
            surah: Surah to process
            start_ayah: Starting ayah number
            end_ayah: Ending ayah number (None = all ayahs present in the JSON)

        Returns:
            List of output file paths that were written. Ayahs with missing
            audio or missing text are skipped with a console message.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Load Quran text
        with open(quran_json_path, 'r', encoding='utf-8') as f:
            quran_data = json.load(f)

        output_files = []

        # Process each ayah in the requested range
        for ayah in range(start_ayah, (end_ayah or len(quran_data.get(str(surah), []))) + 1):
            audio_path = Path(audio_dir) / f"{surah}_{ayah}.mp3"

            if not audio_path.exists():
                print(f"Skipping {surah}:{ayah} - audio not found")
                continue

            # Get text
            text = quran_data.get(str(surah), {}).get(str(ayah), "")
            if not text:
                print(f"Skipping {surah}:{ayah} - text not found")
                continue

            # Process
            result = self.process(
                audio_path=str(audio_path),
                text=text,
                surah=surah,
                ayah=ayah
            )

            # Save
            output_path = output_dir / f"{surah}_{ayah}.json"
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)

            output_files.append(str(output_path))
            print(f"Processed {surah}:{ayah} → {output_path}")

        return output_files
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def main():
    """Demo the pipeline on Surah Al-Ikhlas, ayah 1 (mock alignment)."""
    banner = "=" * 60
    print(banner)
    print("TajweedSST Pipeline Demo")
    print(banner)

    pipeline = TajweedPipeline(use_mock_alignment=True)

    # Test with Surah Al-Ikhlas, Ayah 1
    test_text = "قُلْ هُوَ اللَّهُ أَحَدٌ"

    print(f"\nInput Text: {test_text}")
    print("\nProcessing...")

    result = pipeline.process(
        audio_path="test_audio.mp3",
        text=test_text,
        surah=112,
        ayah=1,
    )

    print("\n" + banner)
    print("OUTPUT JSON:")
    print(banner)
    print(json.dumps(result, ensure_ascii=False, indent=2))
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
if __name__ == "__main__":
|
| 334 |
+
main()
|
src/tajweed_parser.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Step 1: Tajweed Rule Parser
|
| 4 |
+
|
| 5 |
+
Generates two parallel text streams and a Rule Map:
|
| 6 |
+
- Visual Stream: Standard Uthmani text
|
| 7 |
+
- Phonetic Stream: Pronounced text for MFA
|
| 8 |
+
- Tajweed Map: Tags for physics validation
|
| 9 |
+
|
| 10 |
+
Tajweed Rules Implemented:
|
| 11 |
+
- Idgham (Assimilation)
|
| 12 |
+
- Iqlab (Conversion)
|
| 13 |
+
- Ikhfa (Concealment)
|
| 14 |
+
- Qalqalah (Bounce)
|
| 15 |
+
- Ghunnah (Nasalization)
|
| 16 |
+
- Madd (Elongation)
|
| 17 |
+
- Tafkheem/Tarqeeq (Heavy/Light)
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
import re
|
| 21 |
+
from dataclasses import dataclass, field
|
| 22 |
+
from typing import List, Dict, Tuple, Optional
|
| 23 |
+
from enum import Enum
|
| 24 |
+
|
| 25 |
+
class TajweedType(Enum):
    """Tajweed rule categories a letter can be tagged with."""
    NONE = "None"
    QALQALAH_SUGHRA = "Qalqalah_Sughra"   # Bounce on sukun mid-word
    QALQALAH_KUBRA = "Qalqalah_Kubra"     # Stronger bounce at word end
    GHUNNAH = "Ghunnah"                   # Nasalization (nun/meem with shadda)
    IDGHAM_FULL = "Idgham_Full"           # Full assimilation (without ghunnah)
    IDGHAM_PARTIAL = "Idgham_Partial"     # Assimilation with ghunnah
    IQLAB = "Iqlab"                       # Nun converted to meem before ba
    IKHFA = "Ikhfa"                       # Concealment of nun sakinah/tanween
    MADD_ASLI = "Madd_Asli"               # Natural elongation (2 counts)
    MADD_WAJIB = "Madd_Wajib"             # Obligatory elongation (4 counts)
    MADD_LAZIM = "Madd_Lazim"             # Necessary elongation (6 counts)
    TAFKHEEM = "Tafkheem"                 # Heavy (emphatic) articulation
    TARQEEQ = "Tarqeeq"                   # Light articulation (declared; never assigned by TajweedParser in this module)
    SILENT = "Silent"                     # Written but not pronounced
|
| 40 |
+
|
| 41 |
+
class PhysicsCheck(Enum):
    """Which DSP validation to run on a letter's audio span."""
    CHECK_RMS_BOUNCE = "Check_RMS_Bounce"    # Qalqalah: RMS dip + spike profile
    CHECK_DURATION = "Check_Duration"        # Madd/Idgham: elongation duration
    CHECK_GHUNNAH = "Check_Ghunnah"          # Nasalization: formant/pitch check
    CHECK_FORMANT_F2 = "Check_Formant_F2"    # Tafkheem: F2 depression
    NONE = "None"                            # No physics validation needed
|
| 47 |
+
|
| 48 |
+
@dataclass
class LetterTag:
    """Tag for a single Arabic letter with Tajweed info"""
    char_visual: str        # Letter as written (Uthmani script)
    char_phonetic: str      # Letter as pronounced ('' for silent letters)
    position: int           # Letter index within its word
    tajweed_type: TajweedType = TajweedType.NONE    # Assigned Tajweed rule
    physics_check: PhysicsCheck = PhysicsCheck.NONE  # DSP check to run downstream
    is_silent: bool = False  # True when written but not pronounced
    madd_count: int = 0  # 0=none, 2=asli, 4=wajib, 6=lazim
|
| 58 |
+
|
| 59 |
+
@dataclass
class WordTags:
    """Tajweed tags for a complete word"""
    word_text: str                                      # Original Uthmani word
    letters: List[LetterTag] = field(default_factory=list)  # One tag per base letter
    phonetic_stream: str = ""  # Space-separated phonemes (silent letters excluded)
|
| 65 |
+
|
| 66 |
+
class TajweedParser:
    """Parses Uthmani Quran text and generates Tajweed rule tags.

    For each word, produces a list of LetterTag objects (visual char,
    phonetic char, Tajweed rule, physics check) plus a space-separated
    phonetic stream for the forced aligner. Rule assignment is
    order-sensitive: Qalqalah/Tafkheem/Madd are mutually exclusive
    (if/elif), while Ghunnah, Nun-Sakinah, Tanween and Silent handling
    are applied afterwards and may overwrite the earlier assignment.

    NOTE(review): the `word` parameter of _analyze_letter is unused.
    """

    # Qalqalah letters: ق ط ب ج د
    QALQALAH_LETTERS = set('قطبجد')

    # Heavy letters (Tafkheem): خ ص ض غ ط ق ظ
    TAFKHEEM_LETTERS = set('خصضغطقظ')

    # Idgham letters after Nun Sakinah: ي ر م ل و ن
    IDGHAM_LETTERS = set('يرملون')
    IDGHAM_WITH_GHUNNAH = set('ينمو')  # With Ghunnah
    IDGHAM_WITHOUT_GHUNNAH = set('رل')  # Without Ghunnah

    # Ikhfa letters (15 letters)
    IKHFA_LETTERS = set('تثجدذزسشصضطظفقك')

    # Harakat (vowel marks) as Unicode combining characters
    FATHA = '\u064E'
    DAMMA = '\u064F'
    KASRA = '\u0650'
    SUKUN = '\u0652'
    SHADDA = '\u0651'
    TANWEEN_FATH = '\u064B'
    TANWEEN_DAMM = '\u064C'
    TANWEEN_KASR = '\u064D'

    # Madd letters
    MADD_ALIF = 'ا'
    MADD_WAW = 'و'
    MADD_YA = 'ي'

    # Phonetic mapping (simplified Buckwalter-like)
    PHONETIC_MAP = {
        'ا': 'ā', 'ب': 'b', 'ت': 't', 'ث': 'ṯ', 'ج': 'j', 'ح': 'ḥ',
        'خ': 'ḫ', 'د': 'd', 'ذ': 'ḏ', 'ر': 'r', 'ز': 'z', 'س': 's',
        'ش': 'š', 'ص': 'ṣ', 'ض': 'ḍ', 'ط': 'ṭ', 'ظ': 'ẓ', 'ع': 'ʿ',
        'غ': 'ġ', 'ف': 'f', 'ق': 'q', 'ك': 'k', 'ل': 'l', 'م': 'm',
        'ن': 'n', 'ه': 'h', 'و': 'w', 'ي': 'y', 'ء': 'ʾ', 'ة': 'h',
        'ى': 'ā', 'ئ': 'ʾ', 'ؤ': 'ʾ', 'أ': 'ʾa', 'إ': 'ʾi', 'آ': 'ʾā'
    }

    def __init__(self):
        # Verbose-tracing flag; no debug output is emitted by this class itself.
        self.debug = False

    def parse_text(self, text: str) -> List[WordTags]:
        """Parse Uthmani text and return tagged words.

        Words are split on whitespace; after per-word analysis, a second
        pass applies rules that span word boundaries.
        """
        words = text.strip().split()
        result = []

        for word in words:
            word_tags = self._parse_word(word)
            result.append(word_tags)

        # Cross-word analysis (Nun Sakinah rules across words)
        self._analyze_cross_word_rules(result)

        return result

    def _parse_word(self, word: str) -> WordTags:
        """Parse a single word and generate letter tags."""
        word_tags = WordTags(word_text=word)

        # Extract base letters and diacritics
        letters_with_harakat = self._split_letters(word)

        for idx, (letter, harakat) in enumerate(letters_with_harakat):
            tag = self._analyze_letter(
                letter=letter,
                harakat=harakat,
                position=idx,
                context=(letters_with_harakat, idx),
                word=word
            )
            word_tags.letters.append(tag)

        # Generate phonetic stream (silent letters excluded)
        word_tags.phonetic_stream = self._generate_phonetic_stream(word_tags.letters)

        return word_tags

    def _split_letters(self, word: str) -> List[Tuple[str, str]]:
        """Split word into (letter, harakat) pairs.

        Each base character is paired with the run of combining marks
        that immediately follows it. The extended set below also covers
        Quranic annotation marks (U+0653–U+0658, U+065C, dagger alif U+0670).
        """
        result = []
        i = 0
        harakat_chars = set([self.FATHA, self.DAMMA, self.KASRA, self.SUKUN,
                             self.SHADDA, self.TANWEEN_FATH, self.TANWEEN_DAMM,
                             self.TANWEEN_KASR, '\u0653', '\u0654', '\u0655',
                             '\u0656', '\u0657', '\u0658', '\u065C', '\u0670'])

        while i < len(word):
            char = word[i]

            # Skip if it's a harakat (only reachable for an orphaned leading
            # mark, since trailing marks are consumed by the inner loop below)
            if char in harakat_chars:
                i += 1
                continue

            # Collect harakat following this letter
            harakat = ""
            j = i + 1
            while j < len(word) and word[j] in harakat_chars:
                harakat += word[j]
                j += 1

            result.append((char, harakat))
            i = j

        return result

    def _analyze_letter(self, letter: str, harakat: str, position: int,
                        context: Tuple[List, int], word: str) -> LetterTag:
        """Analyze a single letter and assign Tajweed rules.

        Args:
            letter: Base character (no diacritics).
            harakat: Combining marks attached to this letter.
            position: Letter index within the word.
            context: (full (letter, harakat) list, this letter's index).
            word: Whole word text (currently unused — kept for signature stability).
        """
        letters_list, idx = context
        is_last = idx == len(letters_list) - 1
        has_sukun = self.SUKUN in harakat
        has_shadda = self.SHADDA in harakat

        tag = LetterTag(
            char_visual=letter,
            char_phonetic=self.PHONETIC_MAP.get(letter, letter),
            position=position
        )

        # Rule 1: Qalqalah (ق ط ب ج د with Sukun); word-final counts as
        # Kubra because recitation stops create an implicit sukun
        if letter in self.QALQALAH_LETTERS and (has_sukun or is_last):
            if is_last:
                tag.tajweed_type = TajweedType.QALQALAH_KUBRA
            else:
                tag.tajweed_type = TajweedType.QALQALAH_SUGHRA
            tag.physics_check = PhysicsCheck.CHECK_RMS_BOUNCE

        # Rule 2: Tafkheem (Heavy letters)
        elif letter in self.TAFKHEEM_LETTERS:
            tag.tajweed_type = TajweedType.TAFKHEEM
            tag.physics_check = PhysicsCheck.CHECK_FORMANT_F2

        # Rule 3: Madd (Elongation) - the madd letter must follow its
        # matching short vowel (alif/fatha, waw/damma, ya/kasra)
        elif letter in [self.MADD_ALIF, self.MADD_WAW, self.MADD_YA]:
            # Check for Madd conditions
            if idx > 0:
                prev_letter, prev_harakat = letters_list[idx - 1]
                if (letter == self.MADD_ALIF and self.FATHA in prev_harakat) or \
                   (letter == self.MADD_WAW and self.DAMMA in prev_harakat) or \
                   (letter == self.MADD_YA and self.KASRA in prev_harakat):
                    # Check what follows for Madd type
                    if is_last:
                        tag.tajweed_type = TajweedType.MADD_ASLI
                        tag.madd_count = 2
                    elif idx + 1 < len(letters_list):
                        next_letter, next_harakat = letters_list[idx + 1]
                        if self.SHADDA in next_harakat or self.SUKUN in next_harakat:
                            tag.tajweed_type = TajweedType.MADD_LAZIM
                            tag.madd_count = 6
                        else:
                            tag.tajweed_type = TajweedType.MADD_WAJIB
                            tag.madd_count = 4
                    tag.physics_check = PhysicsCheck.CHECK_DURATION

        # Rule 4: Ghunnah (Nun/Meem with Shadda) — separate `if`, so this
        # can overwrite an assignment from rules 1-3
        if letter in 'نم' and has_shadda:
            tag.tajweed_type = TajweedType.GHUNNAH
            tag.physics_check = PhysicsCheck.CHECK_GHUNNAH

        # Rule 5: Nun Sakinah rules (within the same word; cross-word
        # occurrences are handled in _analyze_cross_word_rules)
        if letter == 'ن' and has_sukun:
            if idx + 1 < len(letters_list):
                next_letter, _ = letters_list[idx + 1]
                # Iqlab: Nun + Ba → Mim + Ba
                if next_letter == 'ب':
                    tag.tajweed_type = TajweedType.IQLAB
                    tag.char_phonetic = 'm'  # Pronounced as Mim
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH
                # Idgham
                elif next_letter in self.IDGHAM_LETTERS:
                    if next_letter in self.IDGHAM_WITH_GHUNNAH:
                        tag.tajweed_type = TajweedType.IDGHAM_PARTIAL
                    else:
                        tag.tajweed_type = TajweedType.IDGHAM_FULL
                    tag.physics_check = PhysicsCheck.CHECK_DURATION
                # Ikhfa
                elif next_letter in self.IKHFA_LETTERS:
                    tag.tajweed_type = TajweedType.IKHFA
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH

        # Handle Tanween similarly (within the word only).
        # NOTE(review): covers only Iqlab/Ikhfa, and only when a letter
        # follows inside the SAME word; word-final tanween before the next
        # word is not handled here nor in _analyze_cross_word_rules (which
        # only checks for a literal ن) — confirm whether that is intended.
        if any(tanween in harakat for tanween in [self.TANWEEN_FATH, self.TANWEEN_DAMM, self.TANWEEN_KASR]):
            if idx + 1 < len(letters_list):
                next_letter, _ = letters_list[idx + 1]
                if next_letter == 'ب':
                    tag.tajweed_type = TajweedType.IQLAB
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH
                elif next_letter in self.IKHFA_LETTERS:
                    tag.tajweed_type = TajweedType.IKHFA
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH

        # Silent letters (Alif after Waw al-Jama'a, etc.)
        if letter == 'ا' and not harakat and idx > 0:
            prev_letter, prev_harakat = letters_list[idx - 1]
            if prev_letter == 'و' and (self.DAMMA in prev_harakat or self.SUKUN in prev_harakat):
                tag.is_silent = True
                tag.tajweed_type = TajweedType.SILENT
                tag.char_phonetic = ''

        return tag

    def _analyze_cross_word_rules(self, words: List[WordTags]) -> None:
        """Analyze Tajweed rules that span word boundaries.

        Mutates the LetterTag objects in place: a word-final bare ن that
        got no rule in the per-word pass is re-classified against the
        first letter of the following word (Iqlab/Idgham/Ikhfa).
        """
        for i in range(len(words) - 1):
            current_word = words[i]
            next_word = words[i + 1]

            if not current_word.letters or not next_word.letters:
                continue

            last_letter = current_word.letters[-1]
            first_of_next = next_word.letters[0]

            # Check Nun Sakinah at end of word + next word's first letter
            if last_letter.char_visual == 'ن' and last_letter.tajweed_type == TajweedType.NONE:
                if first_of_next.char_visual == 'ب':
                    last_letter.tajweed_type = TajweedType.IQLAB
                    last_letter.char_phonetic = 'm'
                    last_letter.physics_check = PhysicsCheck.CHECK_GHUNNAH
                elif first_of_next.char_visual in self.IDGHAM_LETTERS:
                    if first_of_next.char_visual in self.IDGHAM_WITH_GHUNNAH:
                        last_letter.tajweed_type = TajweedType.IDGHAM_PARTIAL
                    else:
                        last_letter.tajweed_type = TajweedType.IDGHAM_FULL
                    last_letter.physics_check = PhysicsCheck.CHECK_DURATION
                elif first_of_next.char_visual in self.IKHFA_LETTERS:
                    last_letter.tajweed_type = TajweedType.IKHFA
                    last_letter.physics_check = PhysicsCheck.CHECK_GHUNNAH

    def _generate_phonetic_stream(self, letters: List[LetterTag]) -> str:
        """Generate phonetic transcription for MFA.

        Silent letters (and any letter with an empty phonetic char) are
        excluded, so the stream length may differ from the letter count.
        """
        phonemes = []
        for letter in letters:
            if not letter.is_silent and letter.char_phonetic:
                phonemes.append(letter.char_phonetic)
        return ' '.join(phonemes)
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def main():
    """Smoke-test the Tajweed parser against Surah Al-Ikhlas, ayah 1."""
    parser = TajweedParser()

    # Test with Surah Al-Ikhlas
    test_text = "قُلْ هُوَ اللَّهُ أَحَدٌ"

    divider = "=" * 50
    print(divider)
    print("TajweedSST Parser Test")
    print(divider)
    print(f"Input: {test_text}")
    print()

    for word in parser.parse_text(test_text):
        print(f"Word: {word.word_text}")
        print(f" Phonetic: {word.phonetic_stream}")
        # Only show letters that actually received a rule
        tagged = (l for l in word.letters if l.tajweed_type != TajweedType.NONE)
        for letter in tagged:
            print(f" [{letter.char_visual}] → {letter.tajweed_type.value} ({letter.physics_check.value})")
        print()
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
if __name__ == "__main__":
|
| 334 |
+
main()
|
surah_90_test.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Surah 90 Test
|
| 4 |
+
|
| 5 |
+
Test script to generate letter-level timing data for Surah Al-Balad (90)
|
| 6 |
+
and compare precision with existing timing in MahQuranApp.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
cd /Documents/26apps/tajweedsst
|
| 10 |
+
python3 surah_90_test.py
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
import sys
|
| 15 |
+
import os
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
# Add src to path
|
| 19 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 20 |
+
|
| 21 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 22 |
+
|
| 23 |
+
# Paths
|
| 24 |
+
# Root of the sibling MahQuranApp project (source of text, audio and timing data)
MAHQURAN_PATH = Path("/home/absolut7/Documents/26apps/MahQuranApp")
# Quran text JSON consumed by load_surah_90_text()
VERSES_PATH = MAHQURAN_PATH / "public/data/verses_v4.json"
# Recitation audio for Surah 90 (Abdul Basit) — not read directly in this script
AUDIO_PATH = MAHQURAN_PATH / "public/audio/abdul_basit/surah_090.mp3"
# Existing letter-level timing to merge Tajweed annotations into
EXISTING_TIMING_PATH = MAHQURAN_PATH / "public/data/letter_timing_90.json"
# Where this script writes its annotated result
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_tajweed.json"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def load_surah_90_text():
    """Load Surah 90 text from verses_v4.json"""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        payload = json.load(f)

    # Normalize each verse record into the shape the rest of the script expects
    return [
        {
            'ayah': verse['ayah'],
            'text': verse['text'].strip(),
            'translation': verse.get('translation', ''),
            'words': [w['arabic'] for w in verse.get('words', [])],
        }
        for verse in payload.get('90', [])
    ]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def load_existing_timing():
    """Load existing timing data from MahQuranApp"""
    with EXISTING_TIMING_PATH.open('r', encoding='utf-8') as handle:
        return json.load(handle)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def parse_with_tajweed(verses):
    """Parse all verses and generate Tajweed tags"""
    parser = TajweedParser()
    parsed = []

    for verse in verses:
        text = verse['text']
        verse_entry = {
            'ayah': verse['ayah'],
            'text': text,
            'translation': verse['translation'],
            'words': [],
        }

        # One entry per word; each word carries a per-letter breakdown
        for word_tag in parser.parse_text(text):
            letters = [
                {
                    'char': letter.char_visual,
                    'phonetic': letter.char_phonetic,
                    'position': letter.position,
                    'tajweed_type': letter.tajweed_type.value,
                    'physics_check': letter.physics_check.value,
                    'is_silent': letter.is_silent,
                    'madd_count': letter.madd_count,
                }
                for letter in word_tag.letters
            ]
            verse_entry['words'].append({
                'word_text': word_tag.word_text,
                'phonetic': word_tag.phonetic_stream,
                'letters': letters,
            })

        parsed.append(verse_entry)

    return parsed
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def analyze_tajweed_distribution(results):
    """Count how often each Tajweed rule and physics check occurs.

    Args:
        results: list of verse dicts as produced by parse_with_tajweed,
            each with 'words' -> list of word dicts -> 'letters'.

    Returns:
        (tajweed_counts, physics_counts): two dicts mapping the
        'tajweed_type' / 'physics_check' string to its frequency.
    """
    from collections import Counter

    tajweed_counts = Counter()
    physics_counts = Counter()

    for verse in results:
        for word in verse['words']:
            for letter in word['letters']:
                tajweed_counts[letter['tajweed_type']] += 1
                physics_counts[letter['physics_check']] += 1

    # Return plain dicts so callers (printing, json.dump) see the exact
    # same type as before.
    return dict(tajweed_counts), dict(physics_counts)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def convert_to_mahquran_format(results, existing_timing):
    """Merge Tajweed annotations into MahQuranApp timing entries.

    Flattens all letters of `results` in recitation order and attaches
    their annotations positionally: timing entry i is assumed to match
    the i-th letter overall. Entries beyond the number of parsed
    letters are passed through unchanged.

    Args:
        results: list of verse dicts with nested 'words' / 'letters'
            (output of parse_with_tajweed).
        existing_timing: list of timing dicts; not mutated (shallow
            copies are returned).

    Returns:
        A new list of timing dicts, each optionally extended with
        'tajweed_type', 'physics_check', 'phonetic' and — only when
        positive — 'madd_count'.
    """
    # Build a flat list of all characters with Tajweed info.
    # (The unused `char_idx` counter from the original version was removed.)
    tajweed_map = {}
    global_idx = 0
    for verse in results:
        for word in verse['words']:
            for letter in word['letters']:
                tajweed_map[global_idx] = {
                    'tajweed_type': letter['tajweed_type'],
                    'physics_check': letter['physics_check'],
                    'phonetic': letter['phonetic'],
                    'madd_count': letter['madd_count'],
                }
                global_idx += 1

    # Merge with existing timing.
    output = []
    for i, timing_entry in enumerate(existing_timing):
        entry = timing_entry.copy()

        info = tajweed_map.get(i)
        if info is not None:
            entry['tajweed_type'] = info['tajweed_type']
            entry['physics_check'] = info['physics_check']
            entry['phonetic'] = info['phonetic']
            # Only letters that carry a madd get the extra key.
            if info['madd_count'] > 0:
                entry['madd_count'] = info['madd_count']

        output.append(entry)

    return output
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def main():
    """Run the end-to-end Surah 90 (Al-Balad) Tajweed test.

    Pipeline: load verse text -> parse Tajweed rules -> summarize rule
    distribution -> merge annotations with the existing letter timing ->
    write both the full analysis and a MahQuranApp-compatible timing
    file, then print a small sample.

    Returns:
        The full analysis dict that was written to OUTPUT_PATH.
    """
    print("=" * 60)
    print("TajweedSST - Surah 90 (Al-Balad) Test")
    print("=" * 60)

    # Step 1: Load Surah 90 text
    print("\n[1] Loading Surah 90 text...")
    verses = load_surah_90_text()
    print(f" Loaded {len(verses)} verses")
    print(f" Verse 1: {verses[0]['text'][:50]}...")

    # Step 2: Parse with Tajweed
    print("\n[2] Parsing with Tajweed rules...")
    results = parse_with_tajweed(verses)

    # Step 3: Analyze distribution
    print("\n[3] Tajweed Analysis:")
    tajweed_counts, physics_counts = analyze_tajweed_distribution(results)

    # Sort descending by count; the "None" bucket is noise and skipped.
    print("\n Tajweed Rules Found:")
    for rule, count in sorted(tajweed_counts.items(), key=lambda x: -x[1]):
        if rule != "None":
            print(f" • {rule}: {count}")

    print("\n Physics Checks Required:")
    for check, count in sorted(physics_counts.items(), key=lambda x: -x[1]):
        if check != "None":
            print(f" • {check}: {count}")

    # Step 4: Load existing timing
    print("\n[4] Loading existing timing data...")
    existing_timing = load_existing_timing()
    print(f" Found {len(existing_timing)} timing entries")
    print(f" First entry: {existing_timing[0]}")

    # Step 5: Convert and merge
    print("\n[5] Merging Tajweed with timing...")
    merged = convert_to_mahquran_format(results, existing_timing)

    # Count enhanced entries (entries that actually received a rule tag).
    enhanced = sum(1 for e in merged if e.get('tajweed_type') and e['tajweed_type'] != 'None')
    print(f" Enhanced entries with Tajweed: {enhanced}")

    # Step 6: Save output
    print("\n[6] Saving output...")
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Save full Tajweed analysis
    full_output = {
        'surah': 90,
        'name': 'Al-Balad',
        'name_arabic': 'البلد',
        'total_verses': len(verses),
        'tajweed_summary': tajweed_counts,
        'physics_checks': physics_counts,
        'verses': results
    }

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(full_output, f, ensure_ascii=False, indent=2)
    print(f" Saved: {OUTPUT_PATH}")

    # Save merged timing (compatible with MahQuranApp)
    merged_path = OUTPUT_PATH.parent / "letter_timing_90_tajweed.json"
    with open(merged_path, 'w', encoding='utf-8') as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
    print(f" Saved: {merged_path}")

    # Step 7: Show sample — only letters with a non-trivial rule.
    print("\n[7] Sample Output (Verse 1, first 3 words):")
    for word in results[0]['words'][:3]:
        print(f"\n Word: {word['word_text']}")
        print(f" Phonetic: {word['phonetic']}")
        for letter in word['letters']:
            if letter['tajweed_type'] != 'None':
                print(f" [{letter['char']}] → {letter['tajweed_type']} ({letter['physics_check']})")

    print("\n" + "=" * 60)
    print("✓ Test Complete!")
    print("=" * 60)

    return full_output
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
# Script entry point: run the Surah 90 Tajweed test end to end.
if __name__ == "__main__":
    main()
|
surah_91_full_pipeline.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Physics Refinement Pipeline for Surah 91
|
| 4 |
+
|
| 5 |
+
Uses EXISTING timing from MahQuranApp + applies physics refinement.
|
| 6 |
+
No WhisperX needed - just physics validation and boundary refinement.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
cd /Documents/26apps/tajweedsst
|
| 10 |
+
source venv/bin/activate
|
| 11 |
+
python3 surah_91_full_pipeline.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import sys
|
| 16 |
+
import numpy as np
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 20 |
+
|
| 21 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 22 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 23 |
+
from src.duration_model import DurationModel, MaddType
|
| 24 |
+
|
| 25 |
+
import librosa
|
| 26 |
+
|
| 27 |
+
# Paths
# Hard-coded locations inside the sibling MahQuranApp project: verse text,
# the reciter's audio, and the previously generated letter timing are read
# from there, and the physics-refined timing is written back next to it.
MAHQURAN_PATH = Path("/home/absolut7/Documents/26apps/MahQuranApp")
VERSES_PATH = MAHQURAN_PATH / "public/data/verses_v4.json"
AUDIO_PATH = MAHQURAN_PATH / "public/audio/abdul_basit/surah_091.mp3"
EXISTING_TIMING = MAHQURAN_PATH / "public/data/abdul_basit/letter_timing_91.json"
OUTPUT_TIMING = MAHQURAN_PATH / "public/data/abdul_basit/letter_timing_91_physics.json"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def load_verses():
    """Return the Surah 91 verse records parsed from verses_v4.json."""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        payload = json.load(f)
    return payload.get('91', [])
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def load_existing_timing():
    """Return the previously generated per-letter timing entries."""
    with open(EXISTING_TIMING, 'r', encoding='utf-8') as f:
        entries = json.load(f)
    return entries
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_tajweed_tags(verses):
    """Flatten the parser output into one tag dict per letter, in order.

    Unlike the JSON-oriented helpers, this keeps the enum objects
    (tajweed_type / physics_check) intact so callers can compare against
    the enum members directly.
    """
    parser = TajweedParser()
    all_tags = []

    for verse in verses:
        for word_tag in parser.parse_text(verse['text']):
            all_tags.extend(
                {
                    'char': letter.char_visual,
                    'phonetic': letter.char_phonetic,
                    'tajweed_type': letter.tajweed_type,
                    'physics_check': letter.physics_check,
                    'madd_count': letter.madd_count,
                    'is_silent': letter.is_silent,
                }
                for letter in word_tag.letters
            )

    return all_tags
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def refine_with_physics(timing_data, tags, audio, sr, physics, duration_model):
    """Attach Tajweed tags and physics-validation results to timing entries.

    Entries and tags are matched positionally: timing entry i is assumed
    to correspond to tags[i]. Surplus timing entries (i >= len(tags)) are
    copied through without annotation.

    Args:
        timing_data: list of dicts with 'start'/'end' in milliseconds.
        tags: per-letter dicts from get_tajweed_tags() (enum objects).
        audio: waveform samples passed to the physics validators.
        sr: sample rate of `audio` (unused here directly; kept for parity
            with the call site).
        physics: PhysicsValidator instance.
        duration_model: DurationModel used for Madd duration checks.

    Returns:
        (refined, stats): annotated copies of the timing entries, and a
        counter dict with 'total'/'validated'/'passed'/'marginal'/'failed'.
    """
    refined = []
    stats = {'total': 0, 'validated': 0, 'passed': 0, 'marginal': 0, 'failed': 0}

    for i, entry in enumerate(timing_data):
        stats['total'] += 1

        # Copy existing data
        result = entry.copy()
        # CRITICAL PRECISION FIX: Times are stored in milliseconds, convert to seconds
        start = entry['start'] / 1000.0
        end = entry['end'] / 1000.0

        # Get corresponding Tajweed tag
        if i < len(tags):
            tag = tags[i]
            result['tajweed'] = tag['tajweed_type'].value
            result['phonetic'] = tag['phonetic']

            # Run physics validation if needed
            if tag['physics_check'] != PhysicsCheck.NONE:
                stats['validated'] += 1

                try:
                    # Dispatch on the required physics check; CHECK_GHUNNAH
                    # is further split by the specific Tajweed rule.
                    check = tag['physics_check']

                    if check == PhysicsCheck.CHECK_RMS_BOUNCE:
                        val = physics.validate_qalqalah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_DURATION:
                        # madd_count of 0/None falls back to the default 2 harakat.
                        val = physics.validate_madd(audio, start, end, tag['madd_count'] or 2)
                    elif check == PhysicsCheck.CHECK_GHUNNAH:
                        if tag['tajweed_type'] == TajweedType.IKHFA:
                            val = physics.validate_ikhfa(audio, start, end)
                        elif tag['tajweed_type'] == TajweedType.IQLAB:
                            val = physics.validate_iqlab(audio, start, end)
                        else:
                            val = physics.validate_ghunnah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_FORMANT_F2:
                        val = physics.validate_tafkheem(audio, start, end)
                    else:
                        val = None

                    if val:
                        result['physics'] = val.status.value
                        result['score'] = round(val.score, 2)

                        if val.status == ValidationStatus.PASS:
                            stats['passed'] += 1
                        elif val.status == ValidationStatus.MARGINAL:
                            stats['marginal'] += 1
                        else:
                            stats['failed'] += 1

                except Exception as e:
                    # Best-effort: record the failure on the entry instead of
                    # aborting the whole refinement run.
                    result['error'] = str(e)

            # Duration validation for Madd
            if tag['tajweed_type'] in [TajweedType.MADD_ASLI, TajweedType.MADD_WAJIB, TajweedType.MADD_LAZIM]:
                duration = end - start
                madd_map = {
                    TajweedType.MADD_ASLI: MaddType.ASLI,
                    TajweedType.MADD_WAJIB: MaddType.WAJIB,
                    TajweedType.MADD_LAZIM: MaddType.LAZIM
                }
                dur_result = duration_model.validate_duration(
                    duration,
                    madd_map.get(tag['tajweed_type'], MaddType.ASLI),
                    tag['madd_count'] or 2
                )
                result['harakat'] = round(dur_result.harakat_count, 1)

        refined.append(result)

    return refined, stats
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def main():
    """Run the Surah 91 physics-refinement pipeline end to end.

    Loads the existing letter timing, parses Tajweed tags from the verse
    text, calibrates the duration model, runs refine_with_physics() over
    the audio, prints statistics and writes the refined timing JSON back
    into the MahQuranApp data directory.
    """
    print("=" * 60)
    print("TajweedSST - Physics Refinement: Surah 91")
    print("=" * 60)

    # Load existing timing
    print("\n[1] Loading existing timing...")
    timing_data = load_existing_timing()
    print(f" Entries: {len(timing_data)}")

    # Load verses and parse Tajweed
    print("\n[2] Parsing Tajweed rules...")
    verses = load_verses()
    tags = get_tajweed_tags(verses)
    print(f" Tajweed tags: {len(tags)}")

    # Load audio
    print("\n[3] Loading audio...")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    print(f" Duration: {len(audio)/sr:.1f}s")

    # Initialize validators
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Calibrate the harakat base from short-vowel-sized entries.
    # NOTE(review): refine_with_physics treats 'start'/'end' as milliseconds,
    # so durations here would be in ms too — a 0.05..0.15 window would then
    # match nothing. Confirm the unit of the stored timing before relying on
    # this calibration.
    vowels = [e['end'] - e['start'] for e in timing_data if 0.05 <= (e['end'] - e['start']) <= 0.15]
    if vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", vowels)
        print(f" Harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Refine
    print("\n[4] Applying physics refinement...")
    refined, stats = refine_with_physics(timing_data, tags, audio, sr, physics, duration_model)

    print(f"\n[5] Statistics:")
    print(f" Total: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")

    if stats['validated'] > 0:
        # Marginal results are counted as acceptable in the pass rate.
        rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f" Pass Rate: {rate:.1f}%")

    # Save
    print(f"\n[6] Saving to MahQuranApp...")
    with open(OUTPUT_TIMING, 'w', encoding='utf-8') as f:
        json.dump(refined, f, ensure_ascii=False, indent=2)
    print(f" Saved: {OUTPUT_TIMING}")

    # Show sample
    print("\n[7] Sample refined entries:")
    for entry in refined[:5]:
        tj = entry.get('tajweed', 'None')
        ph = entry.get('physics', '-')
        sc = entry.get('score', '-')
        print(f" {entry['char']}: {tj} | physics={ph} score={sc}")

    print("\n" + "=" * 60)
    print("✓ Done! Test in MahQuranApp with:")
    print(f" letter_timing_91_physics.json")
    print("=" * 60)
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
# Script entry point: run the Surah 91 physics refinement.
if __name__ == "__main__":
    main()
|
surah_91_test.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Surah 91 (Ash-Shams) Physics Test
|
| 4 |
+
|
| 5 |
+
Tests the complete Tajweed physics system on Abdul Basit's recitation.
|
| 6 |
+
This validates all 10 physics validators on real Quranic audio.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
cd /Documents/26apps/tajweedsst
|
| 10 |
+
source venv/bin/activate
|
| 11 |
+
python3 surah_91_test.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import sys
|
| 16 |
+
import os
|
| 17 |
+
import numpy as np
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
from dataclasses import asdict
|
| 20 |
+
|
| 21 |
+
# Add src to path
|
| 22 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 23 |
+
|
| 24 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 25 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 26 |
+
from src.duration_model import DurationModel, MaddType
|
| 27 |
+
|
| 28 |
+
# Check for librosa
|
| 29 |
+
try:
|
| 30 |
+
import librosa
|
| 31 |
+
HAS_LIBROSA = True
|
| 32 |
+
except ImportError:
|
| 33 |
+
HAS_LIBROSA = False
|
| 34 |
+
print("Warning: librosa not installed. Some tests will be skipped.")
|
| 35 |
+
|
| 36 |
+
# Paths
|
| 37 |
+
MAHQURAN_PATH = Path("/home/absolut7/Documents/26apps/MahQuranApp")
|
| 38 |
+
VERSES_PATH = MAHQURAN_PATH / "public/data/verses_v4.json"
|
| 39 |
+
AUDIO_PATH = MAHQURAN_PATH / "public/audio/abdul_basit/surah_091.mp3"
|
| 40 |
+
TIMING_PATH = MAHQURAN_PATH / "public/data/abdul_basit/letter_timing_91.json"
|
| 41 |
+
OUTPUT_PATH = Path(__file__).parent / "output/surah_91_physics.json"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def load_surah_91_text():
    """Load Surah 91 text from verses_v4.json"""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)

    return [
        {
            'ayah': record['ayah'],
            'text': record['text'].strip(),
            'translation': record.get('translation', ''),
        }
        for record in data.get('91', [])
    ]
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def load_timing_data():
    """Parse and return the existing per-letter timing JSON."""
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        timing = json.load(f)
    return timing
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def load_audio():
    """Load the Surah 91 recitation.

    Returns:
        (audio, sr): the waveform resampled to 22050 Hz, or
        (None, 22050) when librosa is unavailable so that downstream
        physics checks are skipped instead of crashing.
    """
    if not HAS_LIBROSA:
        return None, 22050

    print(f" Loading audio from: {AUDIO_PATH}")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    print(f" Duration: {len(audio)/sr:.1f}s")
    return audio, sr
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def analyze_with_physics(verses, timing_data, audio, sr):
    """Run all applicable physics validators over the timed letters.

    Parses the verses for Tajweed tags, calibrates the duration model
    from short-vowel-sized timing entries, then matches timing entries
    to tags positionally (entry i <-> tag i) and dispatches the
    appropriate validator per physics_check string.

    NOTE(review): 'start'/'end' are used here directly as validator
    offsets, whereas surah_91_full_pipeline.py divides the same fields
    by 1000 (ms -> s). Confirm which unit the timing file actually uses.

    Returns:
        (results, physics_stats, duration_model): per-letter result
        dicts, the pass/fail counters, and the calibrated model.
    """
    parser = TajweedParser()
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Parse all verses for Tajweed rules
    all_tags = []
    for verse in verses:
        word_tags = parser.parse_text(verse['text'])
        for word_tag in word_tags:
            for letter in word_tag.letters:
                all_tags.append({
                    'char': letter.char_visual,
                    'phonetic': letter.char_phonetic,
                    'tajweed_type': letter.tajweed_type.value,
                    'physics_check': letter.physics_check.value,
                    'madd_count': letter.madd_count
                })

    # Calibrate duration model from timing data
    short_vowels = []
    for entry in timing_data:
        duration = entry['end'] - entry['start']
        if 0.05 <= duration <= 0.15:  # Short vowel range
            short_vowels.append(duration)

    if short_vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", short_vowels)
        print(f" Calibrated harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Run physics validation on each letter
    results = []
    physics_stats = {
        'total': 0,
        'validated': 0,
        'passed': 0,
        'marginal': 0,
        'failed': 0,
        'skipped': 0
    }

    # Match timing entries with Tajweed tags (positional; stop when tags run out)
    for i, entry in enumerate(timing_data):
        if i >= len(all_tags):
            break

        tag = all_tags[i]
        start = entry['start']
        end = entry['end']
        duration = end - start

        result = {
            'idx': i,
            'char': entry['char'],
            'start': start,
            'end': end,
            'duration_ms': duration * 1000,
            'tajweed_type': tag['tajweed_type'],
            'physics_check': tag['physics_check']
        }

        physics_stats['total'] += 1

        # Skip if no physics check needed or no audio
        if tag['physics_check'] == 'None' or audio is None:
            result['validation'] = 'not_required'
            results.append(result)
            continue

        physics_stats['validated'] += 1

        # Run appropriate validator
        check_type = tag['physics_check']

        try:
            if check_type == 'Check_RMS_Bounce':
                # Qalqalah
                val_result = physics.validate_qalqalah(audio, start, end)
                result['metric'] = 'RMS Bounce'
                # rms_profile is optional on the validator result object.
                result['profile'] = val_result.rms_profile if hasattr(val_result, 'rms_profile') else ''

            elif check_type == 'Check_Duration':
                # Madd — default to 2 harakat when no count was tagged.
                madd_count = tag['madd_count'] if tag['madd_count'] > 0 else 2
                val_result = physics.validate_madd(audio, start, end, madd_count)
                result['metric'] = 'Duration'
                result['ratio'] = val_result.ratio if hasattr(val_result, 'ratio') else 0

            elif check_type == 'Check_Ghunnah':
                # Ghunnah/Ikhfa/Iqlab share the nasal check but use
                # rule-specific validators.
                if tag['tajweed_type'] == 'Ikhfa':
                    val_result = physics.validate_ikhfa(audio, start, end)
                elif tag['tajweed_type'] == 'Iqlab':
                    val_result = physics.validate_iqlab(audio, start, end)
                else:
                    val_result = physics.validate_ghunnah(audio, start, end)
                result['metric'] = 'Nasal'

            elif check_type == 'Check_Formant_F2':
                # Tafkheem
                val_result = physics.validate_tafkheem(audio, start, end)
                result['metric'] = 'F2 Formant'

            else:
                val_result = None

            if val_result:
                result['status'] = val_result.status.value
                result['score'] = val_result.score

                if val_result.status == ValidationStatus.PASS:
                    physics_stats['passed'] += 1
                elif val_result.status == ValidationStatus.MARGINAL:
                    physics_stats['marginal'] += 1
                elif val_result.status == ValidationStatus.FAIL:
                    physics_stats['failed'] += 1
                else:
                    physics_stats['skipped'] += 1
            else:
                result['status'] = 'unknown'
                result['score'] = 0

        except Exception as e:
            # Record the failure on the entry rather than aborting the run.
            result['status'] = 'error'
            result['error'] = str(e)
            physics_stats['skipped'] += 1

        results.append(result)

    return results, physics_stats, duration_model
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def main():
    """Run the Surah 91 (Ash-Shams) physics test end to end.

    Loads verses, timing and audio, runs analyze_with_physics(), prints
    statistics plus one sample result per Tajweed type, and writes the
    full analysis to OUTPUT_PATH.

    Returns:
        The output dict that was written to disk.
    """
    print("=" * 60)
    print("TajweedSST - Surah 91 (Ash-Shams) Physics Test")
    print("=" * 60)

    # Step 1: Load data
    print("\n[1] Loading Surah 91 data...")
    verses = load_surah_91_text()
    print(f" Verses: {len(verses)}")
    print(f" First verse: {verses[0]['text'][:40]}...")

    timing_data = load_timing_data()
    print(f" Timing entries: {len(timing_data)}")

    # Step 2: Load audio (may be (None, 22050) without librosa)
    print("\n[2] Loading audio...")
    audio, sr = load_audio()

    # Step 3: Run physics analysis
    print("\n[3] Running physics validation...")
    results, stats, duration_model = analyze_with_physics(verses, timing_data, audio, sr)

    # Step 4: Print statistics
    print("\n[4] Physics Validation Statistics:")
    print(f" Total letters: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")
    print(f" ⊘ Skipped: {stats['skipped']}")

    if stats['validated'] > 0:
        # Marginal results count toward the pass rate.
        pass_rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f"\n Pass Rate: {pass_rate:.1f}%")

    # Step 5: Show samples of each Tajweed type (first occurrence wins)
    print("\n[5] Sample Results by Tajweed Type:")

    tajweed_samples = {}
    for r in results:
        tj_type = r['tajweed_type']
        if tj_type != 'None' and tj_type not in tajweed_samples:
            tajweed_samples[tj_type] = r

    for tj_type, sample in tajweed_samples.items():
        status = sample.get('status', 'N/A')
        score = sample.get('score', 0)
        char = sample['char']
        print(f" {tj_type}:")
        print(f" Letter: {char}, Status: {status}, Score: {score:.2f}")

    # Step 6: Duration analysis
    print("\n[6] Duration Model Calibration:")
    if duration_model.calibration:
        print(f" Reciter: {duration_model.calibration.reciter_name}")
        print(f" Harakat base: {duration_model.calibration.harakat_base_ms:.1f}ms")
        print(f" Sample size: {duration_model.calibration.sample_size}")

    # Step 7: Save results
    print("\n[7] Saving results...")
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    output = {
        'surah': 91,
        'name': 'Ash-Shams',
        'name_arabic': 'الشمس',
        'statistics': stats,
        'calibration': {
            # Fall back to 100 (ms) when no calibration happened.
            'harakat_ms': duration_model.calibration.harakat_base_ms if duration_model.calibration else 100
        },
        'results': results
    }

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False, indent=2)
    print(f" Saved: {OUTPUT_PATH}")

    print("\n" + "=" * 60)
    print("✓ Physics Test Complete!")
    print("=" * 60)

    return output
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
# Script entry point: run the Surah 91 physics test.
if __name__ == "__main__":
    main()
|
tests/test_alignment_engine.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Alignment Engine Unit Tests
|
| 4 |
+
|
| 5 |
+
Tests word and phoneme timing accuracy:
|
| 6 |
+
- WhisperX word alignment
|
| 7 |
+
- MFA phoneme alignment
|
| 8 |
+
- Phoneme normalization within word boundaries
|
| 9 |
+
- Mock alignment for testing without models
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import pytest
|
| 13 |
+
import os
|
| 14 |
+
import sys
|
| 15 |
+
|
| 16 |
+
# Add src to path
|
| 17 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 18 |
+
|
| 19 |
+
from alignment_engine import (
|
| 20 |
+
AlignmentEngine,
|
| 21 |
+
MockAlignmentEngine,
|
| 22 |
+
PhonemeAlignment,
|
| 23 |
+
WordAlignment,
|
| 24 |
+
AlignmentResult
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class TestDataclasses:
    """Unit tests for the alignment data structures."""

    def test_phoneme_alignment(self):
        """PhonemeAlignment stores timing correctly"""
        sample = PhonemeAlignment(phoneme="ب", start=0.0, end=0.1, duration=0.1)
        assert sample.duration == 0.1
        assert sample.phoneme == "ب"

    def test_phoneme_normalized_duration(self):
        """Normalized duration calculation"""
        sample = PhonemeAlignment(phoneme="ا", start=0.0, end=0.2, duration=0.2)
        # normalized_duration is exposed as a property.
        assert sample.normalized_duration == 0.2

    def test_word_alignment(self):
        """WordAlignment stores word and phonemes"""
        segments = [
            PhonemeAlignment("ب", 0.0, 0.15, 0.15),
            PhonemeAlignment("س", 0.15, 0.35, 0.20),
            PhonemeAlignment("م", 0.35, 0.5, 0.15),
        ]
        aligned_word = WordAlignment(
            word_text="بسم",
            whisper_start=0.0,
            whisper_end=0.5,
            phonemes=segments,
        )
        assert aligned_word.word_text == "بسم"
        assert len(aligned_word.phonemes) == 3
        assert aligned_word.whisper_duration == 0.5

    def test_alignment_result(self):
        """AlignmentResult stores full alignment"""
        outcome = AlignmentResult(
            audio_path="/path/to/audio.wav",
            surah=91,
            ayah=1,
            words=[],
        )
        assert outcome.surah == 91
        assert outcome.ayah == 1
+
|
| 70 |
+
|
| 71 |
+
class TestMockAlignmentEngine:
    """Test mock alignment for development without models"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_mock_align_returns_result(self, engine):
        """Mock alignment returns AlignmentResult"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["b i s m", "a l l a h"],
            surah=1,
            ayah=1,
        )
        assert isinstance(outcome, AlignmentResult)

    def test_mock_align_word_count(self, engine):
        """Mock alignment produces correct word count"""
        tokens = ["b i s m", "a l l a h", "a r r a h m a n"]
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=tokens,
            surah=1,
            ayah=1,
        )
        assert len(outcome.words) == len(tokens)

    def test_mock_align_phoneme_generation(self, engine):
        """Mock alignment generates phonemes for each word"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["b i s m"],
            surah=1,
            ayah=1,
        )
        # "b i s m" should produce ~4 phonemes
        assert len(outcome.words[0].phonemes) >= 3

    def test_mock_align_timing_monotonic(self, engine):
        """Mock timing should be monotonically increasing"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["word1", "word2", "word3"],
            surah=1,
            ayah=1,
        )

        last_end = 0.0
        for aligned_word in outcome.words:
            assert aligned_word.whisper_start >= last_end, "Word start before previous end"
            last_end = aligned_word.whisper_end
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
class TestTimingMonotonicity:
    """Test that timing never goes backwards"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_word_timing_monotonic(self, engine):
        """Word-level timing is strictly increasing"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["w1", "w2", "w3", "w4", "w5"],
            surah=1,
            ayah=1,
        )

        # Compare each consecutive pair of aligned words.
        pairs = zip(outcome.words, outcome.words[1:])
        for i, (prev, curr) in enumerate(pairs, start=1):
            assert curr.whisper_start >= prev.whisper_end, \
                f"Word {i} starts ({curr.whisper_start}) before word {i-1} ends ({prev.whisper_end})"

    def test_phoneme_timing_monotonic(self, engine):
        """Phoneme-level timing is strictly increasing within words"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["a l r a h m a n"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for prev, curr in zip(aligned_word.phonemes, aligned_word.phonemes[1:]):
                assert curr.start >= prev.end, \
                    f"Phoneme {curr.phoneme} starts before {prev.phoneme} ends"
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
class TestPhonemeNormalization:
    """Test phoneme duration normalization"""

    def test_phonemes_fit_word_boundary(self):
        """Normalized phonemes should fit exactly in word boundaries"""
        slices = [
            PhonemeAlignment("t", 1.0, 1.25, 0.25),
            PhonemeAlignment("e", 1.25, 1.5, 0.25),
            PhonemeAlignment("s", 1.5, 1.75, 0.25),
            PhonemeAlignment("t", 1.75, 2.0, 0.25),
        ]
        word = WordAlignment(
            word_text="test",
            whisper_start=1.0,
            whisper_end=2.0,
            phonemes=slices,
        )

        # First phoneme opens at the word start...
        assert word.phonemes[0].start == word.whisper_start
        # ...and the final phoneme closes at the word end.
        assert word.phonemes[-1].end == word.whisper_end

    def test_phonemes_cover_word_duration(self):
        """Phoneme durations should sum to word duration"""
        word = WordAlignment(
            word_text="test",
            whisper_start=0.0,
            whisper_end=1.0,
            phonemes=[
                PhonemeAlignment("a", 0.0, 0.333, 0.333),
                PhonemeAlignment("b", 0.333, 0.666, 0.333),
                PhonemeAlignment("c", 0.666, 1.0, 0.334),
            ],
        )

        covered = sum(p.duration for p in word.phonemes)
        # Allow small floating point error
        assert abs(covered - word.whisper_duration) < 0.01
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
class TestArabicPhonemes:
    """Test Arabic-specific phoneme handling"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_arabic_phonetic_transcription(self, engine):
        """Engine handles Arabic phonetic transcription"""
        transliterated = ["b i s m i", "a l l aa h i"]  # Arabic transliteration
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=transliterated,
            surah=1,
            ayah=1,
        )
        assert len(outcome.words) == 2
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
if __name__ == "__main__":
    # pytest.main() RETURNS the exit status instead of exiting; propagate it
    # so running this file directly reports failures via the process exit code
    # (previously the script always exited 0, even when tests failed).
    raise SystemExit(pytest.main([__file__, "-v"]))
|
tests/test_physics_validator.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Physics Validator Unit Tests
|
| 4 |
+
|
| 5 |
+
Tests all Tajweed acoustic validation rules:
|
| 6 |
+
- Qalqalah (bounce)
|
| 7 |
+
- Madd (elongation)
|
| 8 |
+
- Ghunnah (nasalization)
|
| 9 |
+
- Tafkheem (heavy letters)
|
| 10 |
+
- Idgham (assimilation)
|
| 11 |
+
- Ikhfa (concealment)
|
| 12 |
+
- Iqlab (conversion)
|
| 13 |
+
- Izhar (clarity)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import pytest
|
| 17 |
+
import numpy as np
|
| 18 |
+
import os
|
| 19 |
+
import sys
|
| 20 |
+
|
| 21 |
+
# Add src to path
|
| 22 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 23 |
+
|
| 24 |
+
from physics_validator import (
|
| 25 |
+
PhysicsValidator,
|
| 26 |
+
ValidationStatus,
|
| 27 |
+
PhysicsResult,
|
| 28 |
+
QalqalahResult,
|
| 29 |
+
MaddResult
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TestPhysicsValidatorInit:
    """Test initialization and configuration"""

    def test_default_init(self):
        """Validator initializes with default sample rate"""
        validator = PhysicsValidator()
        assert validator.sample_rate == 22050
        assert validator._average_vowel_duration > 0

    def test_custom_sample_rate(self):
        """Validator accepts custom sample rate"""
        validator = PhysicsValidator(sample_rate=16000)
        assert validator.sample_rate == 16000

    def test_thresholds_exist(self):
        """All Tajweed thresholds are defined"""
        validator = PhysicsValidator()
        # Every rule-specific constant must be present on the instance.
        for attr in (
            'QALQALAH_DIP_THRESHOLD',
            'MADD_RATIO_ASLI',
            'MADD_RATIO_WAJIB',
            'MADD_RATIO_LAZIM',
        ):
            assert hasattr(validator, attr)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class TestQalqalahValidation:
    """Test Qalqalah (echo/bounce) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def sample_audio(self):
        """Generate test audio: silence -> speech -> silence (qalqalah pattern)"""
        sr = 22050
        duration = 0.5  # 500ms
        t = np.linspace(0, duration, int(sr * duration))
        n = len(t)

        # Shape an amplitude envelope with the dip-then-spike contour of qalqalah.
        envelope = np.ones_like(t)
        envelope[int(0.3 * n):int(0.4 * n)] = 0.1  # closure dip at 30-40%
        envelope[int(0.4 * n):int(0.5 * n)] = 1.5  # release spike at 40-50%

        return (envelope * np.sin(2 * np.pi * 200 * t)).astype(np.float32)

    def test_qalqalah_returns_physics_result(self, validator, sample_audio):
        """Qalqalah validation returns PhysicsResult"""
        outcome = validator.validate_qalqalah(sample_audio, 0.0, 0.5)
        # QalqalahResult subclasses PhysicsResult, so the base fields exist.
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'metric_name')

    def test_qalqalah_detects_dip_spike(self, validator, sample_audio):
        """Qalqalah validator detects dip-spike pattern"""
        outcome = validator.validate_qalqalah(sample_audio, 0.0, 0.5)
        # Should at least have a score
        assert outcome.score >= 0

    def test_qalqalah_short_segment_handles_gracefully(self, validator):
        """Very short segments should be handled gracefully"""
        tiny = np.zeros(100, dtype=np.float32)  # ~4.5ms at 22050 Hz
        outcome = validator.validate_qalqalah(tiny, 0.0, 0.005)
        # Must not crash; either FAIL or SKIPPED is acceptable here.
        assert outcome.status in [ValidationStatus.SKIPPED, ValidationStatus.FAIL]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class TestMaddValidation:
    """Test Madd (elongation) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def vowel_audio(self):
        """Generate sustained vowel-like audio"""
        sr = 22050
        duration = 0.4  # 400ms (should be ~2 counts)
        t = np.linspace(0, duration, int(sr * duration))
        return np.sin(2 * np.pi * 200 * t).astype(np.float32)

    def test_madd_returns_physics_result(self, validator, vowel_audio):
        """Madd validation returns PhysicsResult"""
        outcome = validator.validate_madd(vowel_audio, 0.0, 0.4, expected_count=2)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')

    def test_madd_asli_duration(self, validator, vowel_audio):
        """Madd Asli (2 counts) should pass for ~400ms vowel"""
        outcome = validator.validate_madd(vowel_audio, 0.0, 0.4, expected_count=2)
        # Natural madd is 2 counts
        assert outcome.score >= 0
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
class TestGhunnahValidation:
    """Test Ghunnah (nasalization) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def nasal_audio(self):
        """Generate nasal-like audio with limited bandwidth"""
        sr = 22050
        duration = 0.3
        t = np.linspace(0, duration, int(sr * duration))
        # Two low-frequency partials approximate a nasal resonance.
        tone = np.sin(2 * np.pi * 300 * t) + 0.5 * np.sin(2 * np.pi * 500 * t)
        return tone.astype(np.float32)

    def test_ghunnah_returns_physics_result(self, validator, nasal_audio):
        """Ghunnah validation returns PhysicsResult"""
        outcome = validator.validate_ghunnah(nasal_audio, 0.0, 0.3)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class TestTafkheemValidation:
    """Test Tafkheem (heavy letter) detection via F2 formant"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def heavy_audio(self):
        """Generate audio with low F2 characteristic"""
        sr = 22050
        duration = 0.2
        t = np.linspace(0, duration, int(sr * duration))
        # Energy concentrated low, with a weaker 1 kHz partial, for a "heavy" timbre.
        tone = np.sin(2 * np.pi * 150 * t) + 0.3 * np.sin(2 * np.pi * 1000 * t)
        return tone.astype(np.float32)

    def test_tafkheem_returns_physics_result(self, validator, heavy_audio):
        """Tafkheem validation returns PhysicsResult"""
        outcome = validator.validate_tafkheem(heavy_audio, 0.0, 0.2)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
class TestIdghamValidation:
    """Test Idgham (assimilation) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def merged_audio(self):
        """Generate smoothly merged audio (no boundary)"""
        sr = 22050
        duration = 0.4
        t = np.linspace(0, duration, int(sr * duration))
        # A single continuous tone — no acoustic boundary anywhere.
        return np.sin(2 * np.pi * 200 * t).astype(np.float32)

    def test_idgham_returns_physics_result(self, validator, merged_audio):
        """Idgham validation returns PhysicsResult"""
        outcome = validator.validate_idgham(merged_audio, 0.0, 0.2, 0.4, has_ghunnah=True)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
class TestIkhfaValidation:
    """Test Ikhfa (concealment) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def concealed_audio(self):
        """Generate gradually fading nasal audio"""
        sr = 22050
        duration = 0.3
        t = np.linspace(0, duration, int(sr * duration))
        fade = np.exp(-3 * t / duration)  # exponential decay envelope
        return (fade * np.sin(2 * np.pi * 300 * t)).astype(np.float32)

    def test_ikhfa_returns_physics_result(self, validator, concealed_audio):
        """Ikhfa validation returns PhysicsResult"""
        outcome = validator.validate_ikhfa(concealed_audio, 0.0, 0.3)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
class TestIzharValidation:
    """Test Izhar (clear pronunciation) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def clear_audio(self):
        """Generate audio with clear boundary between sounds"""
        sr = 22050
        duration = 0.4
        t = np.linspace(0, duration, int(sr * duration))
        n = len(t)
        signal = np.zeros_like(t)
        # First letter occupies the opening half...
        signal[:n // 2] = np.sin(2 * np.pi * 200 * t[:n // 2])
        # ...then a short silent gap (50-55%)...
        # ...and a second letter at a different pitch fills the remainder.
        signal[int(0.55 * n):] = np.sin(2 * np.pi * 300 * t[int(0.55 * n):])
        return signal.astype(np.float32)

    def test_izhar_returns_physics_result(self, validator, clear_audio):
        """Izhar validation returns PhysicsResult"""
        outcome = validator.validate_izhar(clear_audio, 0.0, 0.2, 0.22)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
class TestValidationResults:
    """Test result dataclasses.

    Fix: the original methods re-imported QalqalahResult, MaddResult and
    ValidationStatus inside the test bodies even though all three are already
    imported at module scope — the redundant function-local imports are removed.
    """

    def test_physics_result_fields(self):
        """PhysicsResult has all required fields"""
        result = PhysicsResult(
            status=ValidationStatus.PASS,
            metric_name="test",
            expected_pattern="dip-spike",
            observed_pattern="dip-spike",
            score=0.95
        )
        assert result.status == ValidationStatus.PASS
        assert result.score == 0.95

    def test_qalqalah_result_fields(self):
        """QalqalahResult has specific fields"""
        # QalqalahResult inherits from PhysicsResult and adds qalqalah metrics.
        result = QalqalahResult(
            status=ValidationStatus.PASS,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern="dip_then_spike",
            score=0.8,
            rms_profile="dip-spike",
            dip_depth=0.3,
            spike_height=1.5,
            closure_duration_ms=50
        )
        assert result.dip_depth == 0.3
        assert result.spike_height == 1.5

    def test_madd_result_fields(self):
        """MaddResult has duration fields"""
        result = MaddResult(
            status=ValidationStatus.PASS,
            metric_name="Duration Ratio",
            expected_pattern="extended",
            observed_pattern="extended",
            score=1.0,
            actual_duration_ms=400,
            expected_duration_ms=400,
            ratio=1.0
        )
        assert result.ratio == 1.0
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
if __name__ == "__main__":
    # pytest.main() RETURNS the exit status instead of exiting; propagate it
    # so running this file directly reports failures via the process exit code
    # (previously the script always exited 0, even when tests failed).
    raise SystemExit(pytest.main([__file__, "-v"]))
|
tests/test_pipeline.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Pipeline Integration Tests
|
| 4 |
+
|
| 5 |
+
Tests the full alignment pipeline end-to-end:
|
| 6 |
+
- Text parsing → Alignment → Physics Validation
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import pytest
|
| 10 |
+
import os
|
| 11 |
+
import sys
|
| 12 |
+
|
| 13 |
+
# Add src to path
|
| 14 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 15 |
+
|
| 16 |
+
from alignment_engine import MockAlignmentEngine, AlignmentResult
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class TestFullPipeline:
    """Integration tests for complete pipeline"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_surah_91_ayah_1(self, engine):
        """Test alignment for Surah 91, Ayah 1: والشمس وضحاها"""
        transliterated = [
            "w a l sh sh a m s i",
            "w a D u H aa h aa",
        ]

        outcome = engine.align(
            audio_path="/path/to/surah_91_ayah_1.wav",
            phonetic_words=transliterated,
            surah=91,
            ayah=1,
        )

        assert outcome.surah == 91
        assert outcome.ayah == 1
        assert len(outcome.words) == 2

        # Verify monotonicity
        for prev, curr in zip(outcome.words, outcome.words[1:]):
            assert curr.whisper_start >= prev.whisper_end

    def test_grapheme_count_matches(self, engine):
        """Total graphemes should match input"""
        transliterated = ["a b c", "d e f g"]  # 7 phonemes total

        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=transliterated,
            surah=1,
            ayah=1,
        )

        produced = sum(len(w.phonemes) for w in outcome.words)
        # Each space-separated token should become a phoneme
        expected = sum(len(w.split()) for w in transliterated)
        assert produced >= expected - 2  # Allow some variance
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class TestTimingRegression:
    """Tests to catch timing regressions"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_no_negative_durations(self, engine):
        """No phoneme should have negative duration"""
        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=["a b c d e f g h i j"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for phoneme in aligned_word.phonemes:
                assert phoneme.duration >= 0, \
                    f"Negative duration: {phoneme.phoneme} = {phoneme.duration}"

    def test_no_zero_duration_phonemes(self, engine):
        """Phonemes should have positive duration"""
        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=["test word"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for phoneme in aligned_word.phonemes:
                assert phoneme.duration > 0, \
                    f"Zero duration phoneme: {phoneme.phoneme}"

    def test_no_overlapping_phonemes(self, engine):
        """Phonemes within a word should not overlap"""
        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=["a l r a h m a n"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for prev, curr in zip(aligned_word.phonemes, aligned_word.phonemes[1:]):
                assert curr.start >= prev.end, \
                    f"Overlap: {prev.phoneme} ({prev.end}) > {curr.phoneme} ({curr.start})"
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
if __name__ == "__main__":
    # pytest.main() RETURNS the exit status instead of exiting; propagate it
    # so running this file directly reports failures via the process exit code
    # (previously the script always exited 0, even when tests failed).
    raise SystemExit(pytest.main([__file__, "-v"]))
|
whisperx_align_90.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
WhisperX Forced Alignment for Surah 90 (Al-Balad)
|
| 4 |
+
Uses wav2vec2 to FORCE align the known Quran text to the audio.
|
| 5 |
+
This gives perfect letter timing since we provide the exact text upfront.
|
| 6 |
+
|
| 7 |
+
Based on MahQuranApp/scripts/whisperx_forced_align.py
|
| 8 |
+
"""
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
+
import torch
|
| 12 |
+
import whisperx
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
# Monkeypatch torch.load for PyTorch 2.6+ compatibility.
# PyTorch 2.6 made torch.load default to weights_only=True, which rejects
# checkpoints containing OmegaConf container objects (used by some align models).
try:
    from omegaconf import OmegaConf
    from omegaconf.listconfig import ListConfig
    from omegaconf.dictconfig import DictConfig
    from omegaconf.base import ContainerMetadata
    # Preferred path: whitelist the OmegaConf types so safe loading still works.
    torch.serialization.add_safe_globals([ListConfig, DictConfig, ContainerMetadata])
    print("Added OmegaConf to torch safe globals.")
except ImportError:
    print("OmegaConf not found, using aggressive torch.load patch.")

    # Fallback: force weights_only=False on every torch.load call.
    # NOTE(review): this also overrides callers that explicitly pass
    # weights_only and disables safe-loading globally — acceptable for a
    # local one-off script, but confirm before reusing elsewhere.
    original_load = torch.load
    def safe_load(*args, **kwargs):
        kwargs['weights_only'] = False
        return original_load(*args, **kwargs)
    torch.load = safe_load

# Configuration — hard-coded paths into the MahQuranApp checkout.
SURAH_NUM = 90
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_090.mp3"
OUTPUT_DIR = PROJECT_ROOT / "public/data"
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"
DEVICE = "cpu"  # Use CPU for compatibility
|
| 39 |
+
|
| 40 |
+
def get_surah_text():
    """Get Surah 90 text from verses_v4.json"""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        verses = json.load(f)
    # Join every ayah of the configured surah into one space-separated string.
    return ' '.join(verse['text'] for verse in verses[str(SURAH_NUM)])
|
| 47 |
+
|
| 48 |
+
def main():
    """Force-align the known Quran text of Surah 90 to its recording.

    Pipeline: load the surah text, load the wav2vec2 alignment model and the
    audio, feed the full text as a single segment to whisperx.align with
    character alignments enabled, then dump per-character timings (seconds)
    to letter_timing_<surah>.json for MahQuranApp.
    """
    print("=" * 60)
    print(f"WhisperX FORCED ALIGNMENT for Surah {SURAH_NUM} (Al-Balad)")
    print("Using known Quran text for direct wav2vec2 alignment")
    print("=" * 60)

    # 1. Check audio exists
    if not AUDIO_PATH.exists():
        print(f"ERROR: Audio not found at {AUDIO_PATH}")
        return

    # 2. Get Quran text
    quran_text = get_surah_text()
    print(f"\nQuran text ({len(quran_text)} chars):")
    print(quran_text[:100] + "...")

    # 3. Load Alignment Model (wav2vec2)
    print("\nLoading wav2vec2 alignment model (Arabic)...")
    model_a, metadata = whisperx.load_align_model(language_code="ar", device=DEVICE)
    print("Alignment model loaded.")

    # 4. Load Audio
    print("Loading audio...")
    audio = whisperx.load_audio(str(AUDIO_PATH))
    audio_duration = len(audio) / 16000  # whisperx.load_audio resamples to 16 kHz
    print(f"Audio duration: {audio_duration:.2f}s")

    # 5. Create "fake" segments from the known Quran text.
    # WhisperX's align() function expects segments with 'text', 'start', 'end';
    # we provide the full Quran text as a single segment spanning the entire audio.
    print("\nCreating forced alignment segment from Quran text...")
    segments = [{
        "text": quran_text,
        "start": 0.0,
        "end": audio_duration
    }]

    # 6. Force Align
    print("Performing FORCED ALIGNMENT with wav2vec2...")
    result = whisperx.align(
        segments,
        model_a,
        metadata,
        audio,
        DEVICE,
        return_char_alignments=True
    )

    # 7. Extract character-level timing (SECONDS format for MahQuranApp).
    # idx numbers only the non-space characters, matching the app's indexing.
    print("\nExtracting character timings...")
    output_timing = []
    idx = 0

    for seg in result.get("segments", []):
        if "chars" in seg:
            for ch in seg["chars"]:
                char = ch.get("char", "")
                start = ch.get("start", 0)  # defaults to 0 when wav2vec2 gave no timing
                end = ch.get("end", 0)

                # Skip spaces — they carry no letter timing.
                if char.isspace():
                    continue

                output_timing.append({
                    "char": char,
                    "start": round(start, 3),  # seconds
                    "end": round(end, 3),
                    "idx": idx
                })
                idx += 1

    print(f"Got {len(output_timing)} characters with timing")

    # 8. Save output
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH_NUM}.json"
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_timing, f, ensure_ascii=False, indent=2)

    print(f"\nSaved to {output_path}")

    # Print first 20 for verification
    print("\n=== First 20 characters ===")
    for e in output_timing[:20]:
        dur_ms = (e['end'] - e['start']) * 1000
        print(f"  {e['idx']:3d}: '{e['char']}' @ {e['start']:.3f}s - {e['end']:.3f}s ({dur_ms:.0f}ms)")

    print("\n" + "=" * 60)
    print("✓ Forced alignment complete!")
    print("=" * 60)

if __name__ == "__main__":
    main()
|
whisperx_surah90.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Generate new precision timing for Surah 90 using faster-whisper
|
| 4 |
+
|
| 5 |
+
Uses faster-whisper directly (which WhisperX wraps) to avoid pyannote VAD issues.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from faster_whisper import WhisperModel
|
| 11 |
+
|
| 12 |
+
# Audio path
|
| 13 |
+
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"
|
| 14 |
+
VERSES_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/verses_v4.json"
|
| 15 |
+
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_new.json"
|
| 16 |
+
|
| 17 |
+
def run_alignment():
    """Transcribe Surah 90 with faster-whisper and emit per-letter timing.

    Loads the Whisper ``large-v3`` model on CPU (int8 quantization keeps the
    memory footprint small), transcribes the audio at ``AUDIO_PATH`` with
    word-level timestamps, then distributes each word's time span uniformly
    across its characters (see ``_extract_letter_timing``).

    Writes two JSON files:
      - ``OUTPUT_PATH``: full metadata (language, duration, letter list).
      - ``letter_timing_90_new.json`` next to it: compact MahQuranApp format
        (``char``/``start``/``end``/``idx`` only).

    Returns:
        list[dict]: one entry per character with keys ``char``, ``start``,
        ``end``, ``idx``, ``word``, ``source``.
    """
    print("=" * 60)
    print("Faster-Whisper Alignment - Surah 90")
    print("=" * 60)

    # 1. Load model.
    print("\n[1] Loading Whisper model (large-v3)...")
    model = WhisperModel("large-v3", device="cpu", compute_type="int8")

    # 2. Transcribe with word timestamps; the VAD filter trims long silences
    # (>= 500 ms) so word boundaries are not stretched across pauses.
    print(f"\n[2] Transcribing: {AUDIO_PATH}")
    segments, info = model.transcribe(
        AUDIO_PATH,
        language="ar",
        word_timestamps=True,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500)
    )

    print(f" Language: {info.language} (prob: {info.language_probability:.2f})")
    print(f" Duration: {info.duration:.1f}s")

    # 3. Extract per-letter timing. ``segments`` is a lazy generator;
    # materialize it once so it can be counted and iterated.
    print("\n[3] Extracting letter timing...")
    all_segments = list(segments)
    print(f" Segments: {len(all_segments)}")
    letter_timing = _extract_letter_timing(all_segments)
    print(f" Total letters: {len(letter_timing)}")

    # 4. Save full-metadata output.
    print(f"\n[4] Saving to: {OUTPUT_PATH}")
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    output_data = {
        "surah": 90,
        "name": "Al-Balad",
        "source": "faster-whisper large-v3",
        "language": info.language,
        "language_probability": round(info.language_probability, 3),
        "duration": round(info.duration, 1),
        "total_letters": len(letter_timing),
        "letters": letter_timing
    }

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, ensure_ascii=False, indent=2)

    # Also save the compact MahQuranApp format (subset of keys per letter).
    mahquran_format = [
        {"char": lt["char"], "start": lt["start"], "end": lt["end"], "idx": lt["idx"]}
        for lt in letter_timing
    ]

    mahquran_path = OUTPUT_PATH.parent / "letter_timing_90_new.json"
    with open(mahquran_path, 'w', encoding='utf-8') as f:
        json.dump(mahquran_format, f, ensure_ascii=False, indent=2)
    print(f" Also saved: {mahquran_path}")

    print("\n" + "=" * 60)
    print("✓ Alignment complete!")
    print("=" * 60)

    # Show a small sample for quick manual verification.
    print("\nSample (first 10 letters):")
    for lt in letter_timing[:10]:
        print(f" [{lt['char']}] {lt['start']:.3f}s - {lt['end']:.3f}s ({lt['word']})")

    return letter_timing


def _extract_letter_timing(all_segments):
    """Distribute each word's [start, end] span uniformly over its characters.

    NOTE(review): every Unicode codepoint of the word — presumably including
    Arabic diacritics — gets an equal share of the word's duration; confirm
    that is acceptable for downstream consumers.

    Args:
        all_segments: transcription segments whose ``.words`` attribute holds
            word objects with ``word``, ``start`` and ``end`` fields.

    Returns:
        list[dict]: per-character entries (``char``, ``start``, ``end``,
        ``idx``, ``word``, ``source``) with a globally increasing index.
    """
    letter_timing = []
    global_idx = 0
    for segment in all_segments:
        if not segment.words:
            continue
        for word in segment.words:
            word_text = word.word.strip()
            chars = list(word_text)
            if not chars:
                # Whitespace-only token: nothing to time (and avoids a
                # division by zero below).
                continue
            char_duration = (word.end - word.start) / len(chars)
            for i, char in enumerate(chars):
                char_start = word.start + (i * char_duration)
                letter_timing.append({
                    "char": char,
                    "start": round(char_start, 3),
                    "end": round(char_start + char_duration, 3),
                    "idx": global_idx,
                    "word": word_text,
                    "source": "faster_whisper"
                })
                global_idx += 1
    return letter_timing
|
| 116 |
+
|
| 117 |
+
# Script entry point: run the alignment only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run_alignment()
|