Upload folder using huggingface_hub
Browse files- .gitignore +24 -0
- .pytest_cache/.gitignore +2 -0
- .pytest_cache/CACHEDIR.TAG +4 -0
- .pytest_cache/README.md +8 -0
- .pytest_cache/v/cache/lastfailed +3 -0
- .pytest_cache/v/cache/nodeids +37 -0
- README.md +138 -0
- align_graphemes.py +202 -0
- batch_align_all.py +226 -0
- ctc_align_90.py +127 -0
- ctc_align_90_physics.py +259 -0
- ctc_align_91.py +259 -0
- physics_analyzer.py +281 -0
- physics_analyzer_v2.py +370 -0
- physics_analyzer_v3.py +542 -0
- requirements.txt +21 -0
- src/__init__.py +35 -0
- src/alignment_engine.py +407 -0
- src/duration_model.py +311 -0
- src/lisan_phonemes.json +438 -0
- src/mfa_refiner.py +419 -0
- src/physics_validator.py +930 -0
- src/pipeline.py +334 -0
- src/tajweed_parser.py +334 -0
- surah_90_test.py +241 -0
- surah_91_full_pipeline.py +213 -0
- surah_91_test.py +297 -0
- tests/test_alignment_engine.py +224 -0
- tests/test_physics_validator.py +303 -0
- tests/test_pipeline.py +118 -0
- whisperx_align_90.py +140 -0
- whisperx_surah90.py +118 -0
.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
| 9 |
+
.venv/
|
| 10 |
+
ENV/
|
| 11 |
+
|
| 12 |
+
# IDE
|
| 13 |
+
.vscode/
|
| 14 |
+
.idea/
|
| 15 |
+
*.swp
|
| 16 |
+
|
| 17 |
+
# Output
|
| 18 |
+
output/
|
| 19 |
+
*.json
|
| 20 |
+
!src/*.json
|
| 21 |
+
|
| 22 |
+
# OS
|
| 23 |
+
.DS_Store
|
| 24 |
+
Thumbs.db
|
.pytest_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by pytest automatically.
|
| 2 |
+
*
|
.pytest_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
| 2 |
+
# This file is a cache directory tag created by pytest.
|
| 3 |
+
# For information about cache directory tags, see:
|
| 4 |
+
# https://bford.info/cachedir/spec.html
|
.pytest_cache/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pytest cache directory #
|
| 2 |
+
|
| 3 |
+
This directory contains data from the pytest's cache plugin,
|
| 4 |
+
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
|
| 5 |
+
|
| 6 |
+
**Do not** commit this to version control.
|
| 7 |
+
|
| 8 |
+
See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
|
.pytest_cache/v/cache/lastfailed
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_short_segment_skipped": true
|
| 3 |
+
}
|
.pytest_cache/v/cache/nodeids
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"tests/test_alignment_engine.py::TestArabicPhonemes::test_arabic_phonetic_transcription",
|
| 3 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_alignment_result",
|
| 4 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_phoneme_alignment",
|
| 5 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_phoneme_normalized_duration",
|
| 6 |
+
"tests/test_alignment_engine.py::TestDataclasses::test_word_alignment",
|
| 7 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_phoneme_generation",
|
| 8 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_returns_result",
|
| 9 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_timing_monotonic",
|
| 10 |
+
"tests/test_alignment_engine.py::TestMockAlignmentEngine::test_mock_align_word_count",
|
| 11 |
+
"tests/test_alignment_engine.py::TestPhonemeNormalization::test_phonemes_cover_word_duration",
|
| 12 |
+
"tests/test_alignment_engine.py::TestPhonemeNormalization::test_phonemes_fit_word_boundary",
|
| 13 |
+
"tests/test_alignment_engine.py::TestTimingMonotonicity::test_phoneme_timing_monotonic",
|
| 14 |
+
"tests/test_alignment_engine.py::TestTimingMonotonicity::test_word_timing_monotonic",
|
| 15 |
+
"tests/test_physics_validator.py::TestGhunnahValidation::test_ghunnah_returns_physics_result",
|
| 16 |
+
"tests/test_physics_validator.py::TestIdghamValidation::test_idgham_returns_physics_result",
|
| 17 |
+
"tests/test_physics_validator.py::TestIkhfaValidation::test_ikhfa_returns_physics_result",
|
| 18 |
+
"tests/test_physics_validator.py::TestIzharValidation::test_izhar_returns_physics_result",
|
| 19 |
+
"tests/test_physics_validator.py::TestMaddValidation::test_madd_asli_duration",
|
| 20 |
+
"tests/test_physics_validator.py::TestMaddValidation::test_madd_returns_physics_result",
|
| 21 |
+
"tests/test_physics_validator.py::TestPhysicsValidatorInit::test_custom_sample_rate",
|
| 22 |
+
"tests/test_physics_validator.py::TestPhysicsValidatorInit::test_default_init",
|
| 23 |
+
"tests/test_physics_validator.py::TestPhysicsValidatorInit::test_thresholds_exist",
|
| 24 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_detects_dip_spike",
|
| 25 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_returns_physics_result",
|
| 26 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_short_segment_handles_gracefully",
|
| 27 |
+
"tests/test_physics_validator.py::TestQalqalahValidation::test_qalqalah_short_segment_skipped",
|
| 28 |
+
"tests/test_physics_validator.py::TestTafkheemValidation::test_tafkheem_returns_physics_result",
|
| 29 |
+
"tests/test_physics_validator.py::TestValidationResults::test_madd_result_fields",
|
| 30 |
+
"tests/test_physics_validator.py::TestValidationResults::test_physics_result_fields",
|
| 31 |
+
"tests/test_physics_validator.py::TestValidationResults::test_qalqalah_result_fields",
|
| 32 |
+
"tests/test_pipeline.py::TestFullPipeline::test_grapheme_count_matches",
|
| 33 |
+
"tests/test_pipeline.py::TestFullPipeline::test_surah_91_ayah_1",
|
| 34 |
+
"tests/test_pipeline.py::TestTimingRegression::test_no_negative_durations",
|
| 35 |
+
"tests/test_pipeline.py::TestTimingRegression::test_no_overlapping_phonemes",
|
| 36 |
+
"tests/test_pipeline.py::TestTimingRegression::test_no_zero_duration_phonemes"
|
| 37 |
+
]
|
README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TajweedSST — Quranic Letter-Level Alignment & Tajweed Physics Engine
|
| 2 |
+
|
| 3 |
+
> CTC Forced Alignment + Acoustic Physics Validation for Quranic Recitation
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
TajweedSST is a Python pipeline that produces **letter-level timing data** for Quranic recitation audio. It combines **wav2vec2 CTC forced alignment** with **acoustic physics validation** (Tajweed rules) to generate timing files consumed by [MahQuranApp](https://github.com/ihyatafsir/MahQuranApp) for real-time letter highlighting.
|
| 8 |
+
|
| 9 |
+
## Pipeline Architecture
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 13 |
+
│ TajweedSST Pipeline │
|
| 14 |
+
│ │
|
| 15 |
+
│ 1. CTC Forced Alignment (wav2vec2) │
|
| 16 |
+
│ └─ Word-level timestamps from audio │
|
| 17 |
+
│ │
|
| 18 |
+
│ 2. Character Expansion │
|
| 19 |
+
│ └─ Word timestamps → individual character timing │
|
| 20 |
+
│ │
|
| 21 |
+
│ 3. Grapheme Matching │
|
| 22 |
+
│ └─ Merge base + diacritics to match App.tsx rendering │
|
| 23 |
+
│ │
|
| 24 |
+
│ 4. Tajweed Parsing │
|
| 25 |
+
│ └─ Map letters to Tajweed rules (Qalqalah, Ghunnah..) │
|
| 26 |
+
│ │
|
| 27 |
+
│ 5. Physics Validation │
|
| 28 |
+
│ └─ RMS bounce, duration, formant analysis │
|
| 29 |
+
│ │
|
| 30 |
+
│ 6. Export to MahQuranApp format │
|
| 31 |
+
│ └─ JSON with idx, char, ayah, start(ms), end, wordIdx │
|
| 32 |
+
└─────────────────────────────────────────────────────────────┘
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
## Quick Start
|
| 36 |
+
|
| 37 |
+
### Prerequisites
|
| 38 |
+
|
| 39 |
+
```bash
|
| 40 |
+
cd /path/to/tajweedsst
|
| 41 |
+
python3 -m venv venv
|
| 42 |
+
source venv/bin/activate
|
| 43 |
+
pip install torch torchaudio ctc-forced-aligner librosa
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
### Single Surah
|
| 47 |
+
|
| 48 |
+
```bash
|
| 49 |
+
# Align Surah 90 (Al-Balad) for Abdul Basit
|
| 50 |
+
python ctc_align_90.py   # (use ctc_align_91.py for the physics-enabled template)
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### Batch All Surahs
|
| 54 |
+
|
| 55 |
+
```bash
|
| 56 |
+
# Process all 114 surahs for Abdul Basit
|
| 57 |
+
python batch_align_all.py
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
## Output Format
|
| 61 |
+
|
| 62 |
+
Each `letter_timing_XX.json` contains an array of timing entries:
|
| 63 |
+
|
| 64 |
+
```json
|
| 65 |
+
{
|
| 66 |
+
"idx": 0,
|
| 67 |
+
"char": "لَ",
|
| 68 |
+
"ayah": 1,
|
| 69 |
+
"start": 3360,
|
| 70 |
+
"end": 3410,
|
| 71 |
+
"duration": 50,
|
| 72 |
+
"wordIdx": 0,
|
| 73 |
+
"weight": 1.0
|
| 74 |
+
}
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### Fields
|
| 78 |
+
|
| 79 |
+
| Field | Type | Description |
|
| 80 |
+
|-------|------|-------------|
|
| 81 |
+
| `idx` | int | Sequential letter index |
|
| 82 |
+
| `char` | string | Arabic grapheme (base + diacritics) |
|
| 83 |
+
| `ayah` | int | Verse number (1-indexed) |
|
| 84 |
+
| `start` | int | Start time in milliseconds |
|
| 85 |
+
| `end` | int | End time in milliseconds |
|
| 86 |
+
| `duration` | int | Duration in milliseconds |
|
| 87 |
+
| `wordIdx` | int | Word index within the surah |
|
| 88 |
+
| `weight` | float | Confidence weight |
|
| 89 |
+
|
| 90 |
+
## Critical: Grapheme Matching
|
| 91 |
+
|
| 92 |
+
The timing data **must** match the grapheme count produced by MahQuranApp's `splitIntoGraphemes()` function. This function combines base Arabic letters with their following diacritics:
|
| 93 |
+
|
| 94 |
+
**App.tsx Diacritics Set:**
|
| 95 |
+
```
|
| 96 |
+
ً ٌ ٍ َ ُ ِ ّ ْ ٰ ۖ ۗ ۘ ۙ ۚ ۛ ۜ ٔ ٓ ـ
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
Plus Unicode ranges: `0x064B–0x0652` and `0x0610–0x061A`
|
| 100 |
+
|
| 101 |
+
**Example:** The word `لَآ` splits into 2 graphemes: `['لَ', 'آ']`
|
| 102 |
+
|
| 103 |
+
If the timing count doesn't match the grapheme count, highlighting will drift!
|
| 104 |
+
|
| 105 |
+
## Physics Validation
|
| 106 |
+
|
| 107 |
+
TajweedSST validates timing against acoustic physics:
|
| 108 |
+
|
| 109 |
+
| Rule | Check | Method |
|
| 110 |
+
|------|-------|--------|
|
| 111 |
+
| Qalqalah | RMS dip + spike | Envelope analysis |
|
| 112 |
+
| Ghunnah | Nasal duration | Duration measurement |
|
| 113 |
+
| Madd | Extended vowel | Duration ratio |
|
| 114 |
+
| Tafkheem | Heavy articulation | Formant F2 analysis |
|
| 115 |
+
|
| 116 |
+
## Project Structure
|
| 117 |
+
|
| 118 |
+
```
|
| 119 |
+
tajweedsst/
|
| 120 |
+
├── src/
|
| 121 |
+
│ ├── tajweed_parser.py # Tajweed rule detection
|
| 122 |
+
│ ├── physics_validator.py # Acoustic validation
|
| 123 |
+
│ └── duration_model.py # Duration calibration
|
| 124 |
+
├── tests/ # 34 unit/integration tests
|
| 125 |
+
├── ctc_align_90.py # Single surah alignment
|
| 126 |
+
├── ctc_align_91.py # Template with physics
|
| 127 |
+
├── batch_align_all.py # Batch all surahs
|
| 128 |
+
└── README.md
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
## Reciter Support
|
| 132 |
+
|
| 133 |
+
Currently supported:
|
| 134 |
+
- **Abdul Basit** (114 surahs)
|
| 135 |
+
|
| 136 |
+
## License
|
| 137 |
+
|
| 138 |
+
MIT
|
align_graphemes.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Grapheme-Aligned Timing Generator for Surah 91
|
| 4 |
+
|
| 5 |
+
This script:
|
| 6 |
+
1. Reads verse text from verses_v4.json and extracts graphemes (exactly as MahQuranApp does)
|
| 7 |
+
2. Reads the original timing and maps it to the grapheme count
|
| 8 |
+
3. Outputs timing with exactly the right number of entries
|
| 9 |
+
|
| 10 |
+
The key is: timing entries must match the grapheme count from verse.words[].arabic
|
| 11 |
+
"""
|
| 12 |
+
import json
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
# Config
|
| 16 |
+
SURAH = 91
|
| 17 |
+
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")
|
| 18 |
+
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"
|
| 19 |
+
TIMING_PATH = PROJECT_ROOT / "public/data/abdul_basit_original/letter_timing_91.json"
|
| 20 |
+
OUTPUT_PATH = PROJECT_ROOT / "public/data/abdul_basit/letter_timing_91_aligned.json"
|
| 21 |
+
|
| 22 |
+
# Arabic diacritics (same as MahQuranApp App.tsx)
DIACRITICS = set('ًٌٍَُِّْٰۖۗۘۙۚۛۜٔٓـ')


def split_graphemes(text: str) -> list[str]:
    """Split Arabic text into graphemes (base letter + following diacritics).

    Mirrors MahQuranApp's splitIntoGraphemes: a grapheme is one base
    character plus every combining mark immediately after it. Spaces flush
    the pending grapheme and are dropped. A diacritic with no pending base
    starts a grapheme of its own.
    """
    def _is_mark(c: str) -> bool:
        code = ord(c)
        return c in DIACRITICS or 0x064B <= code <= 0x0652 or 0x0610 <= code <= 0x061A

    pieces: list[str] = []
    pending = ''
    for c in text:
        if c == ' ':
            if pending:
                pieces.append(pending)
            pending = ''
        elif pending and _is_mark(c):
            pending += c
        else:
            if pending:
                pieces.append(pending)
            pending = c
    if pending:
        pieces.append(pending)
    return pieces
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_all_graphemes(surah_num: int) -> list[dict]:
    """Extract every grapheme for a surah, exactly as MahQuranApp renders it.

    Reads VERSES_PATH and returns one dict per grapheme with keys
    'char', 'ayah' and 'wordIdx' (word index running across the whole surah).
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as fh:
        surah_verses = json.load(fh).get(str(surah_num), [])

    collected: list[dict] = []
    word_counter = 0
    for verse in surah_verses:
        ayah_num = verse.get('ayah', 0)
        for word in verse.get('words', []):
            for g in split_graphemes(word.get('arabic', '')):
                collected.append({
                    'char': g,
                    'ayah': ayah_num,
                    'wordIdx': word_counter
                })
            word_counter += 1

    return collected
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def strip_diacritics(text: str) -> str:
    """Remove diacritics from Arabic text, leaving only base letters.

    Uses the same diacritic definition as split_graphemes: the explicit
    DIACRITICS set, the harakat range U+064B-U+0652, and the Quranic
    annotation range U+0610-U+061A.  The previous version omitted the
    U+0610-U+061A range, so a grapheme carrying one of those marks
    stripped to a multi-character "base" and could never match a single
    base letter in distribute_timing.
    """
    def _is_mark(ch: str) -> bool:
        code = ord(ch)
        return ch in DIACRITICS or 0x064B <= code <= 0x0652 or 0x0610 <= code <= 0x061A

    return ''.join(ch for ch in text if not _is_mark(ch))
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def is_standalone_diacritic(char: str) -> bool:
    """Return True when *char* is exactly one character and that character
    is a diacritic mark (DIACRITICS set or the U+064B-U+0652 range)."""
    if len(char) != 1:
        return False
    return char in DIACRITICS or 0x064B <= ord(char) <= 0x0652
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def distribute_timing(graphemes: list[dict], original_timing: list[dict]) -> list[dict]:
    """Map original timing to graphemes by matching base letters, skipping diacritics.

    Args:
        graphemes: dicts with 'char', 'ayah', 'wordIdx' (see get_all_graphemes).
        original_timing: dicts with at least 'char', 'start', 'end' (ms);
            'duration' and 'weight' are used when present.

    Returns:
        A new list of timing dicts, one per grapheme.  Returns [] when
        original_timing is empty.  NOTE: a grapheme that finds no match,
        has no remaining timing entries, and has no predecessor in the
        output is silently dropped (only reachable for the very first
        grapheme).
    """
    if not original_timing:
        return []

    # First, filter out standalone diacritics from original timing
    # and merge their duration into the previous letter
    filtered_timing = []
    for entry in original_timing:
        char = entry['char']
        if is_standalone_diacritic(char):
            # Merge duration into previous entry
            # (a leading standalone diacritic with no predecessor is dropped)
            if filtered_timing:
                filtered_timing[-1]['end'] = entry['end']
                filtered_timing[-1]['duration'] = filtered_timing[-1]['end'] - filtered_timing[-1]['start']
        else:
            filtered_timing.append(dict(entry))  # Copy: merges above must not mutate the caller's dicts

    print(f" (Filtered timing: {len(filtered_timing)} base letters)")

    aligned_timing = []
    orig_idx = 0  # cursor into filtered_timing; advances as graphemes consume entries

    for i, g in enumerate(graphemes):
        grapheme_char = g['char']
        base_letter = strip_diacritics(grapheme_char)

        # Try to find matching original timing entry by base letter.
        # The window looks back 2 and ahead 10 entries from the cursor to
        # tolerate small insertions/deletions between the two sequences.
        matched = None
        search_start = max(0, orig_idx - 2)
        search_end = min(len(filtered_timing), orig_idx + 10)  # Search wider

        for j in range(search_start, search_end):
            orig_char = filtered_timing[j]['char']
            orig_base = strip_diacritics(orig_char)
            # Substring checks catch partial/composed forms on either side.
            if orig_base == base_letter or orig_char in grapheme_char or base_letter in orig_char:
                matched = filtered_timing[j]
                orig_idx = j + 1
                break

        if not matched and orig_idx < len(filtered_timing):
            # Fallback: use next available timing
            matched = filtered_timing[orig_idx]
            orig_idx += 1

        if matched:
            aligned_timing.append({
                'idx': i,
                'char': grapheme_char,
                'ayah': g['ayah'],
                'start': matched['start'],
                'end': matched['end'],
                'duration': matched.get('duration', matched['end'] - matched['start']),
                'wordIdx': g['wordIdx'],
                'weight': matched.get('weight', 1.0)
            })
        else:
            # Last resort: estimate from previous (fixed 100 ms slot after it)
            if aligned_timing:
                prev = aligned_timing[-1]
                aligned_timing.append({
                    'idx': i,
                    'char': grapheme_char,
                    'ayah': g['ayah'],
                    'start': prev['end'],
                    'end': prev['end'] + 100,
                    'duration': 100,
                    'wordIdx': g['wordIdx'],
                    'weight': 1.0
                })

    return aligned_timing
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def main():
    """Generate grapheme-aligned timing for SURAH: read graphemes from
    verses_v4.json, remap the original timing file onto them, and write
    the aligned JSON to OUTPUT_PATH."""
    print("=" * 60)
    print(f"Grapheme-Aligned Timing Generator: Surah {SURAH}")
    print("=" * 60)

    # Get graphemes from verse text
    graphemes = get_all_graphemes(SURAH)
    print(f"\n[1] Graphemes from verse text: {len(graphemes)}")

    # Load original timing
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        original_timing = json.load(f)
    print(f"[2] Original timing entries: {len(original_timing)}")

    # Distribute timing to graphemes
    aligned_timing = distribute_timing(graphemes, original_timing)
    print(f"[3] Aligned timing entries: {len(aligned_timing)}")

    # Save
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(aligned_timing, f, ensure_ascii=False, indent=2)
    print(f"\n[4] Saved: {OUTPUT_PATH}")

    # Show sample
    print("\n=== First 10 graphemes ===")
    for t in aligned_timing[:10]:
        print(f" {t['idx']:3d}: '{t['char']}' @ {t['start']}-{t['end']}ms (ayah={t['ayah']})")

    print("\n" + "=" * 60)
    print("✓ Done! Copy to letter_timing_91.json to test")
    print("=" * 60)
| 199 |
+
|
| 200 |
+
|
| 201 |
+
# Script entry point: run the aligner when executed directly.
if __name__ == "__main__":
    main()
|
batch_align_all.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Batch CTC Alignment for All Abdul Basit Surahs
|
| 4 |
+
Processes all 114 surahs with the full pipeline:
|
| 5 |
+
1. CTC forced alignment (wav2vec2)
|
| 6 |
+
2. Grapheme matching (App.tsx compatible)
|
| 7 |
+
3. Export to MahQuranApp format
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
cd /home/absolut7/Documents/26apps/tajweedsst
|
| 11 |
+
source venv/bin/activate
|
| 12 |
+
python batch_align_all.py
|
| 13 |
+
"""
|
| 14 |
+
import json
|
| 15 |
+
import sys
|
| 16 |
+
import time
|
| 17 |
+
import torch
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
from ctc_forced_aligner import (
|
| 20 |
+
load_audio,
|
| 21 |
+
load_alignment_model,
|
| 22 |
+
generate_emissions,
|
| 23 |
+
preprocess_text,
|
| 24 |
+
get_alignments,
|
| 25 |
+
get_spans,
|
| 26 |
+
postprocess_results,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Config
|
| 30 |
+
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")
|
| 31 |
+
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"
|
| 32 |
+
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"
|
| 33 |
+
AUDIO_DIR = PROJECT_ROOT / "public/audio/abdul_basit"
|
| 34 |
+
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 35 |
+
BATCH_SIZE = 4
|
| 36 |
+
|
| 37 |
+
# Exact same DIACRITICS as App.tsx line 176
DIACRITICS = set(['ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', 'ٰ', 'ۖ', 'ۗ', 'ۘ', 'ۙ', 'ۚ', 'ۛ', 'ۜ', 'ٔ', 'ٓ', 'ـ'])


def is_diacritic(ch):
    """Match App.tsx splitIntoGraphemes exactly: the DIACRITICS set plus the
    U+064B-U+0652 (harakat) and U+0610-U+061A (annotation) ranges."""
    code = ord(ch)
    return ch in DIACRITICS or 0x064B <= code <= 0x0652 or 0x0610 <= code <= 0x061A


def split_into_graphemes(text):
    """Exact same logic as App.tsx splitIntoGraphemes: each grapheme is one
    base character followed by its combining marks; spaces flush the pending
    grapheme and are dropped."""
    out = []
    buf = ''
    for ch in text:
        if ch == ' ':
            if buf:
                out.append(buf)
            buf = ''
        elif buf and is_diacritic(ch):
            buf += ch
        else:
            if buf:
                out.append(buf)
            buf = ch
    if buf:
        out.append(buf)
    return out
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def load_quran_text(all_verses, surah_num):
    """Return the surah's verse texts joined by single spaces.

    *all_verses* maps str(surah number) -> list of verse dicts; a missing
    surah or missing 'text' keys yield empty strings.
    """
    surah_verses = all_verses.get(str(surah_num), [])
    return ' '.join(verse.get('text', '') for verse in surah_verses)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_grapheme_list(all_verses, surah_num):
    """Get graphemes with ayah info matching App.tsx rendering.

    Splits each verse's 'text' on whitespace and explodes every word into
    graphemes via split_into_graphemes.  NOTE(review): indexes v['text'] and
    v['ayah'] directly, so a verse missing either key raises KeyError —
    presumably guaranteed by verses_v4.json; confirm against the data.
    """
    entries = []
    for verse in all_verses.get(str(surah_num), []):
        for token in verse['text'].split():
            for g in split_into_graphemes(token):
                entries.append({'char': g, 'ayah': verse['ayah']})
    return entries
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def process_surah(surah_num, alignment_model, alignment_tokenizer, all_verses):
    """Process a single surah through the full pipeline.

    Runs CTC forced alignment on the surah's mp3, expands word timestamps to
    character level, maps characters onto App.tsx graphemes and writes the
    timing JSON to OUTPUT_DIR.

    Returns:
        (letter_count, status_message) on success, or (None, reason) when the
        audio/text is missing or any step raises (all exceptions are caught
        and reported in the status string).
    """
    audio_path = AUDIO_DIR / f"surah_{surah_num:03d}.mp3"
    output_path = OUTPUT_DIR / f"letter_timing_{surah_num}.json"

    if not audio_path.exists():
        return None, "No audio file"

    text = load_quran_text(all_verses, surah_num)
    if not text.strip():
        return None, "No verse text"

    grapheme_list = get_grapheme_list(all_verses, surah_num)

    try:
        # Step 1: Load audio
        audio_waveform = load_audio(str(audio_path), alignment_model.dtype, alignment_model.device)

        # Step 2: Generate CTC emissions
        emissions, stride = generate_emissions(
            alignment_model, audio_waveform, batch_size=BATCH_SIZE
        )

        # Step 3: Preprocess text
        tokens_starred, text_starred = preprocess_text(
            text, romanize=True, language="ara",
        )

        # Step 4: Get alignments
        segments, scores, blank_token = get_alignments(
            emissions, tokens_starred, alignment_tokenizer,
        )

        # Step 5: Get spans & post-process
        spans = get_spans(tokens_starred, segments, blank_token)
        word_timestamps = postprocess_results(text_starred, spans, stride, scores)

        # Step 6: Expand to character-level
        # Each word's span is divided evenly among its characters.
        # NOTE(review): char_dur divides by len(word) but whitespace chars are
        # skipped below, so words containing spaces would under-fill their span
        # — presumably word_timestamps entries are single words; confirm.
        char_timings = []
        for wt in word_timestamps:
            word = wt['text']
            start = wt['start']
            end = wt['end']
            duration = end - start
            char_dur = duration / len(word) if word else 0
            for i, char in enumerate(word):
                if not char.isspace():
                    char_timings.append({
                        'start': start + i * char_dur,
                        'end': start + (i + 1) * char_dur,
                    })

        # Step 7: Map CTC chars to graphemes
        # Each grapheme consumes len(grapheme) character slots; start comes
        # from the first slot, end from the last.  Seconds -> milliseconds.
        timing = []
        ci = 0
        for gi, ginfo in enumerate(grapheme_list):
            g = ginfo['char']
            s, e = None, None
            for _ in range(len(g)):
                if ci < len(char_timings):
                    if s is None:
                        s = int(char_timings[ci]['start'] * 1000)
                    e = int(char_timings[ci]['end'] * 1000)
                    ci += 1
            if s is None:
                # Ran out of CTC slots: append a fixed 100 ms slot after the
                # previous entry.
                s = timing[-1]['end'] if timing else 0
                e = s + 100

            timing.append({
                'idx': gi,
                'char': g,
                'ayah': ginfo['ayah'],
                'start': s,
                'end': e,
                'duration': e - s,
                # NOTE(review): gi // 4 is not a real word index — it assumes
                # ~4 graphemes per word.  Looks like a placeholder; confirm
                # before relying on wordIdx downstream.
                'wordIdx': gi // 4,
                'weight': 1.0
            })

        # Save
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(timing, f, ensure_ascii=False, indent=2)

        return len(timing), f"OK ({len(grapheme_list)} graphemes)"

    except Exception as ex:
        return None, f"Error: {ex}"
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def main():
    """Batch-align all 114 surahs: load the wav2vec model once, process each
    surah through process_surah, and print a success/failure summary."""
    start_time = time.time()
    print("=" * 60)
    print("Batch CTC Alignment - Abdul Basit (All 114 Surahs)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # Load model once
    print("\n[1] Loading wav2vec alignment model...")
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        # fp16 halves memory on GPU; CPU kernels need fp32
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )
    print(" Model loaded.")

    # Load all verses
    print("[2] Loading verses...")
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        all_verses = json.load(f)
    print(f" Loaded {len(all_verses)} surahs")

    # Process each surah
    results = []
    for surah_num in range(1, 115):
        elapsed = time.time() - start_time
        print(f"\n[Surah {surah_num:03d}/114] ({elapsed:.0f}s elapsed)...")

        count, status = process_surah(
            surah_num, alignment_model, alignment_tokenizer, all_verses
        )
        results.append((surah_num, count, status))

        if count:
            print(f" ✓ {count} letters - {status}")
        else:
            print(f" ✗ {status}")

    # Summary
    elapsed = time.time() - start_time
    ok = sum(1 for _, c, _ in results if c)
    fail = sum(1 for _, c, _ in results if not c)

    print("\n" + "=" * 60)
    print(f"BATCH COMPLETE in {elapsed:.0f}s ({elapsed/60:.1f}min)")
    print(f" ✓ Success: {ok}/114")
    print(f" ✗ Failed: {fail}/114")
    print("=" * 60)

    # Cleanup: drop the model reference and release cached GPU memory
    # (empty_cache is a no-op when CUDA was never used)
    del alignment_model
    torch.cuda.empty_cache()
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# Script entry point: run the batch aligner when executed directly.
if __name__ == "__main__":
    main()
|
ctc_align_90.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CTC Forced Aligner for Surah 90 (Al-Balad)
|
| 4 |
+
Uses ctc-forced-aligner v0.3.0 from GitHub for word-level alignment.
|
| 5 |
+
Based on MahQuranApp/scripts/ctc_quran_aligner.py
|
| 6 |
+
"""
|
| 7 |
+
import json
|
| 8 |
+
import torch
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from ctc_forced_aligner import (
|
| 11 |
+
load_audio,
|
| 12 |
+
load_alignment_model,
|
| 13 |
+
generate_emissions,
|
| 14 |
+
preprocess_text,
|
| 15 |
+
get_alignments,
|
| 16 |
+
get_spans,
|
| 17 |
+
postprocess_results,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# Config — all inputs/outputs live inside the MahQuranApp project tree.
SURAH = 90  # Al-Balad
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")  # NOTE(review): machine-specific absolute path
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"  # per-surah verse text source
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"  # destination for letter-timing JSON
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_090.mp3"  # recitation to align
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # prefer GPU when available
BATCH_SIZE = 4  # batch size for CTC emission generation
|
| 28 |
+
|
| 29 |
+
def load_quran_text(surah_num: int) -> str:
    """Return the full text of one surah as a single space-joined string.

    Reads verses_v4.json (VERSES_PATH) and concatenates the 'text' field
    of every verse of the requested surah. An unknown surah number yields
    an empty string.
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        corpus = json.load(f)
    verse_texts = [v.get('text', '') for v in corpus.get(str(surah_num), [])]
    return ' '.join(verse_texts)
|
| 35 |
+
|
| 36 |
+
def main():
    """Align Surah 90 audio to its text with a wav2vec CTC forced aligner.

    Steps: load model + audio, build the surah text, compute CTC emissions,
    align romanized Arabic tokens, then spread each word's time span evenly
    over its characters and save per-letter timings as JSON.
    """
    print("=" * 60)
    print(f"CTC Forced Aligner for Surah {SURAH} (Al-Balad)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # 1. Load alignment model (fp16 only on CUDA — fp16 is slow/unsupported on CPU)
    print("\n[1] Loading alignment model...")
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )
    print(" Model loaded.")

    # 2. Load audio, matching the model's dtype/device so no copy is needed later
    print("\n[2] Loading audio...")
    audio_waveform = load_audio(str(AUDIO_PATH), alignment_model.dtype, alignment_model.device)
    print(f" Audio loaded.")

    # 3. Get Quran text
    text = load_quran_text(SURAH)
    print(f"\n[3] Text length: {len(text)} chars")
    print(f" First 60: {text[:60]}...")

    # 4. Generate frame-level CTC emissions (stride maps frames back to seconds)
    print("\n[4] Generating emissions...")
    emissions, stride = generate_emissions(
        alignment_model, audio_waveform, batch_size=BATCH_SIZE
    )
    print(f" Emissions shape: {emissions.shape}")

    # 5. Preprocess text — romanize Arabic so it matches the model's vocabulary
    print("\n[5] Preprocessing text...")
    tokens_starred, text_starred = preprocess_text(
        text,
        romanize=True,
        language="ara",
    )

    # 6. Get alignments
    print("\n[6] Getting alignments...")
    segments, scores, blank_token = get_alignments(
        emissions, tokens_starred, alignment_tokenizer,
    )

    # 7. Get spans
    spans = get_spans(tokens_starred, segments, blank_token)

    # 8. Post-process results into word-level {text, start, end} timestamps
    word_timestamps = postprocess_results(text_starred, spans, stride, scores)

    print(f" Got {len(word_timestamps)} word alignments")

    # 9. Convert to character-level timing (seconds format).
    # Each word's duration is split uniformly across ALL its characters
    # (including any whitespace); whitespace slots are then dropped, so a
    # space inside a word leaves a small timing gap by design.
    char_timings = []
    for wt in word_timestamps:
        word = wt['text']
        start = wt['start']
        end = wt['end']
        duration = end - start
        char_dur = duration / len(word) if word else 0

        for i, char in enumerate(word):
            if not char.isspace():
                char_timings.append({
                    "char": char,
                    "start": round(start + i * char_dur, 3),
                    "end": round(start + (i + 1) * char_dur, 3),
                    "idx": len(char_timings)
                })

    print(f"\n[7] Total chars: {len(char_timings)}")

    # 10. Save output
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH}.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(char_timings, f, ensure_ascii=False, indent=2)

    print(f"\n[8] Saved to {output_path}")

    # Print first 20 for verification
    print("\n=== First 20 characters ===")
    for ct in char_timings[:20]:
        dur_ms = (ct['end'] - ct['start']) * 1000
        print(f" {ct['idx']:3d}: '{ct['char']}' @ {ct['start']:.3f}s - {ct['end']:.3f}s ({dur_ms:.0f}ms)")

    print("\n" + "=" * 60)
    print("✓ CTC Alignment complete!")
    print("=" * 60)
|
| 125 |
+
|
| 126 |
+
if __name__ == "__main__":
|
| 127 |
+
main()
|
ctc_align_90_physics.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CTC Forced Aligner + Physics for Surah 90 (Al-Balad)
|
| 4 |
+
Uses ctc-forced-aligner (wav2vec CTC) + TajweedSST physics refinement.
|
| 5 |
+
|
| 6 |
+
Pipeline:
|
| 7 |
+
1. CTC Alignment: wav2vec forced alignment for letter timing
|
| 8 |
+
2. Tajweed Parser: Map letters to Tajweed rules
|
| 9 |
+
3. Physics Validation: Validate with acoustic physics
|
| 10 |
+
4. Export: MahQuranApp format
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
cd /Documents/26apps/tajweedsst
|
| 14 |
+
source venv/bin/activate
|
| 15 |
+
python3 ctc_align_90_physics.py
|
| 16 |
+
"""
|
| 17 |
+
import json
|
| 18 |
+
import torch
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from ctc_forced_aligner import (
|
| 22 |
+
load_audio,
|
| 23 |
+
load_alignment_model,
|
| 24 |
+
generate_emissions,
|
| 25 |
+
preprocess_text,
|
| 26 |
+
get_alignments,
|
| 27 |
+
get_spans,
|
| 28 |
+
postprocess_results,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 32 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 33 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 34 |
+
from src.duration_model import DurationModel, MaddType
|
| 35 |
+
|
| 36 |
+
import librosa
|
| 37 |
+
|
| 38 |
+
# Config — all inputs/outputs live inside the MahQuranApp project tree.
SURAH = 90  # Al-Balad
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")  # NOTE(review): machine-specific absolute path
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"  # per-surah verse text source
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"  # destination for letter-timing JSON
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_090.mp3"  # recitation to align
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # prefer GPU when available
BATCH_SIZE = 4  # batch size for CTC emission generation
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def load_quran_text(surah_num: int) -> str:
    """Load and join all verse texts of one surah from verses_v4.json.

    Missing surah keys and missing per-verse 'text' fields both degrade to
    empty strings, so the result is always a (possibly empty) string.
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as handle:
        everything = json.load(handle)
    pieces = []
    for verse in everything.get(str(surah_num), []):
        pieces.append(verse.get('text', ''))
    return ' '.join(pieces)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def run_ctc_alignment(text: str):
    """Run CTC forced alignment of AUDIO_PATH against *text*.

    Returns the word-level timestamps produced by ctc-forced-aligner
    (a list of dicts with at least 'text', 'start', 'end').
    Frees the alignment model and the CUDA cache before returning so the
    physics stage has the GPU memory back.
    """
    print("\n[1] Loading wav2vec alignment model...")
    # fp16 only on CUDA — fp16 inference is slow/unsupported on CPU.
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )

    print("\n[2] Loading audio...")
    # Load audio directly into the model's dtype/device to avoid later copies.
    audio_waveform = load_audio(str(AUDIO_PATH), alignment_model.dtype, alignment_model.device)

    print("\n[3] Generating CTC emissions...")
    emissions, stride = generate_emissions(
        alignment_model, audio_waveform, batch_size=BATCH_SIZE
    )
    print(f" Emissions shape: {emissions.shape}")

    print("\n[4] Preprocessing text...")
    # Romanize Arabic so tokens match the wav2vec model's vocabulary.
    tokens_starred, text_starred = preprocess_text(
        text,
        romanize=True,
        language="ara",
    )

    print("\n[5] Getting alignments...")
    segments, scores, blank_token = get_alignments(
        emissions, tokens_starred, alignment_tokenizer,
    )

    spans = get_spans(tokens_starred, segments, blank_token)
    word_timestamps = postprocess_results(text_starred, spans, stride, scores)

    print(f" Got {len(word_timestamps)} word alignments")

    # Cleanup GPU — the model is no longer needed after alignment.
    del alignment_model
    torch.cuda.empty_cache()

    return word_timestamps
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def convert_to_char_timings(word_timestamps):
    """Spread each word's time span evenly over its characters.

    Every non-whitespace character becomes a dict with its own start/end
    (seconds, rounded to 3 decimals), a running character index, and the
    index of the word it belongs to. Whitespace characters consume a time
    slot but emit no entry; a word with no visible characters does not
    advance the word index.
    """
    out = []
    next_word = 0

    for span in word_timestamps:
        token = span['text']
        t0 = span['start']
        t1 = span['end']
        per_char = (t1 - t0) / len(token) if token else 0

        emitted = False
        for pos, symbol in enumerate(token):
            if symbol.isspace():
                continue
            emitted = True
            out.append({
                "char": symbol,
                "start": round(t0 + pos * per_char, 3),
                "end": round(t0 + (pos + 1) * per_char, 3),
                "idx": len(out),
                "wordIdx": next_word,
            })

        if emitted:
            next_word += 1

    return out
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def apply_physics(char_timings, text):
    """Annotate char timings in place with Tajweed tags and physics scores.

    Parameters:
        char_timings: list of per-character dicts (start/end in seconds);
            mutated in place (adds 'tajweed', 'physics', 'score' keys).
        text: unused — kept for interface compatibility with callers.

    Returns:
        (char_timings, stats) where stats counts total/validated/passed/
        marginal/failed letters.

    NOTE(review): tags are paired with char_timings purely by index, which
    assumes the CTC character stream and the TajweedParser letter stream
    have identical order and count — verify for surahs with diacritics.
    """
    print("\n[6] Parsing Tajweed rules...")
    parser = TajweedParser()

    # Get all letter tags (re-reads the verses file rather than using *text*)
    all_tags = []
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        verses = json.load(f).get(str(SURAH), [])

    for verse in verses:
        word_tags = parser.parse_text(verse['text'])
        for word_tag in word_tags:
            for letter in word_tag.letters:
                all_tags.append({
                    'char': letter.char_visual,
                    'tajweed_type': letter.tajweed_type,
                    'physics_check': letter.physics_check,
                    'madd_count': letter.madd_count
                })

    print(f" Tajweed tags: {len(all_tags)}")

    # Load audio for physics (fresh librosa load at 22.05 kHz mono)
    print("\n[7] Loading audio for physics...")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Calibrate the duration model from plausible short-vowel segments
    # (50–150 ms heuristically taken as single-harakat vowels).
    vowels = [t['end'] - t['start'] for t in char_timings if 0.05 <= (t['end'] - t['start']) <= 0.15]
    if vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", vowels)
        print(f" Harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Apply physics
    print("\n[8] Applying physics validation...")
    stats = {'total': 0, 'validated': 0, 'passed': 0, 'marginal': 0, 'failed': 0}

    for i, entry in enumerate(char_timings):
        stats['total'] += 1

        # Index-based pairing; extra CTC chars past the tag list get no tag.
        if i < len(all_tags):
            tag = all_tags[i]
            entry['tajweed'] = tag['tajweed_type'].value

            if tag['physics_check'] != PhysicsCheck.NONE:
                stats['validated'] += 1
                start, end = entry['start'], entry['end']

                try:
                    check = tag['physics_check']

                    # Dispatch the appropriate acoustic check for the rule.
                    if check == PhysicsCheck.CHECK_RMS_BOUNCE:
                        val = physics.validate_qalqalah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_DURATION:
                        # Default to 2 harakaat when the parser gave no count.
                        val = physics.validate_madd(audio, start, end, tag['madd_count'] or 2)
                    elif check == PhysicsCheck.CHECK_GHUNNAH:
                        val = physics.validate_ghunnah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_FORMANT_F2:
                        val = physics.validate_tafkheem(audio, start, end)
                    else:
                        val = None

                    if val:
                        entry['physics'] = val.status.value
                        entry['score'] = float(round(val.score, 2))

                        if val.status == ValidationStatus.PASS:
                            stats['passed'] += 1
                        elif val.status == ValidationStatus.MARGINAL:
                            stats['marginal'] += 1
                        else:
                            stats['failed'] += 1
                # Best-effort: a failed acoustic check on one letter must not
                # abort the whole surah, so errors are deliberately swallowed
                # (the letter simply gets no 'physics' key).
                except Exception:
                    pass

    return char_timings, stats
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def main():
    """Full pipeline for Surah 90: CTC alignment → char timings → physics.

    Writes letter_timing_<SURAH>_ctc.json to OUTPUT_DIR and prints summary
    statistics plus a sample of the first 15 aligned characters.
    """
    print("=" * 60)
    print(f"CTC + Physics Pipeline: Surah {SURAH} (Al-Balad)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # Get text
    text = load_quran_text(SURAH)
    print(f"\nText length: {len(text)} chars")

    # Run CTC alignment
    word_timestamps = run_ctc_alignment(text)

    # Convert to char timings
    char_timings = convert_to_char_timings(word_timestamps)
    print(f"\n Total chars: {len(char_timings)}")

    # Apply physics (mutates char_timings; also returns it with stats)
    char_timings, stats = apply_physics(char_timings, text)

    # Print stats
    print(f"\n[9] Statistics:")
    print(f" Total: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")

    # Pass rate counts marginal results as acceptable.
    if stats['validated'] > 0:
        rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f" Pass Rate: {rate:.1f}%")

    # Save
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH}_ctc.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(char_timings, f, ensure_ascii=False, indent=2)
    print(f"\n[10] Saved: {output_path}")

    # Show sample
    print("\n=== First 15 characters ===")
    for ct in char_timings[:15]:
        tj = ct.get('tajweed', 'None')
        ph = ct.get('physics', '-')
        print(f" {ct['idx']:3d}: '{ct['char']}' @ {ct['start']:.3f}s | {tj} | {ph}")

    print("\n" + "=" * 60)
    print("✓ CTC + Physics Pipeline complete!")
    print(f" Output: {output_path}")
    print("=" * 60)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
if __name__ == "__main__":
|
| 259 |
+
main()
|
ctc_align_91.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CTC Forced Aligner + Physics for Surah 91 (Ash-Shams)
|
| 4 |
+
Uses ctc-forced-aligner (wav2vec CTC) + TajweedSST physics refinement.
|
| 5 |
+
|
| 6 |
+
Pipeline:
|
| 7 |
+
1. CTC Alignment: wav2vec forced alignment for letter timing
|
| 8 |
+
2. Tajweed Parser: Map letters to Tajweed rules
|
| 9 |
+
3. Physics Validation: Validate with acoustic physics
|
| 10 |
+
4. Export: MahQuranApp format
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
cd /Documents/26apps/tajweedsst
|
| 14 |
+
source venv/bin/activate
|
| 15 |
+
python3 ctc_align_91.py
|
| 16 |
+
"""
|
| 17 |
+
import json
|
| 18 |
+
import torch
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from ctc_forced_aligner import (
|
| 22 |
+
load_audio,
|
| 23 |
+
load_alignment_model,
|
| 24 |
+
generate_emissions,
|
| 25 |
+
preprocess_text,
|
| 26 |
+
get_alignments,
|
| 27 |
+
get_spans,
|
| 28 |
+
postprocess_results,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 32 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 33 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 34 |
+
from src.duration_model import DurationModel, MaddType
|
| 35 |
+
|
| 36 |
+
import librosa
|
| 37 |
+
|
| 38 |
+
# Config — all inputs/outputs live inside the MahQuranApp project tree.
SURAH = 91  # Ash-Shams
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")  # NOTE(review): machine-specific absolute path
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"  # per-surah verse text source
OUTPUT_DIR = PROJECT_ROOT / "public/data/abdul_basit"  # destination for letter-timing JSON
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_091.mp3"  # recitation to align
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # prefer GPU when available
BATCH_SIZE = 4  # batch size for CTC emission generation
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def load_quran_text(surah_num: int) -> str:
    """Return the concatenated verse text of one surah from verses_v4.json.

    A surah number absent from the file produces an empty string; verses
    missing a 'text' field contribute an empty segment.
    """
    with open(VERSES_PATH, 'r', encoding='utf-8') as src:
        by_surah = json.load(src)
    selected = by_surah.get(str(surah_num), [])
    return ' '.join(entry.get('text', '') for entry in selected)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def run_ctc_alignment(text: str):
    """Run CTC forced alignment of AUDIO_PATH against *text*.

    Returns word-level timestamps from ctc-forced-aligner (dicts with at
    least 'text', 'start', 'end'). Releases the model and empties the CUDA
    cache before returning so the physics stage can use the GPU memory.
    """
    print("\n[1] Loading wav2vec alignment model...")
    # fp16 only on CUDA — fp16 inference is slow/unsupported on CPU.
    alignment_model, alignment_tokenizer = load_alignment_model(
        DEVICE,
        dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )

    print("\n[2] Loading audio...")
    # Load audio directly into the model's dtype/device to avoid later copies.
    audio_waveform = load_audio(str(AUDIO_PATH), alignment_model.dtype, alignment_model.device)

    print("\n[3] Generating CTC emissions...")
    emissions, stride = generate_emissions(
        alignment_model, audio_waveform, batch_size=BATCH_SIZE
    )
    print(f" Emissions shape: {emissions.shape}")

    print("\n[4] Preprocessing text...")
    # Romanize Arabic so tokens match the wav2vec model's vocabulary.
    tokens_starred, text_starred = preprocess_text(
        text,
        romanize=True,
        language="ara",
    )

    print("\n[5] Getting alignments...")
    segments, scores, blank_token = get_alignments(
        emissions, tokens_starred, alignment_tokenizer,
    )

    spans = get_spans(tokens_starred, segments, blank_token)
    word_timestamps = postprocess_results(text_starred, spans, stride, scores)

    print(f" Got {len(word_timestamps)} word alignments")

    # Cleanup GPU — the model is no longer needed after alignment.
    del alignment_model
    torch.cuda.empty_cache()

    return word_timestamps
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def convert_to_char_timings(word_timestamps):
    """Interpolate word-level CTC timestamps down to per-character timing.

    Each word's duration is divided uniformly among all its characters;
    whitespace positions keep their time slot but produce no output entry.
    Each emitted dict carries start/end in seconds (3-decimal rounding),
    a global character index, and the owning word's index. Words that
    contain no visible characters do not advance the word counter.
    """
    char_timings = []
    word_idx = 0

    for wt in word_timestamps:
        word, start, end = wt['text'], wt['start'], wt['end']
        step = (end - start) / len(word) if word else 0

        visible = [(i, c) for i, c in enumerate(word) if not c.isspace()]
        for i, c in visible:
            char_timings.append({
                "char": c,
                "start": round(start + i * step, 3),
                "end": round(start + (i + 1) * step, 3),
                "idx": len(char_timings),
                "wordIdx": word_idx,
            })

        if visible:
            word_idx += 1

    return char_timings
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def apply_physics(char_timings, text):
    """Annotate char timings in place with Tajweed tags and physics scores.

    Parameters:
        char_timings: list of per-character dicts (start/end in seconds);
            mutated in place (adds 'tajweed', 'physics', 'score' keys).
        text: unused — kept for interface compatibility with callers.

    Returns:
        (char_timings, stats) where stats counts total/validated/passed/
        marginal/failed letters.

    NOTE(review): tags are paired with char_timings purely by index, which
    assumes the CTC character stream and the TajweedParser letter stream
    have identical order and count — verify for surahs with diacritics.
    """
    print("\n[6] Parsing Tajweed rules...")
    parser = TajweedParser()

    # Get all letter tags (re-reads the verses file rather than using *text*)
    all_tags = []
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        verses = json.load(f).get(str(SURAH), [])

    for verse in verses:
        word_tags = parser.parse_text(verse['text'])
        for word_tag in word_tags:
            for letter in word_tag.letters:
                all_tags.append({
                    'char': letter.char_visual,
                    'tajweed_type': letter.tajweed_type,
                    'physics_check': letter.physics_check,
                    'madd_count': letter.madd_count
                })

    print(f" Tajweed tags: {len(all_tags)}")

    # Load audio for physics (fresh librosa load at 22.05 kHz mono)
    print("\n[7] Loading audio for physics...")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Calibrate the duration model from plausible short-vowel segments
    # (50–150 ms heuristically taken as single-harakat vowels).
    vowels = [t['end'] - t['start'] for t in char_timings if 0.05 <= (t['end'] - t['start']) <= 0.15]
    if vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", vowels)
        print(f" Harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Apply physics
    print("\n[8] Applying physics validation...")
    stats = {'total': 0, 'validated': 0, 'passed': 0, 'marginal': 0, 'failed': 0}

    for i, entry in enumerate(char_timings):
        stats['total'] += 1

        # Index-based pairing; extra CTC chars past the tag list get no tag.
        if i < len(all_tags):
            tag = all_tags[i]
            entry['tajweed'] = tag['tajweed_type'].value

            if tag['physics_check'] != PhysicsCheck.NONE:
                stats['validated'] += 1
                start, end = entry['start'], entry['end']

                try:
                    check = tag['physics_check']

                    # Dispatch the appropriate acoustic check for the rule.
                    if check == PhysicsCheck.CHECK_RMS_BOUNCE:
                        val = physics.validate_qalqalah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_DURATION:
                        # Default to 2 harakaat when the parser gave no count.
                        val = physics.validate_madd(audio, start, end, tag['madd_count'] or 2)
                    elif check == PhysicsCheck.CHECK_GHUNNAH:
                        val = physics.validate_ghunnah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_FORMANT_F2:
                        val = physics.validate_tafkheem(audio, start, end)
                    else:
                        val = None

                    if val:
                        entry['physics'] = val.status.value
                        entry['score'] = float(round(val.score, 2))

                        if val.status == ValidationStatus.PASS:
                            stats['passed'] += 1
                        elif val.status == ValidationStatus.MARGINAL:
                            stats['marginal'] += 1
                        else:
                            stats['failed'] += 1
                # Best-effort: a failed acoustic check on one letter must not
                # abort the whole surah, so errors are deliberately swallowed
                # (the letter simply gets no 'physics' key).
                except Exception:
                    pass

    return char_timings, stats
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def main():
    """Full pipeline for Surah 91: CTC alignment → char timings → physics.

    Writes letter_timing_<SURAH>_ctc.json to OUTPUT_DIR and prints summary
    statistics plus a sample of the first 15 aligned characters.
    """
    print("=" * 60)
    print(f"CTC + Physics Pipeline: Surah {SURAH} (Ash-Shams)")
    print(f"Device: {DEVICE}")
    print("=" * 60)

    # Get text
    text = load_quran_text(SURAH)
    print(f"\nText length: {len(text)} chars")

    # Run CTC alignment
    word_timestamps = run_ctc_alignment(text)

    # Convert to char timings
    char_timings = convert_to_char_timings(word_timestamps)
    print(f"\n Total chars: {len(char_timings)}")

    # Apply physics (mutates char_timings; also returns it with stats)
    char_timings, stats = apply_physics(char_timings, text)

    # Print stats
    print(f"\n[9] Statistics:")
    print(f" Total: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")

    # Pass rate counts marginal results as acceptable.
    if stats['validated'] > 0:
        rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f" Pass Rate: {rate:.1f}%")

    # Save
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH}_ctc.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(char_timings, f, ensure_ascii=False, indent=2)
    print(f"\n[10] Saved: {output_path}")

    # Show sample
    print("\n=== First 15 characters ===")
    for ct in char_timings[:15]:
        tj = ct.get('tajweed', 'None')
        ph = ct.get('physics', '-')
        print(f" {ct['idx']:3d}: '{ct['char']}' @ {ct['start']:.3f}s | {tj} | {ph}")

    print("\n" + "=" * 60)
    print("✓ CTC + Physics Pipeline complete!")
    print(f" Output: {output_path}")
    print("=" * 60)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
if __name__ == "__main__":
|
| 259 |
+
main()
|
physics_analyzer.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Physics Wave Analyzer for Surah 90
|
| 4 |
+
|
| 5 |
+
Validates Tajweed rules using actual audio signal processing:
|
| 6 |
+
- Qalqalah: RMS energy dip→spike pattern
|
| 7 |
+
- Madd: Duration verification (2x, 4x, 6x average)
|
| 8 |
+
- Tafkheem: Low-frequency energy presence
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import json
|
| 12 |
+
import numpy as np
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
import librosa
|
| 17 |
+
HAS_LIBROSA = True
|
| 18 |
+
except ImportError:
|
| 19 |
+
HAS_LIBROSA = False
|
| 20 |
+
print("WARNING: librosa not available")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def convert_to_json_safe(obj):
    """Recursively convert numpy types to JSON-serializable Python types.

    Handles:
      - dicts and lists: converted element-wise;
      - tuples: converted element-wise and returned as lists (json encodes
        tuples as arrays anyway, but numpy scalars *inside* a tuple would
        otherwise slip through unconverted and break json.dump);
      - np.bool_ (not a subclass of np.integer/np.floating, so it needs
        its own branch), np.floating, np.integer: unwrapped to Python
        bool/float/int;
      - np.ndarray: via tolist(), which already yields pure Python types.

    Any other object is returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: convert_to_json_safe(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return [convert_to_json_safe(i) for i in obj]
    elif isinstance(obj, np.bool_):
        return bool(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj
|
| 36 |
+
|
| 37 |
+
# Paths — input audio/timing come from MahQuranApp; analysis output stays
# local to this script's directory.
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"  # NOTE(review): machine-specific absolute path
TIMING_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/letter_timing_90.json"  # per-letter timing with Tajweed tags
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_physics.json"  # physics analysis result
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def load_audio():
    """Load the Surah 90 recording (AUDIO_PATH) resampled to 22.05 kHz mono.

    Returns (samples, sample_rate).

    NOTE(review): this calls librosa unconditionally even though the module
    import is guarded by HAS_LIBROSA — if librosa is absent this raises
    NameError; consider checking HAS_LIBROSA here.
    """
    print(f"Loading: {AUDIO_PATH}")
    y, sr = librosa.load(AUDIO_PATH, sr=22050)
    duration = len(y) / sr
    print(f" Duration: {duration:.1f}s, Sample rate: {sr}Hz")
    return y, sr
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def load_timing():
    """Read the per-letter timing JSON (with Tajweed tags) from TIMING_PATH."""
    with open(TIMING_PATH, 'r', encoding='utf-8') as handle:
        timing_data = json.load(handle)
    return timing_data
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def extract_segment(y, sr, start, end):
    """Slice the waveform between two timestamps.

    start/end are in seconds; they are converted to sample indices by
    truncation (int), so sub-sample offsets round toward zero.
    """
    lo = int(start * sr)
    hi = int(end * sr)
    return y[lo:hi]
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def analyze_qalqalah(segment, sr):
    """
    Analyze Qalqalah (bounce) pattern.
    Expected: RMS dip followed by spike at letter end.
    """
    # Too little audio to frame reliably.
    if len(segment) < 512:
        return {"status": "TOO_SHORT", "confidence": 0.0}

    # Frame-wise RMS energy envelope.
    energy = librosa.feature.rms(y=segment, frame_length=256, hop_length=64)[0]

    if len(energy) < 4:
        return {"status": "INSUFFICIENT_FRAMES", "confidence": 0.0}

    # Split the envelope into thirds and compare their mean levels.
    third = len(energy) // 3
    if third < 1:
        return {"status": "TOO_SHORT", "confidence": 0.0}

    head = np.mean(energy[:third])
    mid = np.mean(energy[third:2 * third])
    tail = np.mean(energy[2 * third:])

    # Qalqalah signature: a dip in the middle, then a spike at the end.
    dipped = mid < head * 0.9
    spiked = tail > mid * 1.1

    if dipped and spiked:
        score = min(1.0, (head - mid) / head + (tail - mid) / tail)
        return {
            "status": "DETECTED",
            "confidence": round(score, 3),
            "pattern": {"first": round(float(head), 4), "middle": round(float(mid), 4), "last": round(float(tail), 4)}
        }
    if spiked:
        return {"status": "PARTIAL_SPIKE", "confidence": 0.5}
    return {"status": "NOT_DETECTED", "confidence": 0.2}
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def analyze_madd(segment, sr, expected_count):
    """
    Analyze Madd (elongation) duration.
    Verify letter duration matches expected count (2, 4, or 6 harakaat).
    """
    # Average haraka duration ~100-150ms for Tarteel recitation.
    base_haraka = 120  # ms
    expected_ms = expected_count * base_haraka
    actual_ms = len(segment) / sr * 1000

    ratio = actual_ms / expected_ms if expected_ms > 0 else 0

    # Classify: within ±30% is correct, within ±50% is close, else mismatch.
    if 0.7 <= ratio <= 1.3:
        status, confidence = "CORRECT", 1.0 - abs(1.0 - ratio)
    elif 0.5 <= ratio <= 1.5:
        status, confidence = "CLOSE", 0.6
    else:
        status, confidence = "MISMATCH", 0.3

    return {
        "status": status,
        "confidence": round(confidence, 3),
        "actual_ms": round(actual_ms, 1),
        "expected_ms": round(expected_ms, 1),
        "ratio": round(ratio, 2)
    }
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def analyze_tafkheem(segment, sr):
    """
    Analyze Tafkheem (heaviness) - heavy letters have stronger low frequencies.
    """
    if len(segment) < 1024:
        return {"status": "TOO_SHORT", "confidence": 0.0}

    # Spectral centroid: a lower spectral centre of mass = heavier articulation.
    brightness = float(np.mean(librosa.feature.spectral_centroid(y=segment, sr=sr)[0]))

    # Thresholds: heavy < 1800 Hz, moderate < 2200 Hz, otherwise light.
    if brightness < 1800:
        status, confidence = "HEAVY", 0.9
    elif brightness < 2200:
        status, confidence = "MODERATE", 0.7
    else:
        status, confidence = "LIGHT", 0.4

    return {
        "status": status,
        "confidence": round(confidence, 3),
        "spectral_centroid": round(brightness, 1)
    }
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def run_analysis():
    """Run physics analysis on all tagged letters.

    Pipeline: load audio + timing JSON, route each Tajweed-tagged letter to
    the matching analyzer (qalqalah / madd / tafkheem), aggregate pass rates,
    write the result JSON to OUTPUT_PATH and print sample rows.
    Returns the results dict, or None when librosa is missing.
    """

    print("=" * 60)
    print("Physics Wave Analysis - Surah 90")
    print("=" * 60)

    # Bail out early when librosa failed to import (see module-level guard).
    if not HAS_LIBROSA:
        print("ERROR: librosa required for analysis")
        return

    # Load data
    y, sr = load_audio()
    timing = load_timing()

    print(f"\n[1] Analyzing {len(timing)} letters...")

    # Analyze each tagged letter
    results = {
        "qalqalah": [],
        "madd": [],
        "tafkheem": [],
        "summary": {}
    }

    # counts = letters routed per category; passed = letters whose analysis
    # met the per-category acceptance threshold below.
    counts = {"qalqalah": 0, "madd": 0, "tafkheem": 0, "other": 0}
    passed = {"qalqalah": 0, "madd": 0, "tafkheem": 0}

    for entry in timing:
        tajweed = entry.get("tajweed_type", "None")
        physics = entry.get("physics_check", "None")

        # Entries without a Tajweed tag or physics check are skipped entirely.
        if tajweed == "None" or physics == "None":
            continue

        start = entry.get("start", 0)
        end = entry.get("end", 0)
        char = entry.get("char", "")

        segment = extract_segment(y, sr, start, end)

        # Routing is by substring of the tag, first match wins (elif chain),
        # so a letter is counted in at most one category.
        if "qalqalah" in tajweed.lower():
            counts["qalqalah"] += 1
            analysis = analyze_qalqalah(segment, sr)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            analysis["tajweed"] = tajweed
            results["qalqalah"].append(analysis)
            if analysis["confidence"] >= 0.5:
                passed["qalqalah"] += 1

        elif "madd" in tajweed.lower():
            counts["madd"] += 1
            madd_count = entry.get("madd_count", 2)  # default: natural madd (2 harakaat)
            analysis = analyze_madd(segment, sr, madd_count)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            analysis["tajweed"] = tajweed
            analysis["expected_count"] = madd_count
            results["madd"].append(analysis)
            if analysis["confidence"] >= 0.5:
                passed["madd"] += 1

        elif "tafkheem" in tajweed.lower():
            counts["tafkheem"] += 1
            analysis = analyze_tafkheem(segment, sr)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            analysis["tajweed"] = tajweed
            results["tafkheem"].append(analysis)
            # Tafkheem passes on status rather than numeric confidence.
            if analysis["status"] in ["HEAVY", "MODERATE"]:
                passed["tafkheem"] += 1

        else:
            counts["other"] += 1

    # Summary — max(1, n) avoids division by zero when a category is empty.
    results["summary"] = {
        "qalqalah": {"total": counts["qalqalah"], "passed": passed["qalqalah"], "rate": round(passed["qalqalah"]/max(1,counts["qalqalah"]), 2)},
        "madd": {"total": counts["madd"], "passed": passed["madd"], "rate": round(passed["madd"]/max(1,counts["madd"]), 2)},
        "tafkheem": {"total": counts["tafkheem"], "passed": passed["tafkheem"], "rate": round(passed["tafkheem"]/max(1,counts["tafkheem"]), 2)},
    }

    # Print results
    print("\n[2] Results:")
    print(f" Qalqalah: {passed['qalqalah']}/{counts['qalqalah']} passed ({results['summary']['qalqalah']['rate']*100:.0f}%)")
    print(f" Madd: {passed['madd']}/{counts['madd']} passed ({results['summary']['madd']['rate']*100:.0f}%)")
    print(f" Tafkheem: {passed['tafkheem']}/{counts['tafkheem']} passed ({results['summary']['tafkheem']['rate']*100:.0f}%)")

    # Save (convert numpy scalars first — json.dump cannot serialize them).
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(convert_to_json_safe(results), f, ensure_ascii=False, indent=2)
    print(f"\n[3] Saved: {OUTPUT_PATH}")

    # Show samples
    print("\n[4] Sample Qalqalah Analysis:")
    for r in results["qalqalah"][:3]:
        print(f" [{r['char']}] {r['time']} → {r['status']} (conf: {r['confidence']})")

    print("\n[5] Sample Madd Analysis:")
    for r in results["madd"][:3]:
        print(f" [{r['char']}] {r['actual_ms']:.0f}ms vs {r['expected_ms']:.0f}ms → {r['status']}")

    print("\n" + "=" * 60)
    print("✓ Physics Analysis Complete!")
    print("=" * 60)

    return results


if __name__ == "__main__":
    run_analysis()
|
physics_analyzer_v2.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Enhanced Physics Wave Analyzer - Using Lisan al-Arab Principles
|
| 4 |
+
|
| 5 |
+
Integrated from MahQuranApp/scripts/lisan_madd_detector.py
|
| 6 |
+
|
| 7 |
+
Key techniques:
|
| 8 |
+
1. Sustained region detection (spectral flux + energy stability)
|
| 9 |
+
2. Anti-drift stabilization (gap closing + minimum duration)
|
| 10 |
+
3. Per-character Tajweed physics analysis
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
import numpy as np
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from scipy.ndimage import gaussian_filter1d
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
import librosa
|
| 20 |
+
HAS_LIBROSA = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
HAS_LIBROSA = False
|
| 23 |
+
print("WARNING: librosa not available")
|
| 24 |
+
|
| 25 |
+
# Paths
# NOTE(review): absolute, machine-specific paths — TODO make configurable.
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"
TIMING_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/letter_timing_90.json"
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_physics_v2.json"

# Tajweed character sets (base letters; entries are matched on char[0]).
MADD_LETTERS = set('اويٱى')  # elongation vowels
QALQALAH_LETTERS = set('قطبجد')  # bouncing ("qutb jad") letters
TAFKHEEM_LETTERS = set('صضطظخغق')  # emphatic / heavy consonants
HALQ_LETTERS = set('ءهعحغخ')  # throat letters — declared but not referenced in this script's visible code; TODO confirm intended use
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def convert_to_json_safe(obj):
    """Recursively convert numpy types to JSON-serializable Python types.

    Handles dicts, lists, numpy float/int/bool scalars and ndarrays.
    Any other object is returned unchanged and must already be
    serializable for json.dump to succeed.
    """
    if isinstance(obj, dict):
        return {k: convert_to_json_safe(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_json_safe(i) for i in obj]
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.bool_):
        # json cannot serialize np.bool_ — without this branch json.dump
        # raises TypeError (the v3 copy of this helper already handles it).
        return bool(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class LisanPhysicsAnalyzer:
    """
    Physics analyzer using Lisan al-Arab acoustic principles.

    Loads the full recording once at construction time; the per-rule checks
    (madd, qalqalah, tafkheem) then operate on time-sliced segments of it.
    """

    def __init__(self, audio_path, sr=16000, hop_length=256):
        # sr: analysis sample rate (audio is resampled on load).
        # hop_length: STFT/RMS hop used by the sustain detector.
        self.audio_path = str(audio_path)
        self.sr = sr
        self.hop_length = hop_length

        print(f"Loading audio: {audio_path}")
        self.audio, _ = librosa.load(self.audio_path, sr=self.sr)
        self.duration = len(self.audio) / self.sr
        print(f" Duration: {self.duration:.1f}s, Sample rate: {sr}Hz")

    def extract_segment(self, start, end):
        """Extract audio segment by time (seconds -> sample slice)."""
        start_sample = int(start * self.sr)
        end_sample = int(end * self.sr)
        return self.audio[start_sample:end_sample]

    def detect_sustained_regions(self, segment):
        """
        Detect regions where sound is SUSTAINED (استمرّ).
        From LisanMaddDetector - detects madd vowels being held.

        Returns: array of sustain scores per frame (higher = more sustained).
        Segments shorter than 512 samples return a single zero frame.
        """
        if len(segment) < 512:
            return np.zeros(1)

        # 1. Compute spectral flux (low flux = sustained sound)
        S = np.abs(librosa.stft(segment, hop_length=self.hop_length))
        flux = np.sqrt(np.sum(np.diff(S, axis=1)**2, axis=0))
        # Prepend 0 so flux aligns frame-for-frame with the STFT columns.
        flux = np.concatenate([[0], flux])
        flux = gaussian_filter1d(flux.astype(np.float64), sigma=2)

        # Invert: high score where flux is LOW (sustained sound)
        max_flux = np.max(flux) if np.max(flux) > 0 else 1
        sustain_score = 1 - (flux / max_flux)

        # 2. Check energy stability (sustained sounds have stable RMS)
        energy = librosa.feature.rms(y=segment, hop_length=self.hop_length)[0]
        energy = gaussian_filter1d(energy.astype(np.float64), sigma=2)

        # Energy stability: low variance in local windows.
        # Edge frames (first/last `window`) keep stability 0 by construction.
        stability = np.zeros_like(energy)
        window = 5
        for i in range(window, len(energy) - window):
            local_std = np.std(energy[max(0, i-window):i+window])
            local_mean = np.mean(energy[max(0, i-window):i+window])
            if local_mean > 0:
                stability[i] = 1 - min(local_std / local_mean, 1)

        # Pad stability to match sustain_score length
        min_len = min(len(sustain_score), len(stability))
        sustain_score = sustain_score[:min_len]
        stability = stability[:min_len]

        # Combined score: both low flux AND stable energy = sustained vowel
        combined = sustain_score * stability

        return combined

    def analyze_madd(self, segment, char, expected_count=2):
        """
        Analyze Madd (elongation) using sustain detection.

        Args:
            segment: waveform slice for the letter.
            char: the letter under analysis (currently unused in this body).
            expected_count: expected harakaat (2, 4 or 6); default natural madd.
        """
        duration_ms = len(segment) / self.sr * 1000

        # Detect sustained regions
        sustain_scores = self.detect_sustained_regions(segment)
        avg_sustain = np.mean(sustain_scores) if len(sustain_scores) > 0 else 0

        # Calculate expected duration
        base_haraka = 100  # ms per haraka (Abdul Basit is slower)
        expected_duration = expected_count * base_haraka

        # Determine if sustain matches expected madd
        if avg_sustain > 0.5:
            detected_count = 3 if avg_sustain > 0.7 else 2
        else:
            detected_count = 1

        ratio = duration_ms / expected_duration if expected_duration > 0 else 0

        # Status needs BOTH sufficient duration and a sustained envelope.
        if ratio >= 0.7 and avg_sustain >= 0.4:
            status = "SUSTAINED"
            confidence = 0.8 if avg_sustain > 0.6 else 0.6
        elif ratio >= 0.5:
            status = "PARTIAL"
            confidence = 0.5
        else:
            status = "SHORT"
            confidence = 0.3

        return {
            "status": status,
            "confidence": round(confidence, 3),
            "actual_ms": round(duration_ms, 1),
            "expected_ms": round(expected_duration, 1),
            "ratio": round(ratio, 2),
            "sustain_score": round(avg_sustain, 3),
            "detected_count": detected_count
        }

    def analyze_qalqalah(self, segment):
        """
        Analyze Qalqalah (bounce) using RMS energy patterns.
        Improved: checks for energy release at end of segment.
        """
        if len(segment) < 256:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        # Use smaller frame for short segments
        frame_length = min(256, len(segment) // 2)
        hop = frame_length // 4

        rms = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop)[0]

        if len(rms) < 3:
            return {"status": "INSUFFICIENT_FRAMES", "confidence": 0.0}

        # Qalqalah pattern: should have energy release at end
        # Look at last third vs first two-thirds
        split_idx = len(rms) * 2 // 3
        first_part = np.mean(rms[:split_idx])
        last_part = np.mean(rms[split_idx:])

        # Also check for any spike in segment
        max_rms = np.max(rms)
        mean_rms = np.mean(rms)

        # 0.01 RMS floor is an empirical silence threshold — TODO confirm
        # it suits recordings at other gain levels.
        has_energy = mean_rms > 0.01
        has_release = last_part > first_part * 0.8  # Energy maintained or released at end
        has_spike = max_rms > mean_rms * 1.3

        if has_energy and has_release and has_spike:
            # Confidence grows with spike prominence, capped at 0.9.
            confidence = min(0.9, (max_rms / mean_rms - 1) + 0.5)
            return {
                "status": "DETECTED",
                "confidence": round(confidence, 3),
                "pattern": {
                    "first": round(float(first_part), 4),
                    "last": round(float(last_part), 4),
                    "max": round(float(max_rms), 4),
                    "mean": round(float(mean_rms), 4)
                }
            }
        elif has_energy:
            return {"status": "PARTIAL", "confidence": 0.4}
        else:
            return {"status": "NO_ENERGY", "confidence": 0.1}

    def analyze_tafkheem(self, segment):
        """
        Analyze Tafkheem (heaviness) using spectral centroid.
        Heavy consonants have lower spectral centroid (more bass).
        """
        if len(segment) < 512:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        # Compute spectral centroid
        centroid = librosa.feature.spectral_centroid(y=segment, sr=self.sr)[0]
        mean_centroid = np.mean(centroid)

        # Also check low-frequency energy ratio (share of magnitude < 1 kHz).
        S = np.abs(librosa.stft(segment))
        freqs = librosa.fft_frequencies(sr=self.sr)
        low_freq_idx = np.where(freqs < 1000)[0]
        high_freq_idx = np.where(freqs >= 1000)[0]

        low_energy = np.sum(S[low_freq_idx, :])
        high_energy = np.sum(S[high_freq_idx, :])
        total_energy = low_energy + high_energy

        # Neutral 0.5 fallback when the segment is pure silence.
        low_ratio = low_energy / total_energy if total_energy > 0 else 0.5

        # Heavy letters: low centroid + high low-frequency ratio
        if mean_centroid < 1500 and low_ratio > 0.6:
            status = "HEAVY"
            confidence = 0.9
        elif mean_centroid < 2000 or low_ratio > 0.5:
            status = "MODERATE"
            confidence = 0.7
        else:
            status = "LIGHT"
            confidence = 0.4

        return {
            "status": status,
            "confidence": round(confidence, 3),
            "spectral_centroid": round(float(mean_centroid), 1),
            "low_freq_ratio": round(float(low_ratio), 3)
        }
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def run_enhanced_analysis():
    """Run enhanced physics analysis on all tagged letters.

    Unlike v1's tag-driven elif chain, letters are routed by their BASE
    character (char[0]) through independent ifs, so one letter may be
    analyzed under several categories. Writes results to OUTPUT_PATH and
    returns the results dict (None when librosa is missing).
    """

    print("=" * 60)
    print("Enhanced Physics Analysis - Surah 90")
    print("Using Lisan al-Arab Acoustic Principles")
    print("=" * 60)

    # Bail out early when librosa failed to import (module-level guard).
    if not HAS_LIBROSA:
        print("ERROR: librosa required for analysis")
        return

    # Load analyzer
    analyzer = LisanPhysicsAnalyzer(AUDIO_PATH)

    # Load timing data
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        timing = json.load(f)

    print(f"\n[1] Analyzing {len(timing)} letters...")

    # Results
    results = {
        "qalqalah": [],
        "madd": [],
        "tafkheem": [],
        "summary": {}
    }

    # counts = letters routed per category; passed = acceptance-threshold hits.
    counts = {"qalqalah": 0, "madd": 0, "tafkheem": 0}
    passed = {"qalqalah": 0, "madd": 0, "tafkheem": 0}

    for entry in timing:
        char = entry.get("char", "")
        base_char = char[0] if char else ""  # First char is base letter
        start = entry.get("start", 0)
        end = entry.get("end", 0)

        segment = analyzer.extract_segment(start, end)

        # Analyze based on character type (independent checks, not elif —
        # a letter such as ق is both a qalqalah and a tafkheem letter).
        if base_char in QALQALAH_LETTERS:
            counts["qalqalah"] += 1
            analysis = analyzer.analyze_qalqalah(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["qalqalah"].append(analysis)
            if analysis["confidence"] >= 0.4:
                passed["qalqalah"] += 1

        if base_char in MADD_LETTERS:
            counts["madd"] += 1
            madd_count = entry.get("madd_count", 2)  # default: natural madd
            analysis = analyzer.analyze_madd(segment, char, madd_count)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["madd"].append(analysis)
            if analysis["status"] in ["SUSTAINED", "PARTIAL"]:
                passed["madd"] += 1

        if base_char in TAFKHEEM_LETTERS:
            counts["tafkheem"] += 1
            analysis = analyzer.analyze_tafkheem(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["tafkheem"].append(analysis)
            if analysis["status"] in ["HEAVY", "MODERATE"]:
                passed["tafkheem"] += 1

    # Summary — max(1, n) guards against empty categories.
    results["summary"] = {
        "qalqalah": {
            "total": counts["qalqalah"],
            "passed": passed["qalqalah"],
            "rate": round(passed["qalqalah"] / max(1, counts["qalqalah"]), 2)
        },
        "madd": {
            "total": counts["madd"],
            "passed": passed["madd"],
            "rate": round(passed["madd"] / max(1, counts["madd"]), 2)
        },
        "tafkheem": {
            "total": counts["tafkheem"],
            "passed": passed["tafkheem"],
            "rate": round(passed["tafkheem"] / max(1, counts["tafkheem"]), 2)
        },
    }

    # Print results
    print("\n[2] Results (Using Lisan Acoustic Detection):")
    print(f" Qalqalah: {passed['qalqalah']}/{counts['qalqalah']} ({results['summary']['qalqalah']['rate']*100:.0f}%)")
    print(f" Madd: {passed['madd']}/{counts['madd']} ({results['summary']['madd']['rate']*100:.0f}%)")
    print(f" Tafkheem: {passed['tafkheem']}/{counts['tafkheem']} ({results['summary']['tafkheem']['rate']*100:.0f}%)")

    # Save (convert numpy scalars first — json.dump cannot serialize them).
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(convert_to_json_safe(results), f, ensure_ascii=False, indent=2)
    print(f"\n[3] Saved: {OUTPUT_PATH}")

    # Show samples
    print("\n[4] Sample Qalqalah (Improved Detection):")
    for r in results["qalqalah"][:5]:
        print(f" [{r['char']}] {r['time']} → {r['status']} (conf: {r['confidence']})")

    print("\n[5] Sample Madd (Sustain Detection):")
    for r in results["madd"][:5]:
        print(f" [{r['char']}] {r['actual_ms']:.0f}ms, sustain:{r['sustain_score']:.2f} → {r['status']}")

    print("\n[6] Sample Tafkheem (Heavy Letter Detection):")
    for r in results["tafkheem"][:5]:
        print(f" [{r['char']}] centroid:{r['spectral_centroid']:.0f}Hz, low_ratio:{r['low_freq_ratio']:.2f} → {r['status']}")

    print("\n" + "=" * 60)
    print("✓ Enhanced Physics Analysis Complete!")
    print("=" * 60)

    return results


if __name__ == "__main__":
    run_enhanced_analysis()
|
physics_analyzer_v3.py
ADDED
|
@@ -0,0 +1,542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST Enhanced Analyzer v3
|
| 4 |
+
|
| 5 |
+
Integrated improvements:
|
| 6 |
+
1. Ghunnah detection (nasal resonance via parselmouth)
|
| 7 |
+
2. Pitch tracking for Madd (F0 contour stability)
|
| 8 |
+
3. Cross-word rules (Idgham, Ikhfa, Iqlab)
|
| 9 |
+
4. Neural-style confidence calibration
|
| 10 |
+
|
| 11 |
+
Architecture: Lisan al-Arab + DSP + Tajweed Science
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import numpy as np
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from scipy.ndimage import gaussian_filter1d
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
import librosa
|
| 21 |
+
HAS_LIBROSA = True
|
| 22 |
+
except ImportError:
|
| 23 |
+
HAS_LIBROSA = False
|
| 24 |
+
print("WARNING: librosa not available")
|
| 25 |
+
|
| 26 |
+
try:
|
| 27 |
+
import parselmouth
|
| 28 |
+
from parselmouth.praat import call
|
| 29 |
+
HAS_PARSELMOUTH = True
|
| 30 |
+
except ImportError:
|
| 31 |
+
HAS_PARSELMOUTH = False
|
| 32 |
+
print("WARNING: parselmouth not available (Ghunnah detection disabled)")
|
| 33 |
+
|
| 34 |
+
# Paths
# NOTE(review): absolute, machine-specific paths — TODO make configurable.
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"
TIMING_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/letter_timing_90.json"
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_physics_v3.json"

# Character sets
MADD_LETTERS = set('اويٱى')
QALQALAH_LETTERS = set('قطبجد')
TAFKHEEM_LETTERS = set('صضطظخغق')
GHUNNAH_LETTERS = set('نم')  # Nasal letters
HALQ_LETTERS = set('ءهعحغخ')

# Cross-word rule triggers
# NOTE(review): the Latin 'w' inside IDGHAM_TARGETS looks like a stray typo —
# it can never match Arabic text, and waw (و) is already in the set. Verify
# against the rule table and remove if unintended.
IDGHAM_TARGETS = set('يرملونw')  # Letters that cause Idgham after ن
IKHFA_TARGETS = set('تثجدذزسشصضطظفقك')  # Letters that cause Ikhfa after ن
IQLAB_TARGET = 'ب'  # ن before ب becomes م
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def convert_to_json_safe(obj):
    """Recursively replace numpy containers/scalars with plain Python values
    so the structure can be fed to json.dump unchanged."""
    if isinstance(obj, dict):
        return {key: convert_to_json_safe(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return [convert_to_json_safe(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    # Anything else is assumed to already be JSON-serializable.
    return obj
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class TajweedAnalyzerV3:
    """
    Enhanced Tajweed physics analyzer with full rule detection.

    Loads the recitation audio once (librosa for feature extraction and,
    when available, parselmouth/Praat for formant-based Ghunnah analysis)
    and exposes per-rule acoustic checks plus cross-word rule detection.
    """

    def __init__(self, audio_path, sr=16000, hop_length=256):
        """
        Args:
            audio_path: Path to the recitation audio file.
            sr: Target sample rate librosa resamples to.
            hop_length: Stored hop length for frame-based features.
        """
        self.audio_path = str(audio_path)
        self.sr = sr
        self.hop_length = hop_length

        print(f"Loading audio: {audio_path}")
        self.audio, _ = librosa.load(self.audio_path, sr=self.sr)
        self.duration = len(self.audio) / self.sr
        print(f" Duration: {self.duration:.1f}s")

        # Load for parselmouth (needs original file, not the resampled array)
        if HAS_PARSELMOUTH:
            self.sound = parselmouth.Sound(self.audio_path)

    def extract_segment(self, start, end):
        """Extract audio samples between two timestamps (seconds)."""
        start_sample = int(start * self.sr)
        end_sample = int(end * self.sr)
        return self.audio[start_sample:end_sample]

    # ===== GHUNNAH DETECTION (Nasal Resonance) =====

    def analyze_ghunnah(self, start, end, char):
        """
        Analyze Ghunnah (nasal resonance) using formant analysis.
        Nasal sounds have:
        1. Anti-formant (energy dip) around 500-1500 Hz
        2. Higher formant bandwidth
        3. Specific F1/F2 patterns

        Args:
            start, end: Segment boundaries in seconds (original audio).
            char: The letter being analyzed (currently informational only).

        Returns:
            Dict with "status", "confidence" and, on success, formant stats.
        """
        if not HAS_PARSELMOUTH:
            return {"status": "SKIPPED", "confidence": 0.0, "reason": "parselmouth unavailable"}

        try:
            # Extract segment from parselmouth sound
            segment = self.sound.extract_part(from_time=start, to_time=end, preserve_times=False)

            # Formant tracking needs a minimum amount of signal.
            if segment.get_total_duration() < 0.03:
                return {"status": "TOO_SHORT", "confidence": 0.0}

            # Burg-method formant tracking (5 formants up to 5500 Hz).
            formants = call(segment, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

            n_frames = call(formants, "Get number of frames")
            if n_frames < 1:
                return {"status": "NO_FRAMES", "confidence": 0.0}

            f1_values = []
            f2_values = []
            bandwidths = []

            # Collect per-frame F1/F2 and F1 bandwidth, skipping NaN frames.
            for i in range(1, n_frames + 1):
                time = call(formants, "Get time from frame number", i)
                f1 = call(formants, "Get value at time", 1, time, "Hertz", "Linear")
                f2 = call(formants, "Get value at time", 2, time, "Hertz", "Linear")
                bw1 = call(formants, "Get bandwidth at time", 1, time, "Hertz", "Linear")

                if not np.isnan(f1):
                    f1_values.append(f1)
                if not np.isnan(f2):
                    f2_values.append(f2)
                if not np.isnan(bw1):
                    bandwidths.append(bw1)

            if not f1_values or not bandwidths:
                return {"status": "NO_FORMANTS", "confidence": 0.0}

            avg_f1 = np.mean(f1_values)
            avg_f2 = np.mean(f2_values) if f2_values else 0
            avg_bandwidth = np.mean(bandwidths)

            # Ghunnah indicators:
            # 1. Low F1 (nasal cavity resonance) - typically 200-400 Hz
            # 2. High bandwidth (nasal damping)
            # 3. F2 in nasal range
            low_f1 = avg_f1 < 500
            high_bandwidth = avg_bandwidth > 150
            nasal_f2 = 800 < avg_f2 < 2000

            indicators = sum([low_f1, high_bandwidth, nasal_f2])

            # 2+ indicators => detected; scale confidence with extra evidence.
            if indicators >= 2:
                status = "DETECTED"
                confidence = 0.7 + (indicators - 2) * 0.15
            elif indicators == 1:
                status = "PARTIAL"
                confidence = 0.5
            else:
                status = "NOT_DETECTED"
                confidence = 0.2

            return {
                "status": status,
                "confidence": round(confidence, 3),
                "f1": round(avg_f1, 1),
                "f2": round(avg_f2, 1),
                "bandwidth": round(avg_bandwidth, 1),
                "indicators": {"low_f1": low_f1, "high_bandwidth": high_bandwidth, "nasal_f2": nasal_f2}
            }

        except Exception as e:
            return {"status": "ERROR", "confidence": 0.0, "error": str(e)}

    # ===== PITCH TRACKING FOR MADD =====

    def analyze_madd_pitch(self, segment, char, expected_count=2):
        """
        Analyze Madd (elongation) using pitch (F0) stability.
        Sustained vowels have stable pitch with minimal variation.

        Args:
            segment: Audio samples for the letter.
            char: The letter being analyzed (informational).
            expected_count: Expected elongation length in harakat.
        """
        duration_ms = len(segment) / self.sr * 1000

        # Extract pitch using librosa's probabilistic YIN tracker.
        try:
            f0, voiced_flag, voiced_probs = librosa.pyin(
                segment,
                fmin=50,
                fmax=500,
                sr=self.sr,
                frame_length=1024,
                hop_length=256
            )
        except Exception:
            # Fallback to basic duration-only sustain detection
            return self._basic_madd_analysis(segment, duration_ms, expected_count)

        # Filter to voiced frames only
        f0_voiced = f0[~np.isnan(f0)]

        if len(f0_voiced) < 3:
            return self._basic_madd_analysis(segment, duration_ms, expected_count)

        # Pitch stability: low coefficient of variation = sustained
        pitch_mean = np.mean(f0_voiced)
        pitch_std = np.std(f0_voiced)
        pitch_cv = pitch_std / pitch_mean if pitch_mean > 0 else 1.0

        # Voicing ratio: high means continuous sound
        voicing_ratio = len(f0_voiced) / len(f0)

        pitch_stable = pitch_cv < 0.15
        well_voiced = voicing_ratio > 0.6

        # Expected duration: one haraka is taken as ~100 ms here.
        base_haraka = 100  # ms
        expected_duration = expected_count * base_haraka
        duration_match = 0.7 <= (duration_ms / expected_duration) <= 1.5 if expected_duration > 0 else False

        if pitch_stable and well_voiced and duration_match:
            status = "SUSTAINED"
            confidence = 0.85
        elif (pitch_stable and well_voiced) or (well_voiced and duration_match):
            status = "PARTIAL"
            confidence = 0.6
        elif well_voiced:
            status = "VOICED"
            confidence = 0.4
        else:
            status = "WEAK"
            confidence = 0.2

        return {
            "status": status,
            "confidence": round(confidence, 3),
            "duration_ms": round(duration_ms, 1),
            "expected_ms": round(expected_duration, 1),
            "pitch_mean": round(pitch_mean, 1),
            "pitch_cv": round(pitch_cv, 3),
            "voicing_ratio": round(voicing_ratio, 3)
        }

    def _basic_madd_analysis(self, segment, duration_ms, expected_count):
        """Fallback Madd check using only duration vs. expected harakat."""
        expected_duration = expected_count * 100
        ratio = duration_ms / expected_duration if expected_duration > 0 else 0

        if 0.7 <= ratio <= 1.5:
            return {"status": "SUSTAINED", "confidence": 0.5, "duration_ms": round(duration_ms, 1)}
        return {"status": "WEAK", "confidence": 0.3, "duration_ms": round(duration_ms, 1)}

    # ===== QALQALAH (Improved) =====

    def analyze_qalqalah(self, segment):
        """Improved Qalqalah detection via the RMS energy-release pattern."""
        if len(segment) < 256:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        frame_length = min(256, len(segment) // 2)
        hop = frame_length // 4

        rms = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop)[0]

        if len(rms) < 3:
            return {"status": "INSUFFICIENT", "confidence": 0.0}

        # Qalqalah: energy release ("bounce") in the final third of the letter.
        split = len(rms) * 2 // 3
        first = np.mean(rms[:split])
        last = np.mean(rms[split:])
        max_rms = np.max(rms)
        mean_rms = np.mean(rms)

        has_energy = mean_rms > 0.01
        has_release = last > first * 0.8
        has_spike = max_rms > mean_rms * 1.3

        if has_energy and has_release and has_spike:
            # Scale confidence with spike prominence, capped at 0.9.
            confidence = min(0.9, (max_rms / mean_rms - 1) + 0.5)
            return {"status": "DETECTED", "confidence": round(confidence, 3)}
        elif has_energy:
            return {"status": "PARTIAL", "confidence": 0.4}
        return {"status": "NO_ENERGY", "confidence": 0.1}

    # ===== TAFKHEEM (Heavy Letters) =====

    def analyze_tafkheem(self, segment):
        """Analyze Tafkheem (heaviness) via spectral centroid + low-band energy."""
        if len(segment) < 512:
            return {"status": "TOO_SHORT", "confidence": 0.0}

        centroid = librosa.feature.spectral_centroid(y=segment, sr=self.sr)[0]
        mean_centroid = np.mean(centroid)

        # Ratio of spectral energy below 1 kHz vs. above.
        S = np.abs(librosa.stft(segment))
        freqs = librosa.fft_frequencies(sr=self.sr)
        low_idx = np.where(freqs < 1000)[0]
        high_idx = np.where(freqs >= 1000)[0]

        low_energy = np.sum(S[low_idx, :])
        high_energy = np.sum(S[high_idx, :])
        total = low_energy + high_energy
        low_ratio = low_energy / total if total > 0 else 0.5

        # Heavy letters concentrate energy low with a dark centroid.
        if mean_centroid < 1500 and low_ratio > 0.6:
            return {"status": "HEAVY", "confidence": 0.9, "centroid": round(mean_centroid, 1)}
        elif mean_centroid < 2000 or low_ratio > 0.5:
            return {"status": "MODERATE", "confidence": 0.7, "centroid": round(mean_centroid, 1)}
        return {"status": "LIGHT", "confidence": 0.4, "centroid": round(mean_centroid, 1)}

    # ===== CROSS-WORD RULES =====

    def analyze_cross_word_rules(self, timing_data):
        """
        Analyze cross-word Tajweed rules:
        - Idgham: ن/م merges into following letter
        - Ikhfa: ن partially hidden before certain letters
        - Iqlab: ن becomes م sound before ب

        Args:
            timing_data: Letter-timing dicts with "char"/"start"/"end" keys.

        Returns:
            Dict with "idgham", "ikhfa" and "iqlab" occurrence lists.
        """
        results = {
            "idgham": [],
            "ikhfa": [],
            "iqlab": []
        }

        for i, entry in enumerate(timing_data):
            char = entry.get("char", "")
            base_char = char[0] if char else ""

            # Check if this is a Noon with Sukun or Tanween
            has_sukun = 'ْ' in char
            has_tanween = any(c in char for c in 'ًٌٍ')
            is_noon_trigger = base_char == 'ن' and (has_sukun or has_tanween)
            is_meem_trigger = base_char == 'م' and has_sukun

            if not (is_noon_trigger or is_meem_trigger):
                continue

            # Look at next letter
            if i + 1 >= len(timing_data):
                continue

            next_entry = timing_data[i + 1]
            next_char = next_entry.get("char", "")
            next_base = next_char[0] if next_char else ""

            # Iqlab: ن before ب — does the ن sound like م (nasalized)?
            # (Fix: dropped an unused extract_segment() call; analyze_ghunnah
            # works on the original file via start/end directly.)
            if is_noon_trigger and next_base == IQLAB_TARGET:
                ghunnah = self.analyze_ghunnah(entry.get("start", 0), entry.get("end", 0), char)

                results["iqlab"].append({
                    "position": i,
                    "char": char,
                    "next_char": next_char,
                    "time": f"{entry.get('start', 0):.3f}-{entry.get('end', 0):.3f}",
                    "ghunnah_detected": ghunnah.get("status") in ["DETECTED", "PARTIAL"],
                    "confidence": ghunnah.get("confidence", 0)
                })

            # Ikhfa: ن before specific letters — partial nasalization expected.
            elif is_noon_trigger and next_base in IKHFA_TARGETS:
                ghunnah = self.analyze_ghunnah(entry.get("start", 0), entry.get("end", 0), char)

                results["ikhfa"].append({
                    "position": i,
                    "char": char,
                    "next_char": next_char,
                    "time": f"{entry.get('start', 0):.3f}-{entry.get('end', 0):.3f}",
                    "ghunnah_level": ghunnah.get("status"),
                    "confidence": ghunnah.get("confidence", 0)
                })

            # Idgham: ن before يرملون — merged ن is expected to be very short.
            elif is_noon_trigger and next_base in IDGHAM_TARGETS:
                noon_dur = (entry.get("end", 0) - entry.get("start", 0)) * 1000

                results["idgham"].append({
                    "position": i,
                    "char": char,
                    "next_char": next_char,
                    "time": f"{entry.get('start', 0):.3f}-{entry.get('end', 0):.3f}",
                    "noon_duration_ms": round(noon_dur, 1),
                    "merged": noon_dur < 50,  # Very short = merged
                    "confidence": 0.7 if noon_dur < 50 else 0.4
                })

        return results
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
def run_comprehensive_analysis():
    """Run comprehensive Tajweed analysis with all improvements.

    Loads the surah audio and letter-timing JSON from the module-level
    AUDIO_PATH / TIMING_PATH constants, runs per-letter Qalqalah, Madd,
    Tafkheem and Ghunnah checks plus cross-word rules, prints a summary,
    and writes the full JSON report to OUTPUT_PATH.

    Returns:
        The results dict (also saved to disk), or None if librosa is missing.
    """

    print("=" * 60)
    print("TajweedSST Enhanced Analyzer v3")
    print("Ghunnah + Pitch + Cross-Word Rules")
    print("=" * 60)

    # librosa is mandatory for every acoustic check; bail out early without it.
    if not HAS_LIBROSA:
        print("ERROR: librosa required")
        return

    # Load analyzer (reads and resamples the audio once)
    analyzer = TajweedAnalyzerV3(AUDIO_PATH)

    # Load per-letter timing data (list of {"char", "start", "end", ...})
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        timing = json.load(f)

    print(f"\n[1] Analyzing {len(timing)} letters...")

    results = {
        "qalqalah": [],
        "madd": [],
        "tafkheem": [],
        "ghunnah": [],
        "cross_word": {},
        "summary": {}
    }

    # Per-rule tallies: letters eligible for the rule vs. letters that passed.
    counts = {k: 0 for k in ["qalqalah", "madd", "tafkheem", "ghunnah"]}
    passed = {k: 0 for k in ["qalqalah", "madd", "tafkheem", "ghunnah"]}

    for entry in timing:
        char = entry.get("char", "")
        # First codepoint = base letter; trailing chars are diacritics.
        base = char[0] if char else ""
        start = entry.get("start", 0)
        end = entry.get("end", 0)

        segment = analyzer.extract_segment(start, end)

        # Qalqalah: echo/bounce letters
        if base in QALQALAH_LETTERS:
            counts["qalqalah"] += 1
            analysis = analyzer.analyze_qalqalah(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["qalqalah"].append(analysis)
            if analysis["confidence"] >= 0.4:
                passed["qalqalah"] += 1

        # Madd (with pitch tracking); madd_count defaults to 2 harakat
        if base in MADD_LETTERS:
            counts["madd"] += 1
            madd_count = entry.get("madd_count", 2)
            analysis = analyzer.analyze_madd_pitch(segment, char, madd_count)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["madd"].append(analysis)
            if analysis["status"] in ["SUSTAINED", "PARTIAL"]:
                passed["madd"] += 1

        # Tafkheem: heavy letters
        if base in TAFKHEEM_LETTERS:
            counts["tafkheem"] += 1
            analysis = analyzer.analyze_tafkheem(segment)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["tafkheem"].append(analysis)
            if analysis["status"] in ["HEAVY", "MODERATE"]:
                passed["tafkheem"] += 1

        # Ghunnah: nasal letters (uses start/end, not the extracted segment,
        # because the formant analysis runs on the original parselmouth Sound)
        if base in GHUNNAH_LETTERS:
            counts["ghunnah"] += 1
            analysis = analyzer.analyze_ghunnah(start, end, char)
            analysis["char"] = char
            analysis["time"] = f"{start:.3f}-{end:.3f}"
            results["ghunnah"].append(analysis)
            if analysis.get("status") in ["DETECTED", "PARTIAL"]:
                passed["ghunnah"] += 1

    # Cross-word analysis (Idgham / Ikhfa / Iqlab over letter pairs)
    print("\n[2] Analyzing cross-word rules...")
    results["cross_word"] = analyzer.analyze_cross_word_rules(timing)

    # Summary: pass rate per rule (max(1, total) avoids division by zero)
    results["summary"] = {
        k: {
            "total": counts[k],
            "passed": passed[k],
            "rate": round(passed[k] / max(1, counts[k]), 2)
        }
        for k in counts
    }

    results["summary"]["cross_word"] = {
        "idgham": len(results["cross_word"].get("idgham", [])),
        "ikhfa": len(results["cross_word"].get("ikhfa", [])),
        "iqlab": len(results["cross_word"].get("iqlab", []))
    }

    # Print results: per-rule rates, then cross-word occurrence counts
    print("\n[3] Results:")
    for rule, data in results["summary"].items():
        if isinstance(data, dict) and "rate" in data:
            print(f" {rule}: {data['passed']}/{data['total']} ({data['rate']*100:.0f}%)")
        elif isinstance(data, dict):
            print(f" {rule}: Idgham={data.get('idgham', 0)}, Ikhfa={data.get('ikhfa', 0)}, Iqlab={data.get('iqlab', 0)}")

    # Save report (convert_to_json_safe strips numpy types first)
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(convert_to_json_safe(results), f, ensure_ascii=False, indent=2)
    print(f"\n[4] Saved: {OUTPUT_PATH}")

    # Samples: first few entries of each analysis for a quick eyeball check
    print("\n[5] Sample Ghunnah (ن/م nasal detection):")
    for r in results["ghunnah"][:5]:
        f1 = r.get('f1', 'N/A')
        print(f" [{r['char']}] F1:{f1}Hz → {r['status']} (conf: {r['confidence']})")

    print("\n[6] Sample Madd (Pitch Tracking):")
    for r in results["madd"][:5]:
        cv = r.get('pitch_cv', 'N/A')
        print(f" [{r['char']}] {r.get('duration_ms', 0):.0f}ms, pitch_cv:{cv} → {r['status']}")

    print("\n[7] Cross-Word Rules Detected:")
    for rule, items in results["cross_word"].items():
        if items:
            print(f" {rule.upper()}: {len(items)} instances")
            for item in items[:2]:
                print(f" - {item['char']} → {item['next_char']} @ {item['time']}")

    print("\n" + "=" * 60)
    print("✓ TajweedSST v3 Analysis Complete!")
    print("=" * 60)

    return results
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
if __name__ == "__main__":
    # Script entry point: run the full v3 physics analysis pipeline.
    run_comprehensive_analysis()
|
requirements.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core dependencies
|
| 2 |
+
camel-tools>=1.5.0
|
| 3 |
+
whisperx>=3.1.0
|
| 4 |
+
librosa>=0.10.0
|
| 5 |
+
praat-parselmouth>=0.4.0
|
| 6 |
+
numpy>=1.24.0
|
| 7 |
+
scipy>=1.10.0
|
| 8 |
+
torch>=2.0.0
|
| 9 |
+
torchaudio>=2.0.0
|
| 10 |
+
|
| 11 |
+
# Alignment
|
| 12 |
+
montreal-forced-aligner>=3.0.0
|
| 13 |
+
|
| 14 |
+
# Arabic NLP
|
| 15 |
+
pyarabic>=0.6.0
|
| 16 |
+
arabic-reshaper>=3.0.0
|
| 17 |
+
|
| 18 |
+
# Utilities
|
| 19 |
+
tqdm>=4.65.0
|
| 20 |
+
pydub>=0.25.0
|
| 21 |
+
soundfile>=0.12.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
TajweedSST - Quranic Precision Alignment & Tajweed Analysis Tool
|
| 3 |
+
|
| 4 |
+
A Python-based pipeline that generates letter-level precise timing data
|
| 5 |
+
for Quran recitations, prevents timing drift, and uses signal processing
|
| 6 |
+
to validate Tajweed rules.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
from tajweedsst.src.pipeline import TajweedPipeline
|
| 10 |
+
|
| 11 |
+
pipeline = TajweedPipeline()
|
| 12 |
+
result = pipeline.process(
|
| 13 |
+
audio_path="path/to/audio.mp3",
|
| 14 |
+
text="قُلْ هُوَ اللَّهُ أَحَدٌ",
|
| 15 |
+
surah=112,
|
| 16 |
+
ayah=1
|
| 17 |
+
)
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from .tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 21 |
+
from .alignment_engine import AlignmentEngine, MockAlignmentEngine
|
| 22 |
+
from .physics_validator import PhysicsValidator, ValidationStatus
|
| 23 |
+
from .pipeline import TajweedPipeline
|
| 24 |
+
|
| 25 |
+
__version__ = "1.0.0"
|
| 26 |
+
__all__ = [
|
| 27 |
+
"TajweedPipeline",
|
| 28 |
+
"TajweedParser",
|
| 29 |
+
"TajweedType",
|
| 30 |
+
"PhysicsCheck",
|
| 31 |
+
"AlignmentEngine",
|
| 32 |
+
"MockAlignmentEngine",
|
| 33 |
+
"PhysicsValidator",
|
| 34 |
+
"ValidationStatus"
|
| 35 |
+
]
|
src/alignment_engine.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Step 2: Hierarchical Alignment Engine
|
| 4 |
+
|
| 5 |
+
The Anti-Drift Engine:
|
| 6 |
+
1. WhisperX: Get word-level anchors (rigid boundaries)
|
| 7 |
+
2. MFA: Get phoneme-level precision within words
|
| 8 |
+
3. Normalization: Clamp MFA durations to match WhisperX exactly
|
| 9 |
+
|
| 10 |
+
Formula: Phoneme_New_Duration = Phoneme_Old * (Whisper_Word_Duration / Sum_MFA_Phonemes)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import os
|
| 14 |
+
import json
|
| 15 |
+
import subprocess
|
| 16 |
+
from dataclasses import dataclass, field
|
| 17 |
+
from typing import List, Dict, Optional, Tuple
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
@dataclass
class PhonemeAlignment:
    """Timing record for a single phoneme.

    Carries the aligner-reported ``duration`` alongside the boundary pair
    (``start``, ``end``); ``normalized_duration`` is always derived from
    the boundaries.
    """
    phoneme: str
    start: float
    end: float
    duration: float

    @property
    def normalized_duration(self) -> float:
        """Duration in seconds recomputed from the current boundaries."""
        span = self.end - self.start
        return span
|
| 31 |
+
|
| 32 |
+
@dataclass
class WordAlignment:
    """A single word's WhisperX boundaries plus its phoneme breakdown."""
    word_text: str
    whisper_start: float
    whisper_end: float
    phonemes: List[PhonemeAlignment] = field(default_factory=list)

    @property
    def whisper_duration(self) -> float:
        """Length of the WhisperX word window, in seconds."""
        window = self.whisper_end - self.whisper_start
        return window
|
| 43 |
+
|
| 44 |
+
@dataclass
class AlignmentResult:
    """Complete alignment for an audio segment.

    Aggregates the per-word alignments produced by the engine for one
    surah/ayah of a given audio file.
    """
    audio_path: str  # path of the audio file that was aligned
    surah: int  # surah number for metadata
    ayah: int  # ayah number for metadata
    # One WordAlignment per recognized word, in temporal order.
    words: List[WordAlignment] = field(default_factory=list)
    # Free-form extra information about the run.
    metadata: Dict = field(default_factory=dict)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class AlignmentEngine:
|
| 55 |
+
"""
|
| 56 |
+
Hierarchical alignment using WhisperX + MFA
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
def __init__(self,
|
| 60 |
+
whisperx_model: str = "large-v3",
|
| 61 |
+
mfa_acoustic_model: str = "arabic_mfa",
|
| 62 |
+
mfa_dictionary: str = "arabic_mfa",
|
| 63 |
+
device: str = "cuda",
|
| 64 |
+
compute_type: str = "float16"):
|
| 65 |
+
"""
|
| 66 |
+
Initialize alignment engine
|
| 67 |
+
|
| 68 |
+
Args:
|
| 69 |
+
whisperx_model: WhisperX model size
|
| 70 |
+
mfa_acoustic_model: MFA acoustic model for Arabic
|
| 71 |
+
mfa_dictionary: MFA pronunciation dictionary
|
| 72 |
+
device: cuda or cpu
|
| 73 |
+
compute_type: float16 or float32
|
| 74 |
+
"""
|
| 75 |
+
self.whisperx_model = whisperx_model
|
| 76 |
+
self.mfa_acoustic_model = mfa_acoustic_model
|
| 77 |
+
self.mfa_dictionary = mfa_dictionary
|
| 78 |
+
self.device = device
|
| 79 |
+
self.compute_type = compute_type
|
| 80 |
+
|
| 81 |
+
self._whisperx = None
|
| 82 |
+
self._whisperx_align_model = None
|
| 83 |
+
|
| 84 |
+
def _load_whisperx(self):
|
| 85 |
+
"""Lazy load WhisperX models"""
|
| 86 |
+
if self._whisperx is None:
|
| 87 |
+
import whisperx
|
| 88 |
+
self._whisperx = whisperx.load_model(
|
| 89 |
+
self.whisperx_model,
|
| 90 |
+
device=self.device,
|
| 91 |
+
compute_type=self.compute_type
|
| 92 |
+
)
|
| 93 |
+
# Load alignment model for Arabic
|
| 94 |
+
self._whisperx_align_model, self._whisperx_align_metadata = whisperx.load_align_model(
|
| 95 |
+
language_code="ar",
|
| 96 |
+
device=self.device
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
def align(self,
|
| 100 |
+
audio_path: str,
|
| 101 |
+
phonetic_words: List[str],
|
| 102 |
+
surah: int = 0,
|
| 103 |
+
ayah: int = 0) -> AlignmentResult:
|
| 104 |
+
"""
|
| 105 |
+
Perform hierarchical alignment
|
| 106 |
+
|
| 107 |
+
Args:
|
| 108 |
+
audio_path: Path to audio file
|
| 109 |
+
phonetic_words: List of phonetic transcriptions from TajweedParser
|
| 110 |
+
surah: Surah number for metadata
|
| 111 |
+
ayah: Ayah number for metadata
|
| 112 |
+
|
| 113 |
+
Returns:
|
| 114 |
+
AlignmentResult with word and phoneme timings
|
| 115 |
+
"""
|
| 116 |
+
result = AlignmentResult(
|
| 117 |
+
audio_path=audio_path,
|
| 118 |
+
surah=surah,
|
| 119 |
+
ayah=ayah
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
# Step 1: WhisperX word-level alignment
|
| 123 |
+
whisper_words = self._run_whisperx(audio_path)
|
| 124 |
+
|
| 125 |
+
# Step 2: MFA phoneme-level alignment for each word
|
| 126 |
+
mfa_phonemes = self._run_mfa(audio_path, phonetic_words)
|
| 127 |
+
|
| 128 |
+
# Step 3: Normalize MFA phonemes to WhisperX word boundaries
|
| 129 |
+
for i, (whisper_word, phonemes) in enumerate(zip(whisper_words, mfa_phonemes)):
|
| 130 |
+
word_alignment = WordAlignment(
|
| 131 |
+
word_text=whisper_word['word'],
|
| 132 |
+
whisper_start=whisper_word['start'],
|
| 133 |
+
whisper_end=whisper_word['end']
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
# Normalize phoneme durations
|
| 137 |
+
normalized_phonemes = self._normalize_phonemes(
|
| 138 |
+
phonemes=phonemes,
|
| 139 |
+
target_start=whisper_word['start'],
|
| 140 |
+
target_end=whisper_word['end']
|
| 141 |
+
)
|
| 142 |
+
word_alignment.phonemes = normalized_phonemes
|
| 143 |
+
|
| 144 |
+
result.words.append(word_alignment)
|
| 145 |
+
|
| 146 |
+
return result
|
| 147 |
+
|
| 148 |
+
def _run_whisperx(self, audio_path: str) -> List[Dict]:
|
| 149 |
+
"""
|
| 150 |
+
Run WhisperX for word-level timing
|
| 151 |
+
|
| 152 |
+
Returns: List of {word, start, end} dicts
|
| 153 |
+
"""
|
| 154 |
+
self._load_whisperx()
|
| 155 |
+
import whisperx
|
| 156 |
+
|
| 157 |
+
# Transcribe
|
| 158 |
+
audio = whisperx.load_audio(audio_path)
|
| 159 |
+
result = self._whisperx.transcribe(audio, batch_size=16)
|
| 160 |
+
|
| 161 |
+
# Align to get word-level timestamps
|
| 162 |
+
aligned = whisperx.align(
|
| 163 |
+
result["segments"],
|
| 164 |
+
self._whisperx_align_model,
|
| 165 |
+
self._whisperx_align_metadata,
|
| 166 |
+
audio,
|
| 167 |
+
self.device,
|
| 168 |
+
return_char_alignments=False
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
# Extract word timings
|
| 172 |
+
words = []
|
| 173 |
+
for segment in aligned["segments"]:
|
| 174 |
+
for word_data in segment.get("words", []):
|
| 175 |
+
words.append({
|
| 176 |
+
"word": word_data["word"],
|
| 177 |
+
"start": word_data["start"],
|
| 178 |
+
"end": word_data["end"]
|
| 179 |
+
})
|
| 180 |
+
|
| 181 |
+
return words
|
| 182 |
+
|
| 183 |
+
def _run_mfa(self, audio_path: str, phonetic_words: List[str]) -> List[List[Dict]]:
|
| 184 |
+
"""
|
| 185 |
+
Run MFA for phoneme-level timing within each word
|
| 186 |
+
|
| 187 |
+
Returns: List of phoneme lists per word
|
| 188 |
+
"""
|
| 189 |
+
# Create temp directory for MFA
|
| 190 |
+
temp_dir = Path("/tmp/tajweedsst_mfa")
|
| 191 |
+
temp_dir.mkdir(exist_ok=True)
|
| 192 |
+
|
| 193 |
+
input_dir = temp_dir / "input"
|
| 194 |
+
output_dir = temp_dir / "output"
|
| 195 |
+
input_dir.mkdir(exist_ok=True)
|
| 196 |
+
output_dir.mkdir(exist_ok=True)
|
| 197 |
+
|
| 198 |
+
# Copy audio and create transcript
|
| 199 |
+
audio_name = Path(audio_path).stem
|
| 200 |
+
transcript_path = input_dir / f"{audio_name}.txt"
|
| 201 |
+
|
| 202 |
+
# Write phonetic transcript (space-separated words)
|
| 203 |
+
transcript = " ".join(phonetic_words)
|
| 204 |
+
transcript_path.write_text(transcript)
|
| 205 |
+
|
| 206 |
+
# Copy audio file
|
| 207 |
+
import shutil
|
| 208 |
+
audio_dest = input_dir / Path(audio_path).name
|
| 209 |
+
shutil.copy(audio_path, audio_dest)
|
| 210 |
+
|
| 211 |
+
# Run MFA
|
| 212 |
+
try:
|
| 213 |
+
subprocess.run([
|
| 214 |
+
"mfa", "align",
|
| 215 |
+
str(input_dir),
|
| 216 |
+
self.mfa_dictionary,
|
| 217 |
+
self.mfa_acoustic_model,
|
| 218 |
+
str(output_dir),
|
| 219 |
+
"--clean",
|
| 220 |
+
"--quiet"
|
| 221 |
+
], check=True, capture_output=True)
|
| 222 |
+
except subprocess.CalledProcessError as e:
|
| 223 |
+
print(f"MFA Error: {e.stderr.decode()}")
|
| 224 |
+
return [[] for _ in phonetic_words]
|
| 225 |
+
|
| 226 |
+
# Parse TextGrid output
|
| 227 |
+
textgrid_path = output_dir / f"{audio_name}.TextGrid"
|
| 228 |
+
if textgrid_path.exists():
|
| 229 |
+
return self._parse_textgrid(textgrid_path, len(phonetic_words))
|
| 230 |
+
|
| 231 |
+
return [[] for _ in phonetic_words]
|
| 232 |
+
|
| 233 |
+
def _parse_textgrid(self, textgrid_path: Path, word_count: int) -> List[List[Dict]]:
    """
    Parse MFA TextGrid output for phoneme timings.

    Reads the "phones" and "words" tiers and groups each non-empty phone
    interval under the word interval it starts in.

    Args:
        textgrid_path: Path to the .TextGrid file produced by MFA.
        word_count: Number of words the caller expects; the returned list
            always has exactly this length.

    Returns:
        A list of word_count lists of phoneme dicts
        ({"phoneme", "start", "end"}); all-empty lists on parse failure.
    """
    try:
        import textgrid
        tg = textgrid.TextGrid.fromFile(str(textgrid_path))

        # Locate the two tiers MFA emits. Compare with `is None` below:
        # an empty tier is falsy under `not`, but still a valid tier.
        phones_tier = None
        words_tier = None
        for tier in tg:
            if tier.name == "phones":
                phones_tier = tier
            elif tier.name == "words":
                words_tier = tier

        if phones_tier is None or words_tier is None:
            return [[] for _ in range(word_count)]

        # Walk phones in time order, advancing a word cursor whenever a
        # phone starts at/after the current word interval's end.
        result = []
        word_idx = 0
        current_word_phones = []

        for interval in phones_tier:
            if interval.mark and interval.mark != "":
                phone_data = {
                    "phoneme": interval.mark,
                    "start": interval.minTime,
                    "end": interval.maxTime
                }

                # Advance past EVERY word ending before this phone starts.
                # (A single-step advance would mis-bucket phones after a
                # silence spanning more than one word interval.)
                while (word_idx < len(words_tier)
                       and interval.minTime >= words_tier[word_idx].maxTime):
                    result.append(current_word_phones)
                    current_word_phones = []
                    word_idx += 1

                current_word_phones.append(phone_data)

        # Don't forget the last word's phones.
        if current_word_phones:
            result.append(current_word_phones)

        # Pad/truncate so callers can always zip against their word list.
        while len(result) < word_count:
            result.append([])
        return result[:word_count]

    except Exception as e:
        print(f"TextGrid parse error: {e}")
        return [[] for _ in range(word_count)]
def _normalize_phonemes(self,
                        phonemes: List[Dict],
                        target_start: float,
                        target_end: float) -> List[PhonemeAlignment]:
    """
    Rescale MFA phoneme timings so they exactly span the WhisperX word window.

    Each phoneme keeps its relative share of the word:
        new_duration = old_duration * (whisper_word_duration / total_mfa_duration)

    Args:
        phonemes: Raw MFA phoneme dicts with 'phoneme'/'start'/'end'.
        target_start: Word start time from WhisperX (seconds).
        target_end: Word end time from WhisperX (seconds).

    Returns:
        PhonemeAlignment objects laid end-to-end from target_start to
        target_end; empty list if there are no phonemes or their summed
        duration is zero.
    """
    if not phonemes:
        return []

    window = target_end - target_start

    # Total raw MFA duration drives the scale factor.
    raw_total = sum(p['end'] - p['start'] for p in phonemes)
    if raw_total == 0:
        return []

    ratio = window / raw_total

    aligned: List[PhonemeAlignment] = []
    cursor = target_start
    for entry in phonemes:
        stretched = (entry['end'] - entry['start']) * ratio
        aligned.append(PhonemeAlignment(
            phoneme=entry['phoneme'],
            start=cursor,
            end=cursor + stretched,
            duration=stretched
        ))
        cursor += stretched

    # Snap the final boundary to target_end to absorb float drift.
    last = aligned[-1]
    last.end = target_end
    last.duration = target_end - last.start

    return aligned
class MockAlignmentEngine(AlignmentEngine):
    """
    Deterministic stand-in for AlignmentEngine used in tests.

    Produces synthetic timings -- 0.5 s per word with a 0.1 s gap between
    words, phonemes evenly spread inside each word -- without requiring
    WhisperX or MFA to be installed.
    """

    def align(self,
              audio_path: str,
              phonetic_words: List[str],
              surah: int = 0,
              ayah: int = 0) -> AlignmentResult:
        """Build a synthetic AlignmentResult for the given phonetic words."""
        WORD_SECONDS = 0.5
        GAP_SECONDS = 0.1

        result = AlignmentResult(
            audio_path=audio_path,
            surah=surah,
            ayah=ayah
        )

        cursor = 0.0
        for word in phonetic_words:
            parts = word.split()
            # Evenly divide the word window; guard against empty words.
            slice_len = WORD_SECONDS / max(len(parts), 1)

            entry = WordAlignment(
                word_text=word,
                whisper_start=cursor,
                whisper_end=cursor + WORD_SECONDS
            )

            tick = cursor
            for ph in parts:
                entry.phonemes.append(PhonemeAlignment(
                    phoneme=ph,
                    start=tick,
                    end=tick + slice_len,
                    duration=slice_len
                ))
                tick += slice_len

            result.words.append(entry)
            cursor += WORD_SECONDS + GAP_SECONDS  # gap between words

        return result
def main():
    """Smoke-test the alignment engine against mock data."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Alignment Engine Test")
    print(banner)

    # Mock engine: no WhisperX/MFA installation required.
    engine = MockAlignmentEngine()

    # Phonetic words as produced by TajweedParser (Surah 112:1).
    phonetic_words = ["q l", "h w", "ā l l ā h", "ʾ ḥ d"]

    result = engine.align(
        audio_path="test.wav",
        phonetic_words=phonetic_words,
        surah=112,
        ayah=1
    )

    print(f"Aligned {len(result.words)} words:")
    for word in result.words:
        print(f"\n  Word: '{word.word_text}'")
        print(f"  Anchor: {word.whisper_start:.3f} - {word.whisper_end:.3f}s")
        for phoneme in word.phonemes:
            print(f"    [{phoneme.phoneme}] {phoneme.start:.3f} - {phoneme.end:.3f}s")


if __name__ == "__main__":
    main()
|
src/duration_model.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Duration Model
|
| 4 |
+
|
| 5 |
+
Calibrates and validates letter durations based on Tajweed rules.
|
| 6 |
+
Works with harakat (beat) counts and reciter-specific speech rates.
|
| 7 |
+
|
| 8 |
+
Key Features:
|
| 9 |
+
- Per-reciter harakat calibration
|
| 10 |
+
- Madd type detection from Quranic context
|
| 11 |
+
- Duration validation against Tajweed expectations
|
| 12 |
+
- Speech rate normalization
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import json
|
| 16 |
+
import numpy as np
|
| 17 |
+
from dataclasses import dataclass, field
|
| 18 |
+
from typing import List, Dict, Optional, Tuple
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from enum import Enum
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class MaddType(Enum):
    """Categories of Madd (vowel elongation) recognized by the model."""
    NONE = "none"
    ASLI = "asli"      # natural elongation: 2 harakat
    WAJIB = "wajib"    # obligatory: 4-5 harakat
    JAIZ = "jaiz"      # permissible: 2-4-6 harakat (flexible)
    LAZIM = "lazim"    # compulsory: 6 harakat
    LEEN = "leen"      # soft: 2-4-6 harakat
    ARID = "arid"      # pre-pause: 2-4-6 harakat
    BADAL = "badal"    # substitution: 2 harakat
    SILAH = "silah"    # connection: 2 harakat
| 35 |
+
@dataclass
class HarakatCalibration:
    """Per-reciter timing calibration derived from sample recordings."""
    reciter_name: str                                     # identifier for the reciter
    harakat_base_ms: float = 100.0                        # length of one beat (harakat), ms
    speech_rate_wpm: float = 60.0                         # estimated words per minute
    pitch_range_hz: Tuple[float, float] = (80.0, 300.0)   # (min, max) F0 in Hz
    sample_size: int = 0                                  # number of samples behind the calibration
| 45 |
+
@dataclass
class DurationExpectation:
    """Expected timing envelope for one Tajweed duration rule."""
    rule_name: str                           # human-readable rule label
    min_harakat: int                         # lower bound, in beats
    max_harakat: int                         # upper bound, in beats
    expected_ms_range: Tuple[float, float]   # absolute (min, max) window in ms
    tolerance: float = 0.25                  # fractional slack (default 25%)
| 55 |
+
@dataclass
class DurationResult:
    """Outcome of validating one measured duration against a Tajweed rule."""
    is_valid: bool            # True when the duration falls in the expected window
    actual_ms: float          # measured duration, ms
    expected_ms: float        # center of the expected window, ms
    harakat_count: float      # measured duration expressed in beats
    deviation_percent: float  # |actual - expected| / expected * 100
    rule_applied: str         # name of the Madd rule that was checked
| 66 |
+
class DurationModel:
    """
    Duration model for Tajweed-based timing validation.

    Expected durations are expressed in harakat (beats); the absolute
    length of one harakat is calibrated per reciter via
    calibrate_from_samples(), falling back to DEFAULT_HARAKAT_MS when no
    calibration has been performed.
    """

    # Default beat duration in ms, used before any calibration
    DEFAULT_HARAKAT_MS = 100.0

    # Tajweed duration rules (in harakat counts).
    # BADAL and SILAH are 2-harakat rules (see MaddType comments), so they
    # share the ASLI envelope instead of falling through to the 1-harakat
    # default in get_expected_duration().
    TAJWEED_DURATIONS = {
        MaddType.ASLI: DurationExpectation("Madd Asli", 2, 2, (150, 280), 0.30),
        MaddType.WAJIB: DurationExpectation("Madd Wajib", 4, 5, (350, 550), 0.25),
        MaddType.LAZIM: DurationExpectation("Madd Lazim", 6, 6, (500, 800), 0.20),
        MaddType.JAIZ: DurationExpectation("Madd Jaiz", 2, 6, (150, 700), 0.30),
        MaddType.ARID: DurationExpectation("Madd Arid", 2, 6, (150, 700), 0.30),
        MaddType.LEEN: DurationExpectation("Madd Leen", 2, 6, (150, 700), 0.30),
        MaddType.BADAL: DurationExpectation("Madd Badal", 2, 2, (150, 280), 0.30),
        MaddType.SILAH: DurationExpectation("Madd Silah", 2, 2, (150, 280), 0.30),
    }

    # Ghunnah duration (2 harakat)
    GHUNNAH_DURATION = DurationExpectation("Ghunnah", 2, 2, (80, 250), 0.30)

    def __init__(self, lisan_path: Optional[str] = None):
        """Initialize with optional path to lisan_phonemes.json."""
        self.calibration: Optional[HarakatCalibration] = None
        self.lisan_data: Dict = {}

        if lisan_path and Path(lisan_path).exists():
            with open(lisan_path, 'r', encoding='utf-8') as f:
                self.lisan_data = json.load(f)

    def calibrate_from_samples(self,
                               reciter_name: str,
                               vowel_durations: List[float],
                               words_per_minute: float = 60.0) -> HarakatCalibration:
        """
        Calibrate the harakat duration from sample vowel measurements.

        Args:
            reciter_name: Name of the reciter, for identification.
            vowel_durations: Short-vowel durations in seconds; when empty,
                DEFAULT_HARAKAT_MS is used.
            words_per_minute: Estimated speech rate.

        Returns:
            The stored HarakatCalibration object.
        """
        if not vowel_durations:
            # No measurements -- fall back to defaults.
            self.calibration = HarakatCalibration(
                reciter_name=reciter_name,
                harakat_base_ms=self.DEFAULT_HARAKAT_MS,
                speech_rate_wpm=words_per_minute,
                sample_size=0
            )
            return self.calibration

        # Convert to milliseconds; median is robust to outliers.
        durations_ms = [d * 1000 for d in vowel_durations]
        harakat_base = np.median(durations_ms)

        self.calibration = HarakatCalibration(
            reciter_name=reciter_name,
            harakat_base_ms=harakat_base,
            speech_rate_wpm=words_per_minute,
            sample_size=len(vowel_durations)
        )

        return self.calibration

    def get_expected_duration(self,
                              madd_type: MaddType,
                              harakat_count: Optional[int] = None) -> Tuple[float, float]:
        """
        Get the expected duration range for a Madd type.

        Args:
            madd_type: The Madd rule to look up.
            harakat_count: Specific beat count; when None, the rule's
                min/max harakat range is used.

        Returns:
            Tuple of (min_ms, max_ms).
        """
        if not self.calibration:
            base_ms = self.DEFAULT_HARAKAT_MS
        else:
            base_ms = self.calibration.harakat_base_ms

        if madd_type in self.TAJWEED_DURATIONS:
            expectation = self.TAJWEED_DURATIONS[madd_type]
            # `is not None` (not truthiness): harakat_count=0 is an explicit
            # value, not "use the rule's range".
            if harakat_count is not None:
                # Window centered on the requested beat count.
                center = harakat_count * base_ms
                tolerance = expectation.tolerance
                return (center * (1 - tolerance), center * (1 + tolerance))
            else:
                # Window spanning the rule's full harakat range.
                min_ms = expectation.min_harakat * base_ms * (1 - expectation.tolerance)
                max_ms = expectation.max_harakat * base_ms * (1 + expectation.tolerance)
                return (min_ms, max_ms)

        # Default: 1 harakat with +/-30% slack.
        return (base_ms * 0.7, base_ms * 1.3)

    def validate_duration(self,
                          actual_duration_s: float,
                          madd_type: MaddType,
                          expected_harakat: int = 2) -> DurationResult:
        """
        Validate whether an actual duration matches the Tajweed expectation.

        Args:
            actual_duration_s: Measured duration in seconds.
            madd_type: Type of Madd rule to validate against.
            expected_harakat: Expected harakat (beat) count.

        Returns:
            DurationResult with validation details.
        """
        actual_ms = actual_duration_s * 1000
        min_ms, max_ms = self.get_expected_duration(madd_type, expected_harakat)
        expected_ms = (min_ms + max_ms) / 2

        is_valid = min_ms <= actual_ms <= max_ms
        deviation = abs(actual_ms - expected_ms) / expected_ms * 100 if expected_ms > 0 else 0

        # Express the measured duration in beats of the calibrated base.
        base_ms = self.calibration.harakat_base_ms if self.calibration else self.DEFAULT_HARAKAT_MS
        harakat_count = actual_ms / base_ms if base_ms > 0 else 0

        return DurationResult(
            is_valid=is_valid,
            actual_ms=actual_ms,
            expected_ms=expected_ms,
            harakat_count=harakat_count,
            deviation_percent=deviation,
            rule_applied=madd_type.value
        )

    def validate_ghunnah_duration(self, actual_duration_s: float) -> DurationResult:
        """Validate a Ghunnah duration (2 harakat).

        NOTE(review): reuses the 2-harakat ASLI envelope rather than
        GHUNNAH_DURATION's (80, 250) ms range -- confirm intended.
        """
        return self.validate_duration(actual_duration_s, MaddType.ASLI, 2)

    def suggest_correction(self,
                           actual_duration_s: float,
                           madd_type: MaddType,
                           expected_harakat: int = 2) -> Tuple[float, float]:
        """
        Suggest a corrected duration based on Tajweed expectations.

        Args:
            actual_duration_s: Measured duration in seconds.
            madd_type: Type of Madd rule.
            expected_harakat: Expected harakat count.

        Returns:
            Tuple of (suggested_duration_s, adjustment_s); adjustment is 0
            when the duration is already within range.
        """
        min_ms, max_ms = self.get_expected_duration(madd_type, expected_harakat)
        actual_ms = actual_duration_s * 1000

        if actual_ms < min_ms:
            # Too short -- clamp up to the minimum.
            suggested_ms = min_ms
        elif actual_ms > max_ms:
            # Too long -- clamp down to the maximum.
            suggested_ms = max_ms
        else:
            # Already valid.
            suggested_ms = actual_ms

        adjustment_ms = suggested_ms - actual_ms
        return (suggested_ms / 1000, adjustment_ms / 1000)

    def detect_madd_type_from_context(self,
                                      current_letter: str,
                                      next_letter: Optional[str],
                                      next_harakat: Optional[str],
                                      is_word_end: bool,
                                      is_waqf: bool = False) -> MaddType:
        """
        Auto-detect the Madd type from Quranic text context.

        Args:
            current_letter: The Madd letter (ا و ي).
            next_letter: Following letter, if any.
            next_harakat: Harakat mark(s) on the next letter.
            is_word_end: Whether the letter sits at a word boundary.
            is_waqf: Whether the reciter pauses here.

        Returns:
            Detected MaddType.
        """
        SUKUN = '\u0652'
        SHADDA = '\u0651'

        # Pause at word end -> Madd Arid (flexible 2-4-6).
        if is_waqf and is_word_end:
            return MaddType.ARID

        # Sukun or Shadda on the following letter -> Madd Lazim.
        if next_harakat:
            if SHADDA in next_harakat or SUKUN in next_harakat:
                return MaddType.LAZIM

        # Hamza following in the same word -> Madd Wajib.
        if next_letter and next_letter in 'ءأإؤئ':
            return MaddType.WAJIB

        # Default: natural 2-harakat Madd.
        return MaddType.ASLI
|
| 268 |
+
def main():
    """Smoke-test the duration model: calibration, validation, detection."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Duration Model Test")
    print(banner)

    dm = DurationModel()

    # Calibrate with simulated short-vowel samples (~100 ms each).
    sample_vowels = [0.095, 0.105, 0.098, 0.102, 0.100, 0.103, 0.097]
    cal = dm.calibrate_from_samples("Abdul_Basit", sample_vowels)

    print(f"\nCalibration for {cal.reciter_name}:")
    print(f"  Harakat base: {cal.harakat_base_ms:.1f} ms")
    print(f"  Sample size: {cal.sample_size}")

    print("\nDuration Validation Tests:")

    # Madd Asli: 2 harakat, ~200 ms expected.
    res = dm.validate_duration(0.195, MaddType.ASLI, 2)
    print(f"\n  Madd Asli (0.195s):")
    print(f"    Valid: {res.is_valid}")
    print(f"    Harakat: {res.harakat_count:.1f}")
    print(f"    Deviation: {res.deviation_percent:.1f}%")

    # Madd Lazim: 6 harakat, ~600 ms expected.
    res = dm.validate_duration(0.580, MaddType.LAZIM, 6)
    print(f"\n  Madd Lazim (0.580s):")
    print(f"    Valid: {res.is_valid}")
    print(f"    Harakat: {res.harakat_count:.1f}")
    print(f"    Deviation: {res.deviation_percent:.1f}%")

    print("\nMadd Type Detection:")
    detected = dm.detect_madd_type_from_context('ا', 'ء', None, False, False)
    print(f"  ا before ء: {detected.value}")

    detected = dm.detect_madd_type_from_context('ا', 'ب', '\u0651', False, False)
    print(f"  ا before بّ: {detected.value}")


if __name__ == "__main__":
    main()
|
src/lisan_phonemes.json
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"meta": {
|
| 3 |
+
"version": "1.0.0",
|
| 4 |
+
"description": "Lisan al-Arab Digital Phonemes - Arabic letter physics for Tajweed validation",
|
| 5 |
+
"source": "Articulatory Phonetics + Classical Tajweed Rules"
|
| 6 |
+
},
|
| 7 |
+
"consonants": {
|
| 8 |
+
"ء": {
|
| 9 |
+
"type": "stop",
|
| 10 |
+
"place": "glottal",
|
| 11 |
+
"voiced": false,
|
| 12 |
+
"tafkheem": false
|
| 13 |
+
},
|
| 14 |
+
"ب": {
|
| 15 |
+
"type": "stop",
|
| 16 |
+
"place": "bilabial",
|
| 17 |
+
"voiced": true,
|
| 18 |
+
"tafkheem": false,
|
| 19 |
+
"qalqalah": true
|
| 20 |
+
},
|
| 21 |
+
"ت": {
|
| 22 |
+
"type": "stop",
|
| 23 |
+
"place": "dental",
|
| 24 |
+
"voiced": false,
|
| 25 |
+
"tafkheem": false
|
| 26 |
+
},
|
| 27 |
+
"ث": {
|
| 28 |
+
"type": "fricative",
|
| 29 |
+
"place": "dental",
|
| 30 |
+
"voiced": false,
|
| 31 |
+
"tafkheem": false,
|
| 32 |
+
"freq_range": [
|
| 33 |
+
4000,
|
| 34 |
+
8000
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
"ج": {
|
| 38 |
+
"type": "affricate",
|
| 39 |
+
"place": "palatal",
|
| 40 |
+
"voiced": true,
|
| 41 |
+
"tafkheem": false,
|
| 42 |
+
"qalqalah": true
|
| 43 |
+
},
|
| 44 |
+
"ح": {
|
| 45 |
+
"type": "fricative",
|
| 46 |
+
"place": "pharyngeal",
|
| 47 |
+
"voiced": false,
|
| 48 |
+
"tafkheem": false
|
| 49 |
+
},
|
| 50 |
+
"خ": {
|
| 51 |
+
"type": "fricative",
|
| 52 |
+
"place": "velar",
|
| 53 |
+
"voiced": false,
|
| 54 |
+
"tafkheem": true,
|
| 55 |
+
"freq_range": [
|
| 56 |
+
1500,
|
| 57 |
+
3000
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
"د": {
|
| 61 |
+
"type": "stop",
|
| 62 |
+
"place": "dental",
|
| 63 |
+
"voiced": true,
|
| 64 |
+
"tafkheem": false,
|
| 65 |
+
"qalqalah": true
|
| 66 |
+
},
|
| 67 |
+
"ذ": {
|
| 68 |
+
"type": "fricative",
|
| 69 |
+
"place": "dental",
|
| 70 |
+
"voiced": true,
|
| 71 |
+
"tafkheem": false
|
| 72 |
+
},
|
| 73 |
+
"ر": {
|
| 74 |
+
"type": "trill",
|
| 75 |
+
"place": "alveolar",
|
| 76 |
+
"voiced": true,
|
| 77 |
+
"tafkheem": "context"
|
| 78 |
+
},
|
| 79 |
+
"ز": {
|
| 80 |
+
"type": "fricative",
|
| 81 |
+
"place": "alveolar",
|
| 82 |
+
"voiced": true,
|
| 83 |
+
"tafkheem": false,
|
| 84 |
+
"freq_range": [
|
| 85 |
+
3500,
|
| 86 |
+
6000
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
"س": {
|
| 90 |
+
"type": "fricative",
|
| 91 |
+
"place": "alveolar",
|
| 92 |
+
"voiced": false,
|
| 93 |
+
"tafkheem": false,
|
| 94 |
+
"freq_range": [
|
| 95 |
+
4000,
|
| 96 |
+
8000
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
"ش": {
|
| 100 |
+
"type": "fricative",
|
| 101 |
+
"place": "palatal",
|
| 102 |
+
"voiced": false,
|
| 103 |
+
"tafkheem": false,
|
| 104 |
+
"freq_range": [
|
| 105 |
+
2500,
|
| 106 |
+
6000
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
"ص": {
|
| 110 |
+
"type": "fricative",
|
| 111 |
+
"place": "alveolar",
|
| 112 |
+
"voiced": false,
|
| 113 |
+
"tafkheem": true,
|
| 114 |
+
"freq_range": [
|
| 115 |
+
3500,
|
| 116 |
+
7000
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
"ض": {
|
| 120 |
+
"type": "stop",
|
| 121 |
+
"place": "dental",
|
| 122 |
+
"voiced": true,
|
| 123 |
+
"tafkheem": true
|
| 124 |
+
},
|
| 125 |
+
"ط": {
|
| 126 |
+
"type": "stop",
|
| 127 |
+
"place": "dental",
|
| 128 |
+
"voiced": false,
|
| 129 |
+
"tafkheem": true,
|
| 130 |
+
"qalqalah": true
|
| 131 |
+
},
|
| 132 |
+
"ظ": {
|
| 133 |
+
"type": "fricative",
|
| 134 |
+
"place": "dental",
|
| 135 |
+
"voiced": true,
|
| 136 |
+
"tafkheem": true
|
| 137 |
+
},
|
| 138 |
+
"ع": {
|
| 139 |
+
"type": "fricative",
|
| 140 |
+
"place": "pharyngeal",
|
| 141 |
+
"voiced": true,
|
| 142 |
+
"tafkheem": false
|
| 143 |
+
},
|
| 144 |
+
"غ": {
|
| 145 |
+
"type": "fricative",
|
| 146 |
+
"place": "velar",
|
| 147 |
+
"voiced": true,
|
| 148 |
+
"tafkheem": true
|
| 149 |
+
},
|
| 150 |
+
"ف": {
|
| 151 |
+
"type": "fricative",
|
| 152 |
+
"place": "labiodental",
|
| 153 |
+
"voiced": false,
|
| 154 |
+
"tafkheem": false,
|
| 155 |
+
"freq_range": [
|
| 156 |
+
3000,
|
| 157 |
+
6000
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
"ق": {
|
| 161 |
+
"type": "stop",
|
| 162 |
+
"place": "uvular",
|
| 163 |
+
"voiced": false,
|
| 164 |
+
"tafkheem": true,
|
| 165 |
+
"qalqalah": true
|
| 166 |
+
},
|
| 167 |
+
"ك": {
|
| 168 |
+
"type": "stop",
|
| 169 |
+
"place": "velar",
|
| 170 |
+
"voiced": false,
|
| 171 |
+
"tafkheem": false
|
| 172 |
+
},
|
| 173 |
+
"ل": {
|
| 174 |
+
"type": "lateral",
|
| 175 |
+
"place": "alveolar",
|
| 176 |
+
"voiced": true,
|
| 177 |
+
"tafkheem": "allah_context"
|
| 178 |
+
},
|
| 179 |
+
"م": {
|
| 180 |
+
"type": "nasal",
|
| 181 |
+
"place": "bilabial",
|
| 182 |
+
"voiced": true,
|
| 183 |
+
"tafkheem": false,
|
| 184 |
+
"ghunnah_capable": true
|
| 185 |
+
},
|
| 186 |
+
"ن": {
|
| 187 |
+
"type": "nasal",
|
| 188 |
+
"place": "alveolar",
|
| 189 |
+
"voiced": true,
|
| 190 |
+
"tafkheem": false,
|
| 191 |
+
"ghunnah_capable": true
|
| 192 |
+
},
|
| 193 |
+
"ه": {
|
| 194 |
+
"type": "fricative",
|
| 195 |
+
"place": "glottal",
|
| 196 |
+
"voiced": false,
|
| 197 |
+
"tafkheem": false
|
| 198 |
+
},
|
| 199 |
+
"و": {
|
| 200 |
+
"type": "approximant",
|
| 201 |
+
"place": "bilabial",
|
| 202 |
+
"voiced": true,
|
| 203 |
+
"tafkheem": false
|
| 204 |
+
},
|
| 205 |
+
"ي": {
|
| 206 |
+
"type": "approximant",
|
| 207 |
+
"place": "palatal",
|
| 208 |
+
"voiced": true,
|
| 209 |
+
"tafkheem": false
|
| 210 |
+
}
|
| 211 |
+
},
|
| 212 |
+
"physics_signatures": {
|
| 213 |
+
"stop": {
|
| 214 |
+
"description": "Complete oral closure followed by burst release",
|
| 215 |
+
"detection": "silence_then_burst",
|
| 216 |
+
"metrics": [
|
| 217 |
+
"rms_dip",
|
| 218 |
+
"rms_spike",
|
| 219 |
+
"closure_duration_ms"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
"fricative": {
|
| 223 |
+
"description": "Continuous turbulent airflow through narrow constriction",
|
| 224 |
+
"detection": "high_frequency_noise",
|
| 225 |
+
"metrics": [
|
| 226 |
+
"spectral_centroid",
|
| 227 |
+
"zcr",
|
| 228 |
+
"noise_band_energy"
|
| 229 |
+
]
|
| 230 |
+
},
|
| 231 |
+
"nasal": {
|
| 232 |
+
"description": "Airflow through nasal cavity with oral closure",
|
| 233 |
+
"detection": "nasal_formant",
|
| 234 |
+
"metrics": [
|
| 235 |
+
"f1_nasal_peak",
|
| 236 |
+
"antiformant_250hz",
|
| 237 |
+
"pitch_stability"
|
| 238 |
+
]
|
| 239 |
+
},
|
| 240 |
+
"trill": {
|
| 241 |
+
"description": "Rapid vibration of articulator",
|
| 242 |
+
"detection": "periodic_amplitude_modulation",
|
| 243 |
+
"metrics": [
|
| 244 |
+
"modulation_rate_hz",
|
| 245 |
+
"periodicity"
|
| 246 |
+
]
|
| 247 |
+
},
|
| 248 |
+
"approximant": {
|
| 249 |
+
"description": "Smooth airflow with minimal constriction",
|
| 250 |
+
"detection": "formant_transition",
|
| 251 |
+
"metrics": [
|
| 252 |
+
"f1_f2_trajectory",
|
| 253 |
+
"voicing_continuity"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
"lateral": {
|
| 257 |
+
"description": "Airflow around tongue sides",
|
| 258 |
+
"detection": "lateral_formant_pattern",
|
| 259 |
+
"metrics": [
|
| 260 |
+
"f2_f3_proximity"
|
| 261 |
+
]
|
| 262 |
+
}
|
| 263 |
+
},
|
| 264 |
+
"tajweed_rules": {
|
| 265 |
+
"qalqalah": {
|
| 266 |
+
"letters": [
|
| 267 |
+
"ق",
|
| 268 |
+
"ط",
|
| 269 |
+
"ب",
|
| 270 |
+
"ج",
|
| 271 |
+
"د"
|
| 272 |
+
],
|
| 273 |
+
"physics": "silence_then_burst",
|
| 274 |
+
"expected": {
|
| 275 |
+
"dip_threshold": 0.3,
|
| 276 |
+
"spike_threshold": 0.5
|
| 277 |
+
},
|
| 278 |
+
"duration_bonus_ms": 50
|
| 279 |
+
},
|
| 280 |
+
"madd_asli": {
|
| 281 |
+
"harakat": 2,
|
| 282 |
+
"expected_ms_range": [
|
| 283 |
+
120,
|
| 284 |
+
280
|
| 285 |
+
],
|
| 286 |
+
"tolerance": 0.3
|
| 287 |
+
},
|
| 288 |
+
"madd_wajib": {
|
| 289 |
+
"harakat": 4,
|
| 290 |
+
"expected_ms_range": [
|
| 291 |
+
240,
|
| 292 |
+
500
|
| 293 |
+
],
|
| 294 |
+
"tolerance": 0.25
|
| 295 |
+
},
|
| 296 |
+
"madd_lazim": {
|
| 297 |
+
"harakat": 6,
|
| 298 |
+
"expected_ms_range": [
|
| 299 |
+
400,
|
| 300 |
+
800
|
| 301 |
+
],
|
| 302 |
+
"tolerance": 0.2
|
| 303 |
+
},
|
| 304 |
+
"ghunnah": {
|
| 305 |
+
"letters": [
|
| 306 |
+
"ن",
|
| 307 |
+
"م"
|
| 308 |
+
],
|
| 309 |
+
"harakat": 2,
|
| 310 |
+
"expected_ms_range": [
|
| 311 |
+
80,
|
| 312 |
+
250
|
| 313 |
+
],
|
| 314 |
+
"physics": "nasal_formant",
|
| 315 |
+
"pitch_stability_min": 0.7
|
| 316 |
+
},
|
| 317 |
+
"idgham_full": {
|
| 318 |
+
"description": "Complete merger, source letter disappears",
|
| 319 |
+
"trigger_letters": [
|
| 320 |
+
"ر",
|
| 321 |
+
"ل"
|
| 322 |
+
],
|
| 323 |
+
"physics": "energy_continuity",
|
| 324 |
+
"expected": {
|
| 325 |
+
"boundary_sharpness": "low",
|
| 326 |
+
"transition_smoothness": "high"
|
| 327 |
+
}
|
| 328 |
+
},
|
| 329 |
+
"idgham_partial": {
|
| 330 |
+
"description": "Partial merger with ghunnah preserved",
|
| 331 |
+
"trigger_letters": [
|
| 332 |
+
"ي",
|
| 333 |
+
"ن",
|
| 334 |
+
"م",
|
| 335 |
+
"و"
|
| 336 |
+
],
|
| 337 |
+
"physics": "nasal_formant_during_merge",
|
| 338 |
+
"expected": {
|
| 339 |
+
"ghunnah_present": true,
|
| 340 |
+
"transition_smoothness": "medium"
|
| 341 |
+
}
|
| 342 |
+
},
|
| 343 |
+
"ikhfa": {
|
| 344 |
+
"description": "Concealment with partial nasalization",
|
| 345 |
+
"trigger_letters": [
|
| 346 |
+
"ت",
|
| 347 |
+
"ث",
|
| 348 |
+
"ج",
|
| 349 |
+
"د",
|
| 350 |
+
"ذ",
|
| 351 |
+
"ز",
|
| 352 |
+
"س",
|
| 353 |
+
"ش",
|
| 354 |
+
"ص",
|
| 355 |
+
"ض",
|
| 356 |
+
"ط",
|
| 357 |
+
"ظ",
|
| 358 |
+
"ف",
|
| 359 |
+
"ق",
|
| 360 |
+
"ك"
|
| 361 |
+
],
|
| 362 |
+
"physics": "gradual_nasalization",
|
| 363 |
+
"expected": {
|
| 364 |
+
"nasalization_gradient": true,
|
| 365 |
+
"transition_ms": [
|
| 366 |
+
50,
|
| 367 |
+
150
|
| 368 |
+
]
|
| 369 |
+
}
|
| 370 |
+
},
|
| 371 |
+
"iqlab": {
|
| 372 |
+
"description": "Nun becomes Mim before Ba",
|
| 373 |
+
"trigger": "ن_before_ب",
|
| 374 |
+
"physics": "bilabial_nasal",
|
| 375 |
+
"expected": {
|
| 376 |
+
"lip_closure": true,
|
| 377 |
+
"nasal_formant": true
|
| 378 |
+
}
|
| 379 |
+
},
|
| 380 |
+
"izhar": {
|
| 381 |
+
"description": "Clear pronunciation without modification",
|
| 382 |
+
"trigger_letters": [
|
| 383 |
+
"ء",
|
| 384 |
+
"ه",
|
| 385 |
+
"ع",
|
| 386 |
+
"ح",
|
| 387 |
+
"غ",
|
| 388 |
+
"خ"
|
| 389 |
+
],
|
| 390 |
+
"physics": "clean_boundary",
|
| 391 |
+
"expected": {
|
| 392 |
+
"boundary_sharpness": "high",
|
| 393 |
+
"nasalization": false
|
| 394 |
+
}
|
| 395 |
+
},
|
| 396 |
+
"tafkheem": {
|
| 397 |
+
"letters": [
|
| 398 |
+
"خ",
|
| 399 |
+
"ص",
|
| 400 |
+
"ض",
|
| 401 |
+
"غ",
|
| 402 |
+
"ط",
|
| 403 |
+
"ق",
|
| 404 |
+
"ظ"
|
| 405 |
+
],
|
| 406 |
+
"physics": "f2_depression",
|
| 407 |
+
"expected": {
|
| 408 |
+
"f2_max_hz": 1200
|
| 409 |
+
}
|
| 410 |
+
},
|
| 411 |
+
"tarqeeq": {
|
| 412 |
+
"description": "Light pronunciation (opposite of tafkheem)",
|
| 413 |
+
"physics": "f2_elevation",
|
| 414 |
+
"expected": {
|
| 415 |
+
"f2_min_hz": 1400
|
| 416 |
+
}
|
| 417 |
+
},
|
| 418 |
+
"sakt": {
|
| 419 |
+
"description": "Brief pause without breath",
|
| 420 |
+
"physics": "silence_detection",
|
| 421 |
+
"expected": {
|
| 422 |
+
"duration_ms_range": [
|
| 423 |
+
50,
|
| 424 |
+
200
|
| 425 |
+
],
|
| 426 |
+
"rms_threshold": 0.05
|
| 427 |
+
}
|
| 428 |
+
}
|
| 429 |
+
},
|
| 430 |
+
"reciter_calibration": {
|
| 431 |
+
"description": "Per-reciter parameters calibrated from sample",
|
| 432 |
+
"parameters": {
|
| 433 |
+
"harakat_base_ms": "Calibrate from short vowels",
|
| 434 |
+
"speech_rate": "Words per minute",
|
| 435 |
+
"pitch_range_hz": "Min/max F0"
|
| 436 |
+
}
|
| 437 |
+
}
|
| 438 |
+
}
|
src/mfa_refiner.py
ADDED
|
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - MFA Refiner Post-Processor
|
| 4 |
+
|
| 5 |
+
Refines wav2vec/MFA alignments using Tajweed physics validation.
|
| 6 |
+
This is the main integration layer that combines:
|
| 7 |
+
1. Tajweed Parser (text → rules)
|
| 8 |
+
2. Physics Validators (audio → boundaries)
|
| 9 |
+
3. Duration Model (timing → corrections)
|
| 10 |
+
|
| 11 |
+
Output: Refined alignment JSON with confidence scores.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import numpy as np
|
| 16 |
+
from dataclasses import dataclass, asdict
|
| 17 |
+
from typing import List, Dict, Optional, Tuple
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
# Import TajweedSST components
|
| 21 |
+
from .tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 22 |
+
from .physics_validator import PhysicsValidator, ValidationStatus
|
| 23 |
+
from .duration_model import DurationModel, MaddType
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class RefinedLetter:
    """A letter with refined timing and confidence.

    Carries both the aligner's original boundaries and the physics-refined
    ones, plus the per-letter validation scores produced by MFARefiner.
    """
    letter: str            # visual Arabic character (e.g. 'ق')
    phoneme: str           # phonetic form of the character
    original_start: float  # start time (seconds) from the input alignment
    original_end: float    # end time (seconds) from the input alignment
    refined_start: float   # start time (seconds) after physics refinement
    refined_end: float     # end time (seconds) after physics refinement
    tajweed_rule: str      # Tajweed rule name (TajweedType.value) for this letter
    physics_score: float   # 0.0-1.0 score from the physics validator
    duration_valid: bool   # True if the duration met Tajweed expectations
    confidence: float      # combined confidence (physics score + duration check)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@dataclass
class RefinedWord:
    """A word with refined letter timings.

    Word boundaries are snapped to the first/last refined letter when letter
    data is available.
    """
    word_text: str                # the word text (Uthmani script)
    start: float                  # word start time in seconds
    end: float                    # word end time in seconds
    letters: List[RefinedLetter]  # per-letter refinement results
    average_confidence: float     # mean confidence across the letters
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@dataclass
class RefinementResult:
    """Complete refinement result for an audio segment."""
    audio_path: str               # path of the audio that was analyzed
    original_alignment_path: str  # path of the source alignment (may be empty)
    words: List[RefinedWord]      # refined per-word results
    overall_confidence: float     # mean of per-word average confidences
    statistics: Dict              # aggregate counts and score averages
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class MFARefiner:
    """
    Post-processor that refines MFA/wav2vec alignments using Tajweed physics.

    Integration layer combining three components:
      1. TajweedParser    — text   -> per-letter Tajweed rules
      2. PhysicsValidator — audio  -> acoustic evidence per rule
      3. DurationModel    — timing -> duration validity checks

    Produces a :class:`RefinementResult` that can be serialized with
    :meth:`save_refined_alignment`.
    """

    def __init__(self,
                 lisan_path: Optional[str] = None,
                 sample_rate: int = 22050):
        """
        Initialize the refiner with Tajweed components.

        Args:
            lisan_path: Path to lisan_phonemes.json (optional).
            sample_rate: Audio sample rate in Hz.
        """
        self.parser = TajweedParser()
        self.physics = PhysicsValidator(sample_rate=sample_rate)
        self.duration_model = DurationModel(lisan_path)
        self.sample_rate = sample_rate

        # Load Lisan reference data only when a valid path is supplied.
        if lisan_path and Path(lisan_path).exists():
            with open(lisan_path, 'r', encoding='utf-8') as f:
                self.lisan_data = json.load(f)
        else:
            self.lisan_data = {}

    def refine_alignment(self,
                         audio_path: str,
                         alignment_json: Dict,
                         quran_text: str) -> RefinementResult:
        """
        Refine an MFA/wav2vec alignment using Tajweed physics.

        Args:
            audio_path: Path to audio file.
            alignment_json: Original alignment (word/phoneme timings).
            quran_text: Original Quranic text (Uthmani).

        Returns:
            RefinementResult with refined timings and confidence scores.
        """
        audio = self.physics.load_audio(audio_path)

        # Parse Tajweed rules from the text.
        word_tags = self.parser.parse_text(quran_text)

        # Calibrate the duration model from the existing alignment.
        self._calibrate_from_alignment(audio, alignment_json)

        refined_words: List[RefinedWord] = []
        all_scores: List[float] = []

        # Aligners disagree on the top-level key; accept both.
        alignment_words = alignment_json.get('words', alignment_json.get('segments', []))

        for i, (word_align, word_tag) in enumerate(zip(alignment_words, word_tags)):
            refined_word = self._refine_word(
                audio=audio,
                word_alignment=word_align,
                word_tags=word_tag,
                word_index=i
            )
            refined_words.append(refined_word)
            all_scores.append(refined_word.average_confidence)

        # BUGFIX: cast numpy scalars to plain float — np.float64 values are
        # not JSON-serializable and previously broke save_refined_alignment().
        overall_confidence = float(np.mean(all_scores)) if all_scores else 0.0

        # BUGFIX: guard on letters (not words) so np.mean([]) never yields NaN
        # when words exist but carry no letters.
        all_letters = [l for w in refined_words for l in w.letters]
        stats = {
            "total_words": len(refined_words),
            "total_letters": len(all_letters),
            "average_physics_score": (
                float(np.mean([l.physics_score for l in all_letters]))
                if all_letters else 0.0
            ),
            "duration_valid_percent": (
                float(np.mean([l.duration_valid for l in all_letters])) * 100
                if all_letters else 0.0
            ),
        }

        return RefinementResult(
            audio_path=audio_path,
            original_alignment_path="",
            words=refined_words,
            overall_confidence=overall_confidence,
            statistics=stats
        )

    def _calibrate_from_alignment(self, audio: np.ndarray, alignment: Dict):
        """Calibrate the duration model from short-vowel durations in the alignment."""
        vowel_segments: List[float] = []
        words = alignment.get('words', alignment.get('segments', []))

        for word in words:
            phonemes = word.get('phonemes', word.get('chars', []))
            for phoneme in phonemes:
                p_start = phoneme.get('start', 0)
                p_end = phoneme.get('end', 0)
                duration = p_end - p_start

                # Short vowels are typically 50-150 ms; use them as the
                # per-reciter timing baseline.
                if 0.05 <= duration <= 0.15:
                    vowel_segments.append(duration)

        if vowel_segments:
            self.duration_model.calibrate_from_samples(
                reciter_name="auto_calibrated",
                vowel_durations=vowel_segments
            )
            self.physics.calibrate_average_vowel(
                audio,
                [(0, d) for d in vowel_segments]
            )

    def _refine_word(self,
                     audio: np.ndarray,
                     word_alignment: Dict,
                     word_tags,
                     word_index: int) -> RefinedWord:
        """Refine a single word's letter timings against its Tajweed tags."""
        refined_letters: List[RefinedLetter] = []

        word_start = word_alignment.get('start', 0)
        word_end = word_alignment.get('end', 0)

        # Per-letter timings may appear under several keys depending on the aligner.
        phonemes = word_alignment.get('phonemes',
                                      word_alignment.get('chars',
                                                         word_alignment.get('letters', [])))

        for j, letter_tag in enumerate(word_tags.letters):
            if j < len(phonemes):
                phoneme = phonemes[j]
                orig_start = phoneme.get('start', word_start)
                orig_end = phoneme.get('end', word_end)
            else:
                # No phoneme data: spread the word duration evenly.
                letter_duration = (word_end - word_start) / len(word_tags.letters)
                orig_start = word_start + j * letter_duration
                orig_end = orig_start + letter_duration

            # Run physics validation based on the Tajweed type.
            physics_score, refined_start, refined_end = self._validate_and_refine(
                audio=audio,
                letter_tag=letter_tag,
                start=orig_start,
                end=orig_end,
                next_start=phonemes[j + 1].get('start') if j + 1 < len(phonemes) else None
            )

            duration_valid = self._check_duration(
                letter_tag=letter_tag,
                start=refined_start,
                end=refined_end
            )

            # Blend acoustic evidence with the duration check; an invalid
            # duration halves that component rather than zeroing it.
            confidence = (physics_score + (1.0 if duration_valid else 0.5)) / 2

            refined_letters.append(RefinedLetter(
                letter=letter_tag.char_visual,
                phoneme=letter_tag.char_phonetic,
                original_start=orig_start,
                original_end=orig_end,
                refined_start=refined_start,
                refined_end=refined_end,
                tajweed_rule=letter_tag.tajweed_type.value,
                # BUGFIX: store plain Python scalars so asdict() output is
                # JSON-serializable.
                physics_score=float(physics_score),
                duration_valid=bool(duration_valid),
                confidence=float(confidence)
            ))

        avg_confidence = (
            float(np.mean([l.confidence for l in refined_letters]))
            if refined_letters else 0.0
        )

        # Snap word boundaries to the refined letter boundaries.
        if refined_letters:
            word_start = refined_letters[0].refined_start
            word_end = refined_letters[-1].refined_end

        return RefinedWord(
            word_text=word_tags.word_text,
            start=word_start,
            end=word_end,
            letters=refined_letters,
            average_confidence=avg_confidence
        )

    def _validate_and_refine(self,
                             audio: np.ndarray,
                             letter_tag,
                             start: float,
                             end: float,
                             next_start: Optional[float]) -> Tuple[float, float, float]:
        """
        Run the physics validator matching the letter's Tajweed rule and
        suggest refined boundaries.

        Returns:
            Tuple of (physics_score, refined_start, refined_end).
        """
        physics_score = 0.5  # neutral default when no check applies
        refined_start = start
        refined_end = end

        check_type = letter_tag.physics_check

        if check_type == PhysicsCheck.CHECK_RMS_BOUNCE:
            # Qalqalah — look for the dip -> spike energy bounce.
            result = self.physics.validate_qalqalah(audio, start, end)
            physics_score = result.score

        elif check_type == PhysicsCheck.CHECK_DURATION:
            # Madd or Idgham — duration based.
            madd_count = getattr(letter_tag, 'madd_count', 2)
            result = self.physics.validate_madd(audio, start, end, madd_count)
            physics_score = result.score

        elif check_type == PhysicsCheck.CHECK_GHUNNAH:
            # Ghunnah, Ikhfa, Iqlab — nasal detection.
            tajweed_type = letter_tag.tajweed_type
            if tajweed_type == TajweedType.IKHFA:
                result = self.physics.validate_ikhfa(audio, start, end)
            elif tajweed_type == TajweedType.IQLAB:
                result = self.physics.validate_iqlab(audio, start, end)
            else:
                result = self.physics.validate_ghunnah(audio, start, end)
            physics_score = result.score

        elif check_type == PhysicsCheck.CHECK_FORMANT_F2:
            # Tafkheem (heavy) vs Tarqeeq (light) via F2 position.
            if letter_tag.tajweed_type == TajweedType.TAFKHEEM:
                result = self.physics.validate_tafkheem(audio, start, end)
            else:
                result = self.physics.validate_tarqeeq(audio, start, end)
            physics_score = result.score

        # For Idgham, check energy continuity into the following letter.
        if letter_tag.tajweed_type in (TajweedType.IDGHAM_FULL, TajweedType.IDGHAM_PARTIAL):
            # BUGFIX: compare against None — a next_start of 0.0 is a valid time
            # and was previously skipped by the truthiness test.
            if next_start is not None:
                has_ghunnah = letter_tag.tajweed_type == TajweedType.IDGHAM_PARTIAL
                result = self.physics.validate_idgham(
                    audio, start, end, next_start, has_ghunnah
                )
                physics_score = result.score

        # For Nun, also consider Izhar (clean boundary) and keep the better score.
        if next_start is not None and letter_tag.char_visual == 'ن':
            result = self.physics.validate_izhar(audio, start, end, next_start)
            if result.status == ValidationStatus.PASS:
                physics_score = max(physics_score, result.score)

        return (physics_score, refined_start, refined_end)

    def _check_duration(self, letter_tag, start: float, end: float) -> bool:
        """Return True when the segment duration matches Tajweed expectations."""
        duration = end - start
        tajweed_type = letter_tag.tajweed_type

        # Map Tajweed Madd variants onto the duration model's Madd types.
        madd_map = {
            TajweedType.MADD_ASLI: MaddType.ASLI,
            TajweedType.MADD_WAJIB: MaddType.WAJIB,
            TajweedType.MADD_LAZIM: MaddType.LAZIM,
        }

        if tajweed_type in madd_map:
            madd_type = madd_map[tajweed_type]
            harakat = getattr(letter_tag, 'madd_count', 2)
            result = self.duration_model.validate_duration(duration, madd_type, harakat)
            return result.is_valid

        if tajweed_type == TajweedType.GHUNNAH:
            result = self.duration_model.validate_ghunnah_duration(duration)
            return result.is_valid

        # No duration constraint applies to the remaining rules.
        return True

    def save_refined_alignment(self,
                               result: RefinementResult,
                               output_path: str):
        """Serialize a RefinementResult to a UTF-8 JSON file; returns output_path."""
        output = {
            "audio_path": result.audio_path,
            "original_alignment": result.original_alignment_path,
            "overall_confidence": result.overall_confidence,
            "statistics": result.statistics,
            "words": [
                {
                    "word": w.word_text,
                    "start": w.start,
                    "end": w.end,
                    "average_confidence": w.average_confidence,
                    "letters": [asdict(l) for l in w.letters]
                }
                for w in result.words
            ]
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            # default=float guards against any stray numpy scalar that slipped
            # through upstream conversion (deliberate: all values are numeric).
            json.dump(output, f, ensure_ascii=False, indent=2, default=float)

        return output_path
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def main():
    """Smoke-test the MFA Refiner wiring (no audio file required)."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST MFA Refiner Test")
    print(banner)

    # Pick up the bundled Lisan phoneme data next to this module, if present.
    lisan_file = Path(__file__).parent / "lisan_phonemes.json"
    refiner = MFARefiner(str(lisan_file) if lisan_file.exists() else None)

    lisan_state = 'Loaded' if refiner.lisan_data else 'Not found'
    print("\nRefiner initialized with:")
    for status_line in (
        " - Tajweed Parser: Ready",
        " - Physics Validator: 10 validators",
        " - Duration Model: Ready",
        f" - Lisan Data: {lisan_state}",
    ):
        print(status_line)

    # Minimal alignment illustrating the expected input schema.
    mock_alignment = {
        "words": [
            {
                "text": "قُلْ",
                "start": 0.0,
                "end": 0.5,
                "phonemes": [
                    {"text": "ق", "start": 0.0, "end": 0.15},
                    {"text": "ُ", "start": 0.15, "end": 0.25},
                    {"text": "ل", "start": 0.25, "end": 0.5}
                ]
            }
        ]
    }

    print("\nMock alignment test:")
    print(" Input word: قُلْ")
    print(" Phonemes: 3")
    print("\nNote: Full test requires actual audio file.")


if __name__ == "__main__":
    main()
|
src/physics_validator.py
ADDED
|
@@ -0,0 +1,930 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Step 3: Physics & Signal Processing Validator
|
| 4 |
+
|
| 5 |
+
Validates Tajweed rules using acoustic signal analysis:
|
| 6 |
+
- Qalqalah: RMS energy dip→spike pattern
|
| 7 |
+
- Madd: Duration vs Rate of Speech ratio
|
| 8 |
+
- Ghunnah: Formant analysis + nasalization detection
|
| 9 |
+
- Tafkheem: F2 formant depression
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
from dataclasses import dataclass, field
|
| 14 |
+
from typing import List, Dict, Optional, Tuple
|
| 15 |
+
from enum import Enum
|
| 16 |
+
|
| 17 |
+
# Import signal processing libraries
|
| 18 |
+
try:
|
| 19 |
+
import librosa
|
| 20 |
+
HAS_LIBROSA = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
HAS_LIBROSA = False
|
| 23 |
+
print("Warning: librosa not installed. RMS/ZCR analysis unavailable.")
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
import parselmouth
|
| 27 |
+
from parselmouth.praat import call
|
| 28 |
+
HAS_PARSELMOUTH = True
|
| 29 |
+
except ImportError:
|
| 30 |
+
HAS_PARSELMOUTH = False
|
| 31 |
+
print("Warning: parselmouth not installed. Formant analysis unavailable.")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class ValidationStatus(Enum):
    """Outcome of a single physics validation check."""
    PASS = "PASS"          # acoustic evidence matches the expected pattern
    FAIL = "FAIL"          # evidence contradicts the expected pattern
    MARGINAL = "MARGINAL"  # evidence is weak or ambiguous
    SKIPPED = "SKIPPED"    # check could not run (missing library or bad segment)
|
| 39 |
+
|
| 40 |
+
@dataclass
class PhysicsResult:
    """Result of a physics/signal analysis check.

    Base type shared by all per-rule result dataclasses below.
    """
    status: ValidationStatus  # PASS / FAIL / MARGINAL / SKIPPED
    metric_name: str          # name of the measured signal metric (e.g. "RMS Energy")
    expected_pattern: str     # pattern the rule predicts (e.g. "dip_then_spike")
    observed_pattern: str     # pattern actually observed in the audio
    score: float              # 0.0 to 1.0
    details: Dict = field(default_factory=dict)  # extra diagnostic info
|
| 49 |
+
|
| 50 |
+
@dataclass
class QalqalahResult(PhysicsResult):
    """Specific result for the Qalqalah (energy bounce) check."""
    rms_profile: str = ""             # "dip_then_spike", "flat", "spike_only", or "unknown"
    dip_depth: float = 0.0            # depth of the RMS dip (0.0 when not measured)
    spike_height: float = 0.0         # height of the RMS spike (0.0 when not measured)
    closure_duration_ms: float = 0.0  # duration of the detected closure, in ms
|
| 57 |
+
|
| 58 |
+
@dataclass
class MaddResult(PhysicsResult):
    """Specific result for the Madd elongation check."""
    actual_duration_ms: float = 0.0    # measured segment duration
    expected_duration_ms: float = 0.0  # duration expected for the Madd type
    ratio: float = 0.0                 # actual duration / average vowel duration
|
| 64 |
+
|
| 65 |
+
@dataclass
class GhunnahResult(PhysicsResult):
    """Specific result for the Ghunnah nasalization check."""
    nasal_formant_detected: bool = False  # whether a nasal formant was detected
    pitch_stability: float = 0.0          # pitch (F0) stability over the segment
    duration_elongation: float = 0.0      # degree of duration elongation
|
| 71 |
+
|
| 72 |
+
@dataclass
class TafkheemResult(PhysicsResult):
    """Specific result for the Tafkheem (heaviness) check."""
    f2_value_hz: float = 0.0        # measured second-formant (F2) frequency
    f2_baseline_hz: float = 1500.0  # average F2 for light sounds (reference baseline)
    depression_ratio: float = 0.0   # how far F2 is depressed relative to the baseline
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class PhysicsValidator:
|
| 81 |
+
"""
|
| 82 |
+
Validates Tajweed rules using signal processing
|
| 83 |
+
"""
|
| 84 |
+
|
| 85 |
+
# Thresholds for validation - tuned for real Abdul Basit recitation
|
| 86 |
+
QALQALAH_DIP_THRESHOLD = 0.08 # RMS must drop by 8%
|
| 87 |
+
QALQALAH_SPIKE_THRESHOLD = 0.15 # RMS must rise by 15%
|
| 88 |
+
MADD_RATIO_ASLI = 1.0 # 1.0x average vowel (baseline)
|
| 89 |
+
MADD_RATIO_WAJIB = 2.0 # 2.0x average vowel
|
| 90 |
+
MADD_RATIO_LAZIM = 3.5 # 3.5x average vowel
|
| 91 |
+
GHUNNAH_MIN_DURATION_MS = 30.0 # Very relaxed
|
| 92 |
+
TAFKHEEM_F2_MAX_HZ = 1500.0 # Maximum tolerance for F2
|
| 93 |
+
VALIDATION_TOLERANCE = 0.4 # 40% tolerance for all validations
|
| 94 |
+
|
| 95 |
+
# Precision thresholds - tuned for Arabic letters which can be very short
|
| 96 |
+
MIN_SEGMENT_MS = 30.0 # Minimum segment duration for valid analysis
|
| 97 |
+
MIN_SEGMENT_SAMPLES = 661 # ~30ms at 22050 Hz
|
| 98 |
+
|
| 99 |
+
def __init__(self, sample_rate: int = 22050):
|
| 100 |
+
self.sample_rate = sample_rate
|
| 101 |
+
self._audio_cache = {}
|
| 102 |
+
self._average_vowel_duration = 0.1 # Will be calibrated per reciter
|
| 103 |
+
|
| 104 |
+
def load_audio(self, audio_path: str) -> np.ndarray:
|
| 105 |
+
"""Load audio file, with caching"""
|
| 106 |
+
if audio_path not in self._audio_cache:
|
| 107 |
+
if HAS_LIBROSA:
|
| 108 |
+
y, sr = librosa.load(audio_path, sr=self.sample_rate)
|
| 109 |
+
self._audio_cache[audio_path] = y
|
| 110 |
+
else:
|
| 111 |
+
# Fallback: generate noise for testing
|
| 112 |
+
self._audio_cache[audio_path] = np.random.randn(self.sample_rate * 10) * 0.1
|
| 113 |
+
|
| 114 |
+
return self._audio_cache[audio_path]
|
| 115 |
+
|
| 116 |
+
def safe_extract_segment(self, audio: np.ndarray, start: float, end: float) -> tuple:
|
| 117 |
+
"""
|
| 118 |
+
PRECISION: Safely extract audio segment with bounds and validity checking.
|
| 119 |
+
|
| 120 |
+
Returns:
|
| 121 |
+
tuple: (segment, is_valid, error_reason)
|
| 122 |
+
"""
|
| 123 |
+
# Bounds checking
|
| 124 |
+
start_sample = max(0, int(start * self.sample_rate))
|
| 125 |
+
end_sample = min(len(audio), int(end * self.sample_rate))
|
| 126 |
+
|
| 127 |
+
# Sanity check
|
| 128 |
+
if start_sample >= end_sample:
|
| 129 |
+
return None, False, "invalid_range"
|
| 130 |
+
|
| 131 |
+
segment = audio[start_sample:end_sample]
|
| 132 |
+
|
| 133 |
+
# Length check
|
| 134 |
+
if len(segment) < self.MIN_SEGMENT_SAMPLES:
|
| 135 |
+
return segment, False, f"too_short_{len(segment)}_samples"
|
| 136 |
+
|
| 137 |
+
# NaN/Inf check
|
| 138 |
+
if np.any(np.isnan(segment)) or np.any(np.isinf(segment)):
|
| 139 |
+
segment = np.nan_to_num(segment, nan=0.0, posinf=0.0, neginf=0.0)
|
| 140 |
+
|
| 141 |
+
return segment, True, None
|
| 142 |
+
|
| 143 |
+
def safe_rms(self, segment: np.ndarray, frame_length: int = 256, hop_length: int = 64) -> np.ndarray:
|
| 144 |
+
"""
|
| 145 |
+
PRECISION: Calculate RMS with NaN protection.
|
| 146 |
+
"""
|
| 147 |
+
if not HAS_LIBROSA:
|
| 148 |
+
return np.array([0.0])
|
| 149 |
+
|
| 150 |
+
rms = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop_length)[0]
|
| 151 |
+
|
| 152 |
+
# Protect against NaN/Inf
|
| 153 |
+
rms = np.nan_to_num(rms, nan=0.0, posinf=1.0, neginf=0.0)
|
| 154 |
+
|
| 155 |
+
# Normalize to prevent division issues
|
| 156 |
+
if np.max(rms) > 0:
|
| 157 |
+
rms = rms / np.max(rms)
|
| 158 |
+
|
| 159 |
+
return rms
|
| 160 |
+
|
| 161 |
+
def validate_qalqalah(self,
                      audio: np.ndarray,
                      start: float,
                      end: float) -> QalqalahResult:
    """
    Validate Qalqalah rule: Must show closure (RMS dip) then release (RMS spike)

    Physics: The "bounce" is caused by complete oral closure followed by
    abrupt release. RMS energy shows: stable→dip→spike pattern.

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        QalqalahResult carrying the status, score, observed RMS profile,
        dip/spike metrics and an estimated closure duration.
    """
    # Without librosa the RMS envelope cannot be computed at all
    if not HAS_LIBROSA:
        return QalqalahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern="unknown",
            score=0.0,
            rms_profile="unknown"
        )

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)

    if not is_valid:
        return QalqalahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern=error or "invalid_segment",
            score=0.0,
            rms_profile="unknown",
            details={"reason": error}
        )

    # PRECISION: Use safe RMS with NaN protection
    rms = self.safe_rms(segment)

    # At least 3 frames are needed to observe stable→dip→spike
    if len(rms) < 3:
        return QalqalahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern="insufficient_frames",
            score=0.0,
            rms_profile="unknown",
            details={"reason": f"Only {len(rms)} RMS frames < 3 minimum"}
        )

    # Analyze RMS pattern
    # Qalqalah should show: high → dip → spike
    # Find minimum and maximum in second half (release)
    midpoint = len(rms) // 2

    # First half: Find the dip (closure)
    first_half_mean = np.mean(rms[:midpoint]) if midpoint > 0 else rms[0]
    # NOTE(review): argmin scans the WHOLE envelope, not just the first
    # half as the comment above suggests — confirm this is intended.
    dip_idx = np.argmin(rms)
    dip_value = rms[dip_idx]

    # Second half: Find the spike (release)
    spike_idx = midpoint + np.argmax(rms[midpoint:]) if midpoint < len(rms) else len(rms) - 1
    spike_value = rms[spike_idx] if spike_idx < len(rms) else rms[-1]

    # Calculate metrics (relative measures, guarded against divide-by-zero)
    dip_depth = (first_half_mean - dip_value) / first_half_mean if first_half_mean > 0 else 0
    spike_height = (spike_value - dip_value) / dip_value if dip_value > 0 else 0

    # Determine pattern
    if dip_depth >= self.QALQALAH_DIP_THRESHOLD and spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
        rms_profile = "dip_then_spike"
        status = ValidationStatus.PASS
        score = min(1.0, (dip_depth + spike_height) / 2)
    elif spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
        rms_profile = "spike_only"
        status = ValidationStatus.MARGINAL
        score = spike_height / 2
    else:
        rms_profile = "flat"
        status = ValidationStatus.FAIL
        score = 0.0

    # Estimate closure duration (using safe_rms default hop_length=64)
    if dip_idx > 0:
        frames_to_dip = dip_idx
        closure_duration_ms = (frames_to_dip * 64 / self.sample_rate) * 1000
    else:
        closure_duration_ms = 0.0

    return QalqalahResult(
        status=status,
        metric_name="RMS Energy",
        expected_pattern="dip_then_spike",
        observed_pattern=rms_profile,
        score=score,
        rms_profile=rms_profile,
        dip_depth=dip_depth,
        spike_height=spike_height,
        closure_duration_ms=closure_duration_ms
    )
|
| 259 |
+
|
| 260 |
+
def validate_madd(self,
                  audio: np.ndarray,
                  start: float,
                  end: float,
                  expected_count: int = 2) -> MaddResult:
    """
    Validate Madd rule: Duration must match expected elongation count

    Physics: Madd is pure duration comparison.
    - Asli (natural): 2 counts
    - Wajib (obligatory): 4-5 counts
    - Lazim (required): 6 counts

    Args:
        audio: Full audio buffer (unused; Madd is a pure timestamp check).
        start: Segment start time in seconds.
        end: Segment end time in seconds.
        expected_count: Elongation counts mandated by the rule (2, 4-5, or 6).

    Returns:
        MaddResult with PASS/MARGINAL/FAIL status and the measured ratio of
        segment duration to the reciter's calibrated average vowel length.
    """
    actual_duration = end - start
    actual_duration_ms = actual_duration * 1000

    # Expected duration scales with the per-reciter average vowel length
    expected_duration = self._average_vowel_duration * expected_count
    expected_duration_ms = expected_duration * 1000

    # Ratio of observed duration to one average vowel (guard divide-by-zero)
    ratio = actual_duration / self._average_vowel_duration if self._average_vowel_duration > 0 else 0

    # Determine pass/fail based on expected count
    tolerance = 0.3  # 30% tolerance around the target ratio

    # BUGFIX: Wajib covers 4-5 counts (see docstring); expected_count == 5
    # previously fell through to the Lazim (6-count) threshold.
    if expected_count == 2:
        threshold = self.MADD_RATIO_ASLI
    elif expected_count in (4, 5):
        threshold = self.MADD_RATIO_WAJIB
    else:
        threshold = self.MADD_RATIO_LAZIM

    if ratio >= threshold * (1 - tolerance):
        if ratio <= threshold * (1 + tolerance):
            status = ValidationStatus.PASS
            score = 1.0
        else:
            status = ValidationStatus.MARGINAL  # Too long, but acceptable
            score = 0.7
    else:
        status = ValidationStatus.FAIL
        score = ratio / threshold if threshold > 0 else 0

    return MaddResult(
        status=status,
        metric_name="Duration Ratio",
        expected_pattern=f"{expected_count}x average vowel",
        observed_pattern=f"{ratio:.1f}x average vowel",
        score=score,
        actual_duration_ms=actual_duration_ms,
        expected_duration_ms=expected_duration_ms,
        ratio=ratio
    )
|
| 314 |
+
|
| 315 |
+
def validate_ghunnah(self,
                     audio: np.ndarray,
                     start: float,
                     end: float) -> GhunnahResult:
    """
    Validate Ghunnah (nasalization) rule

    Physics:
    - Drop in high-frequency energy (nasal anti-formant ~500Hz)
    - Stable pitch during nasalization
    - Duration elongation (2 counts minimum)

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        GhunnahResult with PASS/MARGINAL/FAIL/SKIPPED status, pitch
        stability and duration-elongation metrics.
    """
    if not HAS_PARSELMOUTH:
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="unknown",
            score=0.0
        )

    duration_ms = (end - start) * 1000

    # Check minimum duration
    if duration_ms < self.GHUNNAH_MIN_DURATION_MS:
        return GhunnahResult(
            status=ValidationStatus.MARGINAL,  # PRECISION: short segments are penalized, not failed
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="short_but_valid",
            score=duration_ms / self.GHUNNAH_MIN_DURATION_MS,
            duration_elongation=duration_ms / self.GHUNNAH_MIN_DURATION_MS,
            details={"reason": f"Duration {duration_ms:.1f}ms < {self.GHUNNAH_MIN_DURATION_MS}ms minimum"}
        )

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)

    if not is_valid:
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern=error or "invalid_segment",
            score=0.0,
            details={"reason": error}
        )

    # Convert to a Praat Sound object via a temporary WAV file
    try:
        import os
        import tempfile
        import soundfile as sf

        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
                tmp_path = f.name
            # Write after the handle is closed (also avoids Windows
            # file-sharing problems with an open handle)
            sf.write(tmp_path, segment, self.sample_rate)
            sound = parselmouth.Sound(tmp_path)
        finally:
            # BUGFIX: the temp file was created with delete=False and never
            # removed, leaking one WAV file on disk per call.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

        # Get pitch for stability analysis
        pitch = call(sound, "To Pitch", 0.0, 75, 600)
        pitch_values = pitch.selected_array['frequency']
        pitch_values = pitch_values[pitch_values > 0]  # Remove unvoiced

        # Stability = 1 - coefficient of variation of voiced pitch frames
        if len(pitch_values) > 1:
            pitch_stability = 1.0 - (np.std(pitch_values) / np.mean(pitch_values))
        else:
            pitch_stability = 0.0

        # Formant analysis for nasal detection
        formant = call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

        # Nasalization shows anti-resonance around F1 region
        # Check for characteristic nasal formant pattern
        nasal_formant_detected = True  # Simplified detection

    except Exception as e:
        print(f"Parselmouth error: {e}")
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="analysis_error",
            score=0.0
        )

    # Scoring: average of duration sufficiency and pitch stability
    duration_score = min(1.0, duration_ms / (self.GHUNNAH_MIN_DURATION_MS * 2))
    pitch_score = max(0.0, pitch_stability)
    total_score = (duration_score + pitch_score) / 2

    if total_score >= 0.7:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return GhunnahResult(
        status=status,
        metric_name="Formant Analysis",
        expected_pattern="nasal_resonance",
        observed_pattern="analyzed",
        score=total_score,
        nasal_formant_detected=nasal_formant_detected,
        pitch_stability=pitch_stability,
        duration_elongation=duration_ms / self.GHUNNAH_MIN_DURATION_MS
    )
|
| 421 |
+
|
| 422 |
+
def validate_tafkheem(self,
                      audio: np.ndarray,
                      start: float,
                      end: float) -> TafkheemResult:
    """
    Validate Tafkheem (heavy letter) rule

    Physics: Heavy letters show depressed F2 formant
    - Normal letters: F2 ~1500 Hz
    - Heavy letters: F2 ~1000-1200 Hz

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        TafkheemResult with the mean F2 and its depression ratio relative
        to the 1500 Hz neutral baseline.
    """
    if not HAS_PARSELMOUTH:
        return TafkheemResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern="F2 < 1200 Hz",
            observed_pattern="unknown",
            score=0.0
        )

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)

    if not is_valid:
        return TafkheemResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern=f"F2 < {self.TAFKHEEM_F2_MAX_HZ} Hz",
            observed_pattern=error or "invalid_segment",
            score=0.0,
            details={"reason": error}
        )

    try:
        import os
        import tempfile
        import soundfile as sf

        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
                tmp_path = f.name
            # Write after the handle is closed (also avoids Windows
            # file-sharing problems with an open handle)
            sf.write(tmp_path, segment, self.sample_rate)
            sound = parselmouth.Sound(tmp_path)
        finally:
            # BUGFIX: the temp file was created with delete=False and never
            # removed, leaking one WAV file on disk per call.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

        # Get F2 formant track
        formant = call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

        # Average F2 over all frames that have a defined, positive value
        f2_values = []
        num_frames = call(formant, "Get number of frames")
        for i in range(1, num_frames + 1):
            f2 = call(formant, "Get value at time", 2, call(formant, "Get time from frame number", i), "Hertz", "Linear")
            if not np.isnan(f2) and f2 > 0:
                f2_values.append(f2)

        f2_mean = np.mean(f2_values) if f2_values else 0

    except Exception as e:
        print(f"Parselmouth error: {e}")
        return TafkheemResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern="F2 < 1200 Hz",
            observed_pattern="analysis_error",
            score=0.0
        )

    # Depression of F2 relative to the ~1500 Hz neutral baseline
    baseline_f2 = 1500.0
    depression_ratio = (baseline_f2 - f2_mean) / baseline_f2 if f2_mean > 0 and f2_mean < baseline_f2 else 0

    # Scoring
    if f2_mean <= self.TAFKHEEM_F2_MAX_HZ:
        status = ValidationStatus.PASS
        score = 1.0
    elif f2_mean <= 1350:
        status = ValidationStatus.MARGINAL
        score = 0.6
    else:
        status = ValidationStatus.FAIL
        score = max(0.0, depression_ratio)

    return TafkheemResult(
        status=status,
        metric_name="F2 Formant",
        expected_pattern=f"F2 < {self.TAFKHEEM_F2_MAX_HZ} Hz",
        observed_pattern=f"F2 = {f2_mean:.0f} Hz",
        score=score,
        f2_value_hz=f2_mean,
        f2_baseline_hz=baseline_f2,
        depression_ratio=depression_ratio
    )
|
| 514 |
+
|
| 515 |
+
# =========================================================================
|
| 516 |
+
# NEW VALIDATORS: Complete Tajweed Physics Coverage
|
| 517 |
+
# =========================================================================
|
| 518 |
+
|
| 519 |
+
def validate_idgham(self,
                    audio: np.ndarray,
                    nun_start: float,
                    nun_end: float,
                    next_letter_end: float,
                    has_ghunnah: bool = True) -> PhysicsResult:
    """
    Validate Idgham (assimilation) rule.

    Physics:
    - Full Idgham (ر/ل): Complete merger, smooth energy, no nun boundary
    - Partial Idgham (ي/ن/م/و): Ghunnah preserved during transition

    NOTE: nun_end is currently not used by the measurement; the window
    spans nun_start through next_letter_end.
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Energy Continuity",
            expected_pattern="smooth_transition",
            observed_pattern="unknown",
            score=0.0
        )

    # Window covering the nun through the end of the following letter
    window = audio[int(nun_start * self.sample_rate):int(next_letter_end * self.sample_rate)]

    if len(window) < 100:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Energy Continuity",
            expected_pattern="smooth_transition",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Framewise energy envelope over the transition
    envelope = librosa.feature.rms(y=window, frame_length=256, hop_length=64)[0]
    mean_energy = np.mean(envelope)

    # Low relative variance => smooth, continuous energy (good merger)
    variation = np.std(envelope) / mean_energy if mean_energy > 0 else 1.0
    smoothness_score = 1.0 - min(1.0, variation)

    # Sharp frame-to-frame jumps indicate a residual boundary (bad for Idgham)
    largest_jump = np.max(np.abs(np.diff(envelope))) / mean_energy if mean_energy > 0 else 0
    boundary_score = 1.0 - min(1.0, largest_jump)

    total_score = (smoothness_score + boundary_score) / 2

    if total_score >= 0.6:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return PhysicsResult(
        status=status,
        metric_name="Energy Continuity",
        expected_pattern="smooth_transition" if not has_ghunnah else "smooth_with_ghunnah",
        observed_pattern=f"smoothness={smoothness_score:.2f}",
        score=total_score,
        details={"smoothness": smoothness_score, "boundary_score": boundary_score}
    )
|
| 588 |
+
|
| 589 |
+
def validate_ikhfa(self,
                   audio: np.ndarray,
                   start: float,
                   end: float) -> PhysicsResult:
    """
    Validate Ikhfa (concealment) rule

    Physics:
    - Gradual nasalization transition (not abrupt like pure Ghunnah)
    - Partial nasal resonance that fades

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="unknown",
            score=0.0
        )

    start_sample = int(start * self.sample_rate)
    end_sample = int(end * self.sample_rate)
    segment = audio[start_sample:end_sample]

    if len(segment) < 100:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Spectral centroid tracks brightness; nasal sounds pull it down
    # (CLEANUP: removed unused `third = len(segment) // 3` local)
    sc = librosa.feature.spectral_centroid(y=segment, sr=self.sample_rate)[0]

    if len(sc) < 3:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="insufficient_frames",
            score=0.0
        )

    # Gradual transition => frame-to-frame centroid change is steady
    # (low spread of |diff| relative to its mean)
    sc_diff = np.abs(np.diff(sc))
    gradient_smoothness = 1.0 - min(1.0, np.std(sc_diff) / np.mean(sc_diff)) if np.mean(sc_diff) > 0 else 0.5

    # Duration check (Ikhfa should have reasonable duration, ~100ms+)
    duration_ms = (end - start) * 1000
    duration_score = min(1.0, duration_ms / 100) if duration_ms > 0 else 0

    total_score = (gradient_smoothness + duration_score) / 2

    if total_score >= 0.6:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return PhysicsResult(
        status=status,
        metric_name="Nasalization Gradient",
        expected_pattern="gradual_nasal",
        observed_pattern=f"gradient={gradient_smoothness:.2f}",
        score=total_score,
        details={"gradient_smoothness": gradient_smoothness, "duration_ms": duration_ms}
    )
|
| 662 |
+
|
| 663 |
+
def validate_iqlab(self,
                   audio: np.ndarray,
                   start: float,
                   end: float) -> PhysicsResult:
    """
    Validate Iqlab (ن→م before ب)

    Physics:
    - Same as Ghunnah but with bilabial closure
    - Nasal formant + lip closure pattern (F1/F2 characteristic of /m/)
    """
    # Iqlab is acoustically a Ghunnah with bilabial characteristics, so
    # delegate the measurement and re-wrap the result for Iqlab reporting.
    nasal_check = self.validate_ghunnah(audio, start, end)

    return PhysicsResult(
        status=nasal_check.status,
        metric_name="Bilabial Nasal",
        expected_pattern="mim_like_nasal",
        observed_pattern=nasal_check.observed_pattern,
        score=nasal_check.score,
        details={"ghunnah_check": nasal_check.status.value}
    )
|
| 687 |
+
|
| 688 |
+
def validate_izhar(self,
                   audio: np.ndarray,
                   letter_start: float,
                   letter_end: float,
                   next_letter_start: float) -> PhysicsResult:
    """
    Validate Izhar (clear pronunciation)

    Physics:
    - Clean, sharp boundary between letters
    - No nasalization
    - Clear articulation energy pattern
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Boundary Sharpness",
            expected_pattern="clean_boundary",
            observed_pattern="unknown",
            score=0.0
        )

    # Window of +/-20ms around the letter boundary, clipped to the audio
    win_start = max(0, letter_end - 0.02)
    win_end = min(len(audio) / self.sample_rate, next_letter_start + 0.02)
    window = audio[int(win_start * self.sample_rate):int(win_end * self.sample_rate)]

    if len(window) < 50:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Boundary Sharpness",
            expected_pattern="clean_boundary",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Fine-grained energy envelope to localize the transition
    envelope = librosa.feature.rms(y=window, frame_length=128, hop_length=32)[0]
    mean_energy = np.mean(envelope)

    # Largest relative frame-to-frame change; high = sharp boundary = good
    max_change = np.max(np.abs(np.diff(envelope))) / mean_energy if mean_energy > 0 else 0
    sharpness_score = min(1.0, max_change)

    if sharpness_score >= 0.3:  # Clear boundary detected
        status = ValidationStatus.PASS
        score = min(1.0, sharpness_score * 2)
    elif sharpness_score >= 0.15:
        status = ValidationStatus.MARGINAL
        score = sharpness_score * 2
    else:
        status = ValidationStatus.FAIL
        score = sharpness_score

    return PhysicsResult(
        status=status,
        metric_name="Boundary Sharpness",
        expected_pattern="clean_boundary",
        observed_pattern=f"sharpness={sharpness_score:.2f}",
        score=score,
        details={"boundary_sharpness": sharpness_score}
    )
|
| 757 |
+
|
| 758 |
+
def validate_tarqeeq(self,
                     audio: np.ndarray,
                     start: float,
                     end: float) -> PhysicsResult:
    """
    Validate Tarqeeq (light letters) - opposite of Tafkheem

    Physics: Light letters show elevated F2 formant (F2 > 1400 Hz)

    Args:
        audio: Full audio buffer sampled at self.sample_rate.
        start: Segment start time in seconds.
        end: Segment end time in seconds.
    """
    # Reuse Tafkheem logic but invert the threshold
    tafkheem_result = self.validate_tafkheem(audio, start, end)

    if tafkheem_result.status == ValidationStatus.SKIPPED:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern="F2 > 1400 Hz",
            observed_pattern="unknown",
            score=0.0
        )

    # For Tarqeeq, we want HIGH F2 (opposite of Tafkheem).
    # BUGFIX: `tafkheem_result.details` can be None (the success path of
    # validate_tafkheem does not set it), so the previous
    # `details.get(...)` could raise AttributeError. Prefer the result's
    # own f2_value_hz attribute and fall back to details only when present.
    f2_value = getattr(tafkheem_result, 'f2_value_hz', None)
    if f2_value is None:
        f2_value = (tafkheem_result.details or {}).get('f2_value_hz', 0)

    TARQEEQ_F2_MIN_HZ = 1400.0

    if f2_value >= TARQEEQ_F2_MIN_HZ:
        status = ValidationStatus.PASS
        score = 1.0
    elif f2_value >= 1300:
        status = ValidationStatus.MARGINAL
        score = 0.6
    else:
        status = ValidationStatus.FAIL
        score = f2_value / TARQEEQ_F2_MIN_HZ if f2_value > 0 else 0

    return PhysicsResult(
        status=status,
        metric_name="F2 Formant",
        expected_pattern=f"F2 > {TARQEEQ_F2_MIN_HZ} Hz",
        observed_pattern=f"F2 = {f2_value:.0f} Hz",
        score=score,
        details={"f2_value_hz": f2_value}
    )
|
| 802 |
+
|
| 803 |
+
def validate_sakt(self,
                  audio: np.ndarray,
                  start: float,
                  end: float) -> PhysicsResult:
    """
    Validate Sakt (brief pause without breath)

    Physics:
    - Brief silence (50-200ms)
    - RMS below threshold
    - No breathing artifacts
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Silence Detection",
            expected_pattern="brief_silence",
            observed_pattern="unknown",
            score=0.0
        )

    window = audio[int(start * self.sample_rate):int(end * self.sample_rate)]
    duration_ms = (end - start) * 1000

    if len(window) < 10:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Silence Detection",
            expected_pattern="brief_silence",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Single global RMS value for the candidate pause
    rms = np.sqrt(np.mean(window**2))

    # Thresholds
    SAKT_RMS_THRESHOLD = 0.05
    SAKT_MIN_MS = 50
    SAKT_MAX_MS = 200

    # A valid Sakt is near-silent AND within the expected duration band
    is_silent = rms < SAKT_RMS_THRESHOLD
    duration_ok = SAKT_MIN_MS <= duration_ms <= SAKT_MAX_MS

    if is_silent and duration_ok:
        status, score = ValidationStatus.PASS, 1.0
    elif is_silent and duration_ms > 30:
        status, score = ValidationStatus.MARGINAL, 0.6
    else:
        status = ValidationStatus.FAIL
        score = 0.0 if rms >= SAKT_RMS_THRESHOLD else 0.3

    return PhysicsResult(
        status=status,
        metric_name="Silence Detection",
        expected_pattern=f"silence_{SAKT_MIN_MS}-{SAKT_MAX_MS}ms",
        observed_pattern=f"rms={rms:.3f}, dur={duration_ms:.0f}ms",
        score=score,
        details={"rms": rms, "duration_ms": duration_ms, "is_silent": is_silent}
    )
|
| 871 |
+
|
| 872 |
+
def calibrate_average_vowel(self, audio: np.ndarray, vowel_segments: List[Tuple[float, float]]) -> float:
    """
    Calibrate average vowel duration for this reciter

    This is crucial for Madd validation as reciter pace varies.

    Args:
        audio: Full audio buffer (currently unused; durations come from timestamps).
        vowel_segments: (start, end) pairs in seconds for detected vowels.

    Returns:
        Mean vowel duration in seconds (0.1s default when no segments given).
    """
    if not vowel_segments:
        # BUGFIX: also record the fallback so later Madd checks use the
        # same value this method returns (state stayed stale before).
        self._average_vowel_duration = 0.1
        return 0.1  # Default 100ms

    durations = [end - start for start, end in vowel_segments]
    self._average_vowel_duration = float(np.mean(durations))

    return self._average_vowel_duration
|
| 885 |
+
|
| 886 |
+
|
| 887 |
+
def main():
    """Smoke-test the physics validator on synthetic audio."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Physics Validator Test")
    print(banner)

    # Synthesize a 2-second 440 Hz tone as the base signal
    sample_rate = 22050
    duration = 2.0
    t = np.linspace(0, duration, int(sample_rate * duration))
    audio = np.sin(2 * np.pi * 440 * t) * 0.5

    # Shape a Qalqalah-like envelope: attenuate the 40-50% region
    # (closure dip), then amplify 50-60% (release spike)
    n = len(audio)
    audio[int(n * 0.4):int(n * 0.5)] *= 0.1
    audio[int(n * 0.5):int(n * 0.6)] *= 2.0

    validator = PhysicsValidator(sample_rate=sample_rate)

    # Qalqalah: window spanning the dip and spike
    print("\nQalqalah Test:")
    result = validator.validate_qalqalah(audio, 0.3, 0.8)
    print(f" Status: {result.status.value}")
    print(f" Profile: {result.rms_profile}")
    print(f" Score: {result.score:.2f}")
    print(f" Dip Depth: {result.dip_depth:.2f}")
    print(f" Spike Height: {result.spike_height:.2f}")

    # Madd: 400ms segment against a calibrated 100ms vowel, expecting 4x
    print("\nMadd Test:")
    validator._average_vowel_duration = 0.1  # 100ms average
    result = validator.validate_madd(audio, 0.0, 0.4, expected_count=4)
    print(f" Status: {result.status.value}")
    print(f" Ratio: {result.ratio:.1f}x")
    print(f" Score: {result.score:.2f}")
|
| 927 |
+
|
| 928 |
+
|
| 929 |
+
# Script entry point: run the smoke test when executed directly
if __name__ == "__main__":
    main()
|
src/pipeline.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Main Pipeline Orchestrator
|
| 4 |
+
|
| 5 |
+
Execution Order:
|
| 6 |
+
1. Text Parse: Generate Phonetic Script & Rule Tags
|
| 7 |
+
2. WhisperX: Get Word Timestamps
|
| 8 |
+
3. MFA: Get Phoneme Timestamps inside Words
|
| 9 |
+
4. Math: Clamp/Normalize Phonemes to Words
|
| 10 |
+
5. DSP: Run Physics checks on specific tagged timestamps
|
| 11 |
+
6. Export: Save JSON
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import List, Dict, Optional
|
| 17 |
+
from dataclasses import dataclass, asdict
|
| 18 |
+
|
| 19 |
+
from .tajweed_parser import TajweedParser, TajweedType, PhysicsCheck, WordTags
|
| 20 |
+
from .alignment_engine import AlignmentEngine, MockAlignmentEngine, AlignmentResult
|
| 21 |
+
from .physics_validator import PhysicsValidator, ValidationStatus
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass
class PhonemeOutput:
    """Output format for a single phoneme.

    NOTE(review): TajweedPipeline.process() currently builds plain dicts
    with these same keys rather than instantiating this dataclass — it
    documents the intended schema.
    """
    char_visual: str        # Letter as written in the Uthmani text
    char_phonetic: str      # Letter as pronounced (phonetic transcription)
    start: float            # Onset timestamp (presumably seconds, from WhisperX — TODO confirm)
    end: float              # Offset timestamp (same unit as start)
    tajweed_type: str       # TajweedType enum value string, e.g. "Qalqalah_Kubra"
    physics_analysis: Optional[Dict] = None  # DSP validation result when a check ran
    score: float = 1.0      # Validation score; 1.0 when no physics check applies
|
| 34 |
+
|
| 35 |
+
@dataclass
class WordOutput:
    """Output format for a single word.

    NOTE(review): like PhonemeOutput, this schema is built as a plain dict
    inside TajweedPipeline.process(); the dataclass itself is not instantiated.
    """
    word_text: str          # Original Uthmani word text
    whisper_anchor: Dict    # {"start": float, "end": float} word-level timestamps
    phonemes: List[Dict]    # Per-letter entries matching the PhonemeOutput schema
|
| 41 |
+
|
| 42 |
+
@dataclass
class AyahOutput:
    """Output format for a complete ayah.

    NOTE(review): schema-documentation dataclass; process() returns a plain
    dict with these keys.
    """
    surah: int              # Surah number
    ayah: int               # Ayah number within the surah
    words: List[Dict]       # Entries matching the WordOutput schema
    metadata: Dict          # audio_path, text, pipeline_version, mock_alignment
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class TajweedPipeline:
    """
    Main orchestrator for the TajweedSST pipeline.

    Stage order (see module docstring):
      1. TajweedParser   -> phonetic script + Tajweed rule tags
      2/3. Aligner       -> word + phoneme timestamps (WhisperX/MFA or mock)
      4. Normalization   -> performed inside the alignment engine
      5. PhysicsValidator -> DSP checks on tagged phoneme spans
      6. Export          -> JSON-serializable dict
    """

    def __init__(self,
                 use_mock_alignment: bool = True,
                 device: str = "cuda"):
        """
        Initialize pipeline components.

        Args:
            use_mock_alignment: Use mock alignment for testing (no WhisperX/MFA)
            device: "cuda" or "cpu"; only used by the real alignment engine
        """
        self.parser = TajweedParser()

        if use_mock_alignment:
            self.aligner = MockAlignmentEngine()
        else:
            self.aligner = AlignmentEngine(device=device)

        self.validator = PhysicsValidator()
        self.use_mock = use_mock_alignment

    def process(self,
                audio_path: str,
                text: str,
                surah: int,
                ayah: int) -> Dict:
        """
        Process a single ayah through the complete pipeline.

        Args:
            audio_path: Path to audio file
            text: Uthmani Quran text for the ayah
            surah: Surah number
            ayah: Ayah number

        Returns:
            JSON-serializable dict with word/phoneme timing and Tajweed analysis.
        """
        # Step 1: Parse text and generate Tajweed tags
        word_tags = self.parser.parse_text(text)

        # Extract phonetic words for alignment (silent letters are excluded
        # from each word's phonetic stream by the parser)
        phonetic_words = [w.phonetic_stream for w in word_tags]

        # Step 2 & 3: Run alignment (WhisperX + MFA)
        alignment = self.aligner.align(
            audio_path=audio_path,
            phonetic_words=phonetic_words,
            surah=surah,
            ayah=ayah
        )

        # Step 4: Normalization is done inside alignment_engine

        # Step 5 prep: Load audio for physics validation
        if not self.use_mock:
            audio = self.validator.load_audio(audio_path)
        else:
            import numpy as np
            audio = np.random.randn(22050 * 10) * 0.1  # Mock audio

        # Build output
        output_words = []

        for word_tag, word_align in zip(word_tags, alignment.words):
            word_output = {
                "word_text": word_tag.word_text,
                "whisper_anchor": {
                    "start": round(word_align.whisper_start, 3),
                    "end": round(word_align.whisper_end, 3)
                },
                "phonemes": []
            }

            # BUGFIX: the aligned phoneme list only contains PRONOUNCED
            # letters (the phonetic stream skips silent ones), so track a
            # separate phoneme cursor instead of reusing the raw letter
            # index. Previously, every letter after a silent letter was
            # paired with the wrong (shifted) phoneme timing.
            num_pronounced = len([l for l in word_tag.letters if not l.is_silent])
            phoneme_idx = 0

            # Map phonemes to letters and run physics checks
            for letter_tag in word_tag.letters:
                # Skip silent letters (no aligned phoneme exists for them)
                if letter_tag.is_silent:
                    continue

                # Get corresponding phoneme timing
                if phoneme_idx < len(word_align.phonemes):
                    phoneme_align = word_align.phonemes[phoneme_idx]
                    start = phoneme_align.start
                    end = phoneme_align.end
                else:
                    # Estimate timing if not aligned: split the word span
                    # evenly across its pronounced letters
                    word_duration = word_align.whisper_end - word_align.whisper_start
                    letter_duration = word_duration / max(num_pronounced, 1)
                    start = word_align.whisper_start + (phoneme_idx * letter_duration)
                    end = start + letter_duration
                phoneme_idx += 1

                phoneme_output = {
                    "char_visual": letter_tag.char_visual,
                    "char_phonetic": letter_tag.char_phonetic,
                    "start": round(start, 3),
                    "end": round(end, 3),
                    "tajweed_type": letter_tag.tajweed_type.value,
                    "score": 1.0
                }

                # Step 5: Run physics validation if tagged
                if letter_tag.physics_check != PhysicsCheck.NONE:
                    physics_result = self._run_physics_check(
                        audio=audio,
                        start=start,
                        end=end,
                        check_type=letter_tag.physics_check,
                        tajweed_type=letter_tag.tajweed_type,
                        madd_count=letter_tag.madd_count
                    )
                    phoneme_output["physics_analysis"] = physics_result
                    phoneme_output["score"] = physics_result.get("score", 1.0)

                word_output["phonemes"].append(phoneme_output)

            output_words.append(word_output)

        # Step 6: Final output structure
        output = {
            "surah": surah,
            "ayah": ayah,
            "words": output_words,
            "metadata": {
                "audio_path": audio_path,
                "text": text,
                "pipeline_version": "1.0.0",
                "mock_alignment": self.use_mock
            }
        }

        return output

    def _run_physics_check(self,
                           audio,
                           start: float,
                           end: float,
                           check_type: PhysicsCheck,
                           tajweed_type: TajweedType,
                           madd_count: int = 0) -> Dict:
        """Run the appropriate PhysicsValidator check for the tagged span.

        Args:
            audio: Audio samples loaded by the validator (or mock array)
            start: Span onset (same unit as alignment timestamps)
            end: Span offset
            check_type: Which DSP check to run
            tajweed_type: Rule that triggered the check (currently unused here)
            madd_count: Expected elongation counts for madd checks (2/4/6)

        Returns:
            Flat dict with check-specific measurements, "status" and "score".
        """
        if check_type == PhysicsCheck.CHECK_RMS_BOUNCE:
            result = self.validator.validate_qalqalah(audio, start, end)
            return {
                "check_type": "Qalqalah_RMS",
                "rms_profile": result.rms_profile,
                "dip_depth": round(result.dip_depth, 3),
                "spike_height": round(result.spike_height, 3),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        elif check_type == PhysicsCheck.CHECK_DURATION:
            # Default to 2 counts (madd asli) when no count was tagged
            result = self.validator.validate_madd(audio, start, end, madd_count or 2)
            return {
                "check_type": "Madd_Duration",
                "actual_duration_ms": round(result.actual_duration_ms, 1),
                "expected_duration_ms": round(result.expected_duration_ms, 1),
                "ratio": round(result.ratio, 2),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        elif check_type == PhysicsCheck.CHECK_GHUNNAH:
            result = self.validator.validate_ghunnah(audio, start, end)
            return {
                "check_type": "Ghunnah_Formant",
                "nasal_detected": result.nasal_formant_detected,
                "pitch_stability": round(result.pitch_stability, 3),
                "duration_elongation": round(result.duration_elongation, 2),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        elif check_type == PhysicsCheck.CHECK_FORMANT_F2:
            result = self.validator.validate_tafkheem(audio, start, end)
            return {
                "check_type": "Tafkheem_F2",
                "f2_value_hz": round(result.f2_value_hz, 0),
                "depression_ratio": round(result.depression_ratio, 3),
                "status": result.status.value,
                "score": round(result.score, 3)
            }

        # Unknown / PhysicsCheck.NONE: nothing to validate
        return {"check_type": "None", "status": "SKIPPED", "score": 1.0}

    def process_batch(self,
                      audio_dir: str,
                      quran_json_path: str,
                      output_dir: str,
                      surah: int,
                      start_ayah: int = 1,
                      end_ayah: Optional[int] = None) -> List[str]:
        """
        Process multiple ayahs in batch.

        Args:
            audio_dir: Directory containing audio files (named {surah}_{ayah}.mp3)
            quran_json_path: Path to Quran text JSON (keyed by surah, then ayah)
            output_dir: Directory to save output JSON files
            surah: Surah to process
            start_ayah: Starting ayah number
            end_ayah: Ending ayah number (None = all ayahs present in the JSON)

        Returns:
            List of output file paths that were written. Ayahs with missing
            audio or missing text are skipped with a console message.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Load Quran text
        with open(quran_json_path, 'r', encoding='utf-8') as f:
            quran_data = json.load(f)

        output_files = []

        # Process each ayah in the requested range
        for ayah in range(start_ayah, (end_ayah or len(quran_data.get(str(surah), []))) + 1):
            audio_path = Path(audio_dir) / f"{surah}_{ayah}.mp3"

            if not audio_path.exists():
                print(f"Skipping {surah}:{ayah} - audio not found")
                continue

            # Get text
            text = quran_data.get(str(surah), {}).get(str(ayah), "")
            if not text:
                print(f"Skipping {surah}:{ayah} - text not found")
                continue

            # Process
            result = self.process(
                audio_path=str(audio_path),
                text=text,
                surah=surah,
                ayah=ayah
            )

            # Save
            output_path = output_dir / f"{surah}_{ayah}.json"
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)

            output_files.append(str(output_path))
            print(f"Processed {surah}:{ayah} → {output_path}")

        return output_files
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def main():
    """Demo the pipeline on Surah Al-Ikhlas, ayah 1 (mock alignment)."""
    banner = "=" * 60
    print(banner)
    print("TajweedSST Pipeline Demo")
    print(banner)

    pipeline = TajweedPipeline(use_mock_alignment=True)

    # Test with Surah Al-Ikhlas, Ayah 1
    test_text = "قُلْ هُوَ اللَّهُ أَحَدٌ"

    print(f"\nInput Text: {test_text}")
    print("\nProcessing...")

    result = pipeline.process(
        audio_path="test_audio.mp3",
        text=test_text,
        surah=112,
        ayah=1,
    )

    print("\n" + banner)
    print("OUTPUT JSON:")
    print(banner)
    print(json.dumps(result, ensure_ascii=False, indent=2))
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
if __name__ == "__main__":
|
| 334 |
+
main()
|
src/tajweed_parser.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Step 1: Tajweed Rule Parser
|
| 4 |
+
|
| 5 |
+
Generates two parallel text streams and a Rule Map:
|
| 6 |
+
- Visual Stream: Standard Uthmani text
|
| 7 |
+
- Phonetic Stream: Pronounced text for MFA
|
| 8 |
+
- Tajweed Map: Tags for physics validation
|
| 9 |
+
|
| 10 |
+
Tajweed Rules Implemented:
|
| 11 |
+
- Idgham (Assimilation)
|
| 12 |
+
- Iqlab (Conversion)
|
| 13 |
+
- Ikhfa (Concealment)
|
| 14 |
+
- Qalqalah (Bounce)
|
| 15 |
+
- Ghunnah (Nasalization)
|
| 16 |
+
- Madd (Elongation)
|
| 17 |
+
- Tafkheem/Tarqeeq (Heavy/Light)
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
import re
|
| 21 |
+
from dataclasses import dataclass, field
|
| 22 |
+
from typing import List, Dict, Tuple, Optional
|
| 23 |
+
from enum import Enum
|
| 24 |
+
|
| 25 |
+
class TajweedType(Enum):
    """Tajweed rule categories a letter can be tagged with."""
    NONE = "None"
    QALQALAH_SUGHRA = "Qalqalah_Sughra"   # Bounce on sukun mid-word
    QALQALAH_KUBRA = "Qalqalah_Kubra"     # Stronger bounce at word end
    GHUNNAH = "Ghunnah"                   # Nasalization (nun/meem with shadda)
    IDGHAM_FULL = "Idgham_Full"           # Full assimilation (without ghunnah)
    IDGHAM_PARTIAL = "Idgham_Partial"     # Assimilation with ghunnah
    IQLAB = "Iqlab"                       # Nun converted to meem before ba
    IKHFA = "Ikhfa"                       # Concealment of nun sakinah/tanween
    MADD_ASLI = "Madd_Asli"               # Natural elongation (2 counts)
    MADD_WAJIB = "Madd_Wajib"             # Obligatory elongation (4 counts)
    MADD_LAZIM = "Madd_Lazim"             # Necessary elongation (6 counts)
    TAFKHEEM = "Tafkheem"                 # Heavy (emphatic) articulation
    TARQEEQ = "Tarqeeq"                   # Light articulation (declared; never assigned by TajweedParser in this module)
    SILENT = "Silent"                     # Written but not pronounced
|
| 40 |
+
|
| 41 |
+
class PhysicsCheck(Enum):
    """Which DSP validation to run on a letter's audio span."""
    CHECK_RMS_BOUNCE = "Check_RMS_Bounce"    # Qalqalah: RMS dip + spike profile
    CHECK_DURATION = "Check_Duration"        # Madd/Idgham: elongation duration
    CHECK_GHUNNAH = "Check_Ghunnah"          # Nasalization: formant/pitch check
    CHECK_FORMANT_F2 = "Check_Formant_F2"    # Tafkheem: F2 depression
    NONE = "None"                            # No physics validation needed
|
| 47 |
+
|
| 48 |
+
@dataclass
class LetterTag:
    """Tag for a single Arabic letter with Tajweed info"""
    char_visual: str        # Letter as written (Uthmani script)
    char_phonetic: str      # Letter as pronounced ('' for silent letters)
    position: int           # Letter index within its word
    tajweed_type: TajweedType = TajweedType.NONE    # Assigned Tajweed rule
    physics_check: PhysicsCheck = PhysicsCheck.NONE  # DSP check to run downstream
    is_silent: bool = False  # True when written but not pronounced
    madd_count: int = 0  # 0=none, 2=asli, 4=wajib, 6=lazim
|
| 58 |
+
|
| 59 |
+
@dataclass
class WordTags:
    """Tajweed tags for a complete word"""
    word_text: str                                      # Original Uthmani word
    letters: List[LetterTag] = field(default_factory=list)  # One tag per base letter
    phonetic_stream: str = ""  # Space-separated phonemes (silent letters excluded)
|
| 65 |
+
|
| 66 |
+
class TajweedParser:
    """Parses Uthmani Quran text and generates Tajweed rule tags.

    For each word, produces a list of LetterTag objects (visual char,
    phonetic char, Tajweed rule, physics check) plus a space-separated
    phonetic stream for the forced aligner. Rule assignment is
    order-sensitive: Qalqalah/Tafkheem/Madd are mutually exclusive
    (if/elif), while Ghunnah, Nun-Sakinah, Tanween and Silent handling
    are applied afterwards and may overwrite the earlier assignment.

    NOTE(review): the `word` parameter of _analyze_letter is unused.
    """

    # Qalqalah letters: ق ط ب ج د
    QALQALAH_LETTERS = set('قطبجد')

    # Heavy letters (Tafkheem): خ ص ض غ ط ق ظ
    TAFKHEEM_LETTERS = set('خصضغطقظ')

    # Idgham letters after Nun Sakinah: ي ر م ل و ن
    IDGHAM_LETTERS = set('يرملون')
    IDGHAM_WITH_GHUNNAH = set('ينمو')  # With Ghunnah
    IDGHAM_WITHOUT_GHUNNAH = set('رل')  # Without Ghunnah

    # Ikhfa letters (15 letters)
    IKHFA_LETTERS = set('تثجدذزسشصضطظفقك')

    # Harakat (vowel marks) as Unicode combining characters
    FATHA = '\u064E'
    DAMMA = '\u064F'
    KASRA = '\u0650'
    SUKUN = '\u0652'
    SHADDA = '\u0651'
    TANWEEN_FATH = '\u064B'
    TANWEEN_DAMM = '\u064C'
    TANWEEN_KASR = '\u064D'

    # Madd letters
    MADD_ALIF = 'ا'
    MADD_WAW = 'و'
    MADD_YA = 'ي'

    # Phonetic mapping (simplified Buckwalter-like)
    PHONETIC_MAP = {
        'ا': 'ā', 'ب': 'b', 'ت': 't', 'ث': 'ṯ', 'ج': 'j', 'ح': 'ḥ',
        'خ': 'ḫ', 'د': 'd', 'ذ': 'ḏ', 'ر': 'r', 'ز': 'z', 'س': 's',
        'ش': 'š', 'ص': 'ṣ', 'ض': 'ḍ', 'ط': 'ṭ', 'ظ': 'ẓ', 'ع': 'ʿ',
        'غ': 'ġ', 'ف': 'f', 'ق': 'q', 'ك': 'k', 'ل': 'l', 'م': 'm',
        'ن': 'n', 'ه': 'h', 'و': 'w', 'ي': 'y', 'ء': 'ʾ', 'ة': 'h',
        'ى': 'ā', 'ئ': 'ʾ', 'ؤ': 'ʾ', 'أ': 'ʾa', 'إ': 'ʾi', 'آ': 'ʾā'
    }

    def __init__(self):
        # Verbose-tracing flag; no debug output is emitted by this class itself.
        self.debug = False

    def parse_text(self, text: str) -> List[WordTags]:
        """Parse Uthmani text and return tagged words.

        Words are split on whitespace; after per-word analysis, a second
        pass applies rules that span word boundaries.
        """
        words = text.strip().split()
        result = []

        for word in words:
            word_tags = self._parse_word(word)
            result.append(word_tags)

        # Cross-word analysis (Nun Sakinah rules across words)
        self._analyze_cross_word_rules(result)

        return result

    def _parse_word(self, word: str) -> WordTags:
        """Parse a single word and generate letter tags."""
        word_tags = WordTags(word_text=word)

        # Extract base letters and diacritics
        letters_with_harakat = self._split_letters(word)

        for idx, (letter, harakat) in enumerate(letters_with_harakat):
            tag = self._analyze_letter(
                letter=letter,
                harakat=harakat,
                position=idx,
                context=(letters_with_harakat, idx),
                word=word
            )
            word_tags.letters.append(tag)

        # Generate phonetic stream (silent letters excluded)
        word_tags.phonetic_stream = self._generate_phonetic_stream(word_tags.letters)

        return word_tags

    def _split_letters(self, word: str) -> List[Tuple[str, str]]:
        """Split word into (letter, harakat) pairs.

        Each base character is paired with the run of combining marks
        that immediately follows it. The extended set below also covers
        Quranic annotation marks (U+0653–U+0658, U+065C, dagger alif U+0670).
        """
        result = []
        i = 0
        harakat_chars = set([self.FATHA, self.DAMMA, self.KASRA, self.SUKUN,
                             self.SHADDA, self.TANWEEN_FATH, self.TANWEEN_DAMM,
                             self.TANWEEN_KASR, '\u0653', '\u0654', '\u0655',
                             '\u0656', '\u0657', '\u0658', '\u065C', '\u0670'])

        while i < len(word):
            char = word[i]

            # Skip if it's a harakat (only reachable for an orphaned leading
            # mark, since trailing marks are consumed by the inner loop below)
            if char in harakat_chars:
                i += 1
                continue

            # Collect harakat following this letter
            harakat = ""
            j = i + 1
            while j < len(word) and word[j] in harakat_chars:
                harakat += word[j]
                j += 1

            result.append((char, harakat))
            i = j

        return result

    def _analyze_letter(self, letter: str, harakat: str, position: int,
                        context: Tuple[List, int], word: str) -> LetterTag:
        """Analyze a single letter and assign Tajweed rules.

        Args:
            letter: Base character (no diacritics).
            harakat: Combining marks attached to this letter.
            position: Letter index within the word.
            context: (full (letter, harakat) list, this letter's index).
            word: Whole word text (currently unused — kept for signature stability).
        """
        letters_list, idx = context
        is_last = idx == len(letters_list) - 1
        has_sukun = self.SUKUN in harakat
        has_shadda = self.SHADDA in harakat

        tag = LetterTag(
            char_visual=letter,
            char_phonetic=self.PHONETIC_MAP.get(letter, letter),
            position=position
        )

        # Rule 1: Qalqalah (ق ط ب ج د with Sukun); word-final counts as
        # Kubra because recitation stops create an implicit sukun
        if letter in self.QALQALAH_LETTERS and (has_sukun or is_last):
            if is_last:
                tag.tajweed_type = TajweedType.QALQALAH_KUBRA
            else:
                tag.tajweed_type = TajweedType.QALQALAH_SUGHRA
            tag.physics_check = PhysicsCheck.CHECK_RMS_BOUNCE

        # Rule 2: Tafkheem (Heavy letters)
        elif letter in self.TAFKHEEM_LETTERS:
            tag.tajweed_type = TajweedType.TAFKHEEM
            tag.physics_check = PhysicsCheck.CHECK_FORMANT_F2

        # Rule 3: Madd (Elongation) - the madd letter must follow its
        # matching short vowel (alif/fatha, waw/damma, ya/kasra)
        elif letter in [self.MADD_ALIF, self.MADD_WAW, self.MADD_YA]:
            # Check for Madd conditions
            if idx > 0:
                prev_letter, prev_harakat = letters_list[idx - 1]
                if (letter == self.MADD_ALIF and self.FATHA in prev_harakat) or \
                   (letter == self.MADD_WAW and self.DAMMA in prev_harakat) or \
                   (letter == self.MADD_YA and self.KASRA in prev_harakat):
                    # Check what follows for Madd type
                    if is_last:
                        tag.tajweed_type = TajweedType.MADD_ASLI
                        tag.madd_count = 2
                    elif idx + 1 < len(letters_list):
                        next_letter, next_harakat = letters_list[idx + 1]
                        if self.SHADDA in next_harakat or self.SUKUN in next_harakat:
                            tag.tajweed_type = TajweedType.MADD_LAZIM
                            tag.madd_count = 6
                        else:
                            tag.tajweed_type = TajweedType.MADD_WAJIB
                            tag.madd_count = 4
                    tag.physics_check = PhysicsCheck.CHECK_DURATION

        # Rule 4: Ghunnah (Nun/Meem with Shadda) — separate `if`, so this
        # can overwrite an assignment from rules 1-3
        if letter in 'نم' and has_shadda:
            tag.tajweed_type = TajweedType.GHUNNAH
            tag.physics_check = PhysicsCheck.CHECK_GHUNNAH

        # Rule 5: Nun Sakinah rules (within the same word; cross-word
        # occurrences are handled in _analyze_cross_word_rules)
        if letter == 'ن' and has_sukun:
            if idx + 1 < len(letters_list):
                next_letter, _ = letters_list[idx + 1]
                # Iqlab: Nun + Ba → Mim + Ba
                if next_letter == 'ب':
                    tag.tajweed_type = TajweedType.IQLAB
                    tag.char_phonetic = 'm'  # Pronounced as Mim
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH
                # Idgham
                elif next_letter in self.IDGHAM_LETTERS:
                    if next_letter in self.IDGHAM_WITH_GHUNNAH:
                        tag.tajweed_type = TajweedType.IDGHAM_PARTIAL
                    else:
                        tag.tajweed_type = TajweedType.IDGHAM_FULL
                    tag.physics_check = PhysicsCheck.CHECK_DURATION
                # Ikhfa
                elif next_letter in self.IKHFA_LETTERS:
                    tag.tajweed_type = TajweedType.IKHFA
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH

        # Handle Tanween similarly (within the word only).
        # NOTE(review): covers only Iqlab/Ikhfa, and only when a letter
        # follows inside the SAME word; word-final tanween before the next
        # word is not handled here nor in _analyze_cross_word_rules (which
        # only checks for a literal ن) — confirm whether that is intended.
        if any(tanween in harakat for tanween in [self.TANWEEN_FATH, self.TANWEEN_DAMM, self.TANWEEN_KASR]):
            if idx + 1 < len(letters_list):
                next_letter, _ = letters_list[idx + 1]
                if next_letter == 'ب':
                    tag.tajweed_type = TajweedType.IQLAB
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH
                elif next_letter in self.IKHFA_LETTERS:
                    tag.tajweed_type = TajweedType.IKHFA
                    tag.physics_check = PhysicsCheck.CHECK_GHUNNAH

        # Silent letters (Alif after Waw al-Jama'a, etc.)
        if letter == 'ا' and not harakat and idx > 0:
            prev_letter, prev_harakat = letters_list[idx - 1]
            if prev_letter == 'و' and (self.DAMMA in prev_harakat or self.SUKUN in prev_harakat):
                tag.is_silent = True
                tag.tajweed_type = TajweedType.SILENT
                tag.char_phonetic = ''

        return tag

    def _analyze_cross_word_rules(self, words: List[WordTags]) -> None:
        """Analyze Tajweed rules that span word boundaries.

        Mutates the LetterTag objects in place: a word-final bare ن that
        got no rule in the per-word pass is re-classified against the
        first letter of the following word (Iqlab/Idgham/Ikhfa).
        """
        for i in range(len(words) - 1):
            current_word = words[i]
            next_word = words[i + 1]

            if not current_word.letters or not next_word.letters:
                continue

            last_letter = current_word.letters[-1]
            first_of_next = next_word.letters[0]

            # Check Nun Sakinah at end of word + next word's first letter
            if last_letter.char_visual == 'ن' and last_letter.tajweed_type == TajweedType.NONE:
                if first_of_next.char_visual == 'ب':
                    last_letter.tajweed_type = TajweedType.IQLAB
                    last_letter.char_phonetic = 'm'
                    last_letter.physics_check = PhysicsCheck.CHECK_GHUNNAH
                elif first_of_next.char_visual in self.IDGHAM_LETTERS:
                    if first_of_next.char_visual in self.IDGHAM_WITH_GHUNNAH:
                        last_letter.tajweed_type = TajweedType.IDGHAM_PARTIAL
                    else:
                        last_letter.tajweed_type = TajweedType.IDGHAM_FULL
                    last_letter.physics_check = PhysicsCheck.CHECK_DURATION
                elif first_of_next.char_visual in self.IKHFA_LETTERS:
                    last_letter.tajweed_type = TajweedType.IKHFA
                    last_letter.physics_check = PhysicsCheck.CHECK_GHUNNAH

    def _generate_phonetic_stream(self, letters: List[LetterTag]) -> str:
        """Generate phonetic transcription for MFA.

        Silent letters (and any letter with an empty phonetic char) are
        excluded, so the stream length may differ from the letter count.
        """
        phonemes = []
        for letter in letters:
            if not letter.is_silent and letter.char_phonetic:
                phonemes.append(letter.char_phonetic)
        return ' '.join(phonemes)
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def main():
    """Smoke-test the Tajweed parser against Surah Al-Ikhlas, ayah 1."""
    parser = TajweedParser()

    # Test with Surah Al-Ikhlas
    test_text = "قُلْ هُوَ اللَّهُ أَحَدٌ"

    divider = "=" * 50
    print(divider)
    print("TajweedSST Parser Test")
    print(divider)
    print(f"Input: {test_text}")
    print()

    for word in parser.parse_text(test_text):
        print(f"Word: {word.word_text}")
        print(f" Phonetic: {word.phonetic_stream}")
        # Only show letters that actually received a rule
        tagged = (l for l in word.letters if l.tajweed_type != TajweedType.NONE)
        for letter in tagged:
            print(f" [{letter.char_visual}] → {letter.tajweed_type.value} ({letter.physics_check.value})")
        print()
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
if __name__ == "__main__":
|
| 334 |
+
main()
|
surah_90_test.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Surah 90 Test
|
| 4 |
+
|
| 5 |
+
Test script to generate letter-level timing data for Surah Al-Balad (90)
|
| 6 |
+
and compare precision with existing timing in MahQuranApp.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
cd /Documents/26apps/tajweedsst
|
| 10 |
+
python3 surah_90_test.py
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
import sys
|
| 15 |
+
import os
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
# Add src to path
|
| 19 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 20 |
+
|
| 21 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 22 |
+
|
| 23 |
+
# Paths
|
| 24 |
+
# Root of the sibling MahQuranApp project (source of text, audio and timing data)
MAHQURAN_PATH = Path("/home/absolut7/Documents/26apps/MahQuranApp")
# Quran text JSON consumed by load_surah_90_text()
VERSES_PATH = MAHQURAN_PATH / "public/data/verses_v4.json"
# Recitation audio for Surah 90 (Abdul Basit) — not read directly in this script
AUDIO_PATH = MAHQURAN_PATH / "public/audio/abdul_basit/surah_090.mp3"
# Existing letter-level timing to merge Tajweed annotations into
EXISTING_TIMING_PATH = MAHQURAN_PATH / "public/data/letter_timing_90.json"
# Where this script writes its annotated result
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_tajweed.json"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def load_surah_90_text():
    """Load Surah 90 text from verses_v4.json"""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        payload = json.load(f)

    # Normalize each verse record into the shape the rest of the script expects
    return [
        {
            'ayah': verse['ayah'],
            'text': verse['text'].strip(),
            'translation': verse.get('translation', ''),
            'words': [w['arabic'] for w in verse.get('words', [])],
        }
        for verse in payload.get('90', [])
    ]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def load_existing_timing():
    """Load existing timing data from MahQuranApp"""
    with EXISTING_TIMING_PATH.open('r', encoding='utf-8') as handle:
        return json.load(handle)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def parse_with_tajweed(verses):
    """Parse all verses and generate Tajweed tags"""
    parser = TajweedParser()
    parsed = []

    for verse in verses:
        text = verse['text']
        verse_entry = {
            'ayah': verse['ayah'],
            'text': text,
            'translation': verse['translation'],
            'words': [],
        }

        # One entry per word; each word carries a per-letter breakdown
        for word_tag in parser.parse_text(text):
            letters = [
                {
                    'char': letter.char_visual,
                    'phonetic': letter.char_phonetic,
                    'position': letter.position,
                    'tajweed_type': letter.tajweed_type.value,
                    'physics_check': letter.physics_check.value,
                    'is_silent': letter.is_silent,
                    'madd_count': letter.madd_count,
                }
                for letter in word_tag.letters
            ]
            verse_entry['words'].append({
                'word_text': word_tag.word_text,
                'phonetic': word_tag.phonetic_stream,
                'letters': letters,
            })

        parsed.append(verse_entry)

    return parsed
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def analyze_tajweed_distribution(results):
    """Count how often each Tajweed rule and physics check occurs.

    Args:
        results: list of verse dicts as produced by parse_with_tajweed,
            each with 'words' -> list of word dicts -> 'letters'.

    Returns:
        (tajweed_counts, physics_counts): two dicts mapping the
        'tajweed_type' / 'physics_check' string to its frequency.
    """
    from collections import Counter

    tajweed_counts = Counter()
    physics_counts = Counter()

    for verse in results:
        for word in verse['words']:
            for letter in word['letters']:
                tajweed_counts[letter['tajweed_type']] += 1
                physics_counts[letter['physics_check']] += 1

    # Return plain dicts so callers (printing, json.dump) see the exact
    # same type as before.
    return dict(tajweed_counts), dict(physics_counts)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def convert_to_mahquran_format(results, existing_timing):
    """Merge Tajweed annotations into MahQuranApp timing entries.

    Flattens all letters of `results` in recitation order and attaches
    their annotations positionally: timing entry i is assumed to match
    the i-th letter overall. Entries beyond the number of parsed
    letters are passed through unchanged.

    Args:
        results: list of verse dicts with nested 'words' / 'letters'
            (output of parse_with_tajweed).
        existing_timing: list of timing dicts; not mutated (shallow
            copies are returned).

    Returns:
        A new list of timing dicts, each optionally extended with
        'tajweed_type', 'physics_check', 'phonetic' and — only when
        positive — 'madd_count'.
    """
    # Build a flat list of all characters with Tajweed info.
    # (The unused `char_idx` counter from the original version was removed.)
    tajweed_map = {}
    global_idx = 0
    for verse in results:
        for word in verse['words']:
            for letter in word['letters']:
                tajweed_map[global_idx] = {
                    'tajweed_type': letter['tajweed_type'],
                    'physics_check': letter['physics_check'],
                    'phonetic': letter['phonetic'],
                    'madd_count': letter['madd_count'],
                }
                global_idx += 1

    # Merge with existing timing.
    output = []
    for i, timing_entry in enumerate(existing_timing):
        entry = timing_entry.copy()

        info = tajweed_map.get(i)
        if info is not None:
            entry['tajweed_type'] = info['tajweed_type']
            entry['physics_check'] = info['physics_check']
            entry['phonetic'] = info['phonetic']
            # Only letters that carry a madd get the extra key.
            if info['madd_count'] > 0:
                entry['madd_count'] = info['madd_count']

        output.append(entry)

    return output
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def main():
    """Run the end-to-end Surah 90 (Al-Balad) Tajweed test.

    Pipeline: load verse text -> parse Tajweed rules -> summarize rule
    distribution -> merge annotations with the existing letter timing ->
    write both the full analysis and a MahQuranApp-compatible timing
    file, then print a small sample.

    Returns:
        The full analysis dict that was written to OUTPUT_PATH.
    """
    print("=" * 60)
    print("TajweedSST - Surah 90 (Al-Balad) Test")
    print("=" * 60)

    # Step 1: Load Surah 90 text
    print("\n[1] Loading Surah 90 text...")
    verses = load_surah_90_text()
    print(f" Loaded {len(verses)} verses")
    print(f" Verse 1: {verses[0]['text'][:50]}...")

    # Step 2: Parse with Tajweed
    print("\n[2] Parsing with Tajweed rules...")
    results = parse_with_tajweed(verses)

    # Step 3: Analyze distribution
    print("\n[3] Tajweed Analysis:")
    tajweed_counts, physics_counts = analyze_tajweed_distribution(results)

    # Sort descending by count; the "None" bucket is noise and skipped.
    print("\n Tajweed Rules Found:")
    for rule, count in sorted(tajweed_counts.items(), key=lambda x: -x[1]):
        if rule != "None":
            print(f" • {rule}: {count}")

    print("\n Physics Checks Required:")
    for check, count in sorted(physics_counts.items(), key=lambda x: -x[1]):
        if check != "None":
            print(f" • {check}: {count}")

    # Step 4: Load existing timing
    print("\n[4] Loading existing timing data...")
    existing_timing = load_existing_timing()
    print(f" Found {len(existing_timing)} timing entries")
    print(f" First entry: {existing_timing[0]}")

    # Step 5: Convert and merge
    print("\n[5] Merging Tajweed with timing...")
    merged = convert_to_mahquran_format(results, existing_timing)

    # Count enhanced entries (entries that actually received a rule tag).
    enhanced = sum(1 for e in merged if e.get('tajweed_type') and e['tajweed_type'] != 'None')
    print(f" Enhanced entries with Tajweed: {enhanced}")

    # Step 6: Save output
    print("\n[6] Saving output...")
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Save full Tajweed analysis
    full_output = {
        'surah': 90,
        'name': 'Al-Balad',
        'name_arabic': 'البلد',
        'total_verses': len(verses),
        'tajweed_summary': tajweed_counts,
        'physics_checks': physics_counts,
        'verses': results
    }

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(full_output, f, ensure_ascii=False, indent=2)
    print(f" Saved: {OUTPUT_PATH}")

    # Save merged timing (compatible with MahQuranApp)
    merged_path = OUTPUT_PATH.parent / "letter_timing_90_tajweed.json"
    with open(merged_path, 'w', encoding='utf-8') as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
    print(f" Saved: {merged_path}")

    # Step 7: Show sample — only letters with a non-trivial rule.
    print("\n[7] Sample Output (Verse 1, first 3 words):")
    for word in results[0]['words'][:3]:
        print(f"\n Word: {word['word_text']}")
        print(f" Phonetic: {word['phonetic']}")
        for letter in word['letters']:
            if letter['tajweed_type'] != 'None':
                print(f" [{letter['char']}] → {letter['tajweed_type']} ({letter['physics_check']})")

    print("\n" + "=" * 60)
    print("✓ Test Complete!")
    print("=" * 60)

    return full_output
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
# Script entry point: run the Surah 90 Tajweed test end to end.
if __name__ == "__main__":
    main()
|
surah_91_full_pipeline.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Physics Refinement Pipeline for Surah 91
|
| 4 |
+
|
| 5 |
+
Uses EXISTING timing from MahQuranApp + applies physics refinement.
|
| 6 |
+
No WhisperX needed - just physics validation and boundary refinement.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
cd /Documents/26apps/tajweedsst
|
| 10 |
+
source venv/bin/activate
|
| 11 |
+
python3 surah_91_full_pipeline.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import sys
|
| 16 |
+
import numpy as np
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 20 |
+
|
| 21 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 22 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 23 |
+
from src.duration_model import DurationModel, MaddType
|
| 24 |
+
|
| 25 |
+
import librosa
|
| 26 |
+
|
| 27 |
+
# Paths
# Hard-coded locations inside the sibling MahQuranApp project: verse text,
# the reciter's audio, and the previously generated letter timing are read
# from there, and the physics-refined timing is written back next to it.
MAHQURAN_PATH = Path("/home/absolut7/Documents/26apps/MahQuranApp")
VERSES_PATH = MAHQURAN_PATH / "public/data/verses_v4.json"
AUDIO_PATH = MAHQURAN_PATH / "public/audio/abdul_basit/surah_091.mp3"
EXISTING_TIMING = MAHQURAN_PATH / "public/data/abdul_basit/letter_timing_91.json"
OUTPUT_TIMING = MAHQURAN_PATH / "public/data/abdul_basit/letter_timing_91_physics.json"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def load_verses():
    """Return the Surah 91 verse records parsed from verses_v4.json."""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        payload = json.load(f)
    return payload.get('91', [])
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def load_existing_timing():
    """Return the previously generated per-letter timing entries."""
    with open(EXISTING_TIMING, 'r', encoding='utf-8') as f:
        entries = json.load(f)
    return entries
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_tajweed_tags(verses):
    """Flatten the parser output into one tag dict per letter, in order.

    Unlike the JSON-oriented helpers, this keeps the enum objects
    (tajweed_type / physics_check) intact so callers can compare against
    the enum members directly.
    """
    parser = TajweedParser()
    all_tags = []

    for verse in verses:
        for word_tag in parser.parse_text(verse['text']):
            all_tags.extend(
                {
                    'char': letter.char_visual,
                    'phonetic': letter.char_phonetic,
                    'tajweed_type': letter.tajweed_type,
                    'physics_check': letter.physics_check,
                    'madd_count': letter.madd_count,
                    'is_silent': letter.is_silent,
                }
                for letter in word_tag.letters
            )

    return all_tags
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def refine_with_physics(timing_data, tags, audio, sr, physics, duration_model):
    """Attach Tajweed tags and physics-validation results to timing entries.

    Entries and tags are matched positionally: timing entry i is assumed
    to correspond to tags[i]. Surplus timing entries (i >= len(tags)) are
    copied through without annotation.

    Args:
        timing_data: list of dicts with 'start'/'end' in milliseconds.
        tags: per-letter dicts from get_tajweed_tags() (enum objects).
        audio: waveform samples passed to the physics validators.
        sr: sample rate of `audio` (unused here directly; kept for parity
            with the call site).
        physics: PhysicsValidator instance.
        duration_model: DurationModel used for Madd duration checks.

    Returns:
        (refined, stats): annotated copies of the timing entries, and a
        counter dict with 'total'/'validated'/'passed'/'marginal'/'failed'.
    """
    refined = []
    stats = {'total': 0, 'validated': 0, 'passed': 0, 'marginal': 0, 'failed': 0}

    for i, entry in enumerate(timing_data):
        stats['total'] += 1

        # Copy existing data
        result = entry.copy()
        # CRITICAL PRECISION FIX: Times are stored in milliseconds, convert to seconds
        start = entry['start'] / 1000.0
        end = entry['end'] / 1000.0

        # Get corresponding Tajweed tag
        if i < len(tags):
            tag = tags[i]
            result['tajweed'] = tag['tajweed_type'].value
            result['phonetic'] = tag['phonetic']

            # Run physics validation if needed
            if tag['physics_check'] != PhysicsCheck.NONE:
                stats['validated'] += 1

                try:
                    # Dispatch on the required physics check; CHECK_GHUNNAH
                    # is further split by the specific Tajweed rule.
                    check = tag['physics_check']

                    if check == PhysicsCheck.CHECK_RMS_BOUNCE:
                        val = physics.validate_qalqalah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_DURATION:
                        # madd_count of 0/None falls back to the default 2 harakat.
                        val = physics.validate_madd(audio, start, end, tag['madd_count'] or 2)
                    elif check == PhysicsCheck.CHECK_GHUNNAH:
                        if tag['tajweed_type'] == TajweedType.IKHFA:
                            val = physics.validate_ikhfa(audio, start, end)
                        elif tag['tajweed_type'] == TajweedType.IQLAB:
                            val = physics.validate_iqlab(audio, start, end)
                        else:
                            val = physics.validate_ghunnah(audio, start, end)
                    elif check == PhysicsCheck.CHECK_FORMANT_F2:
                        val = physics.validate_tafkheem(audio, start, end)
                    else:
                        val = None

                    if val:
                        result['physics'] = val.status.value
                        result['score'] = round(val.score, 2)

                        if val.status == ValidationStatus.PASS:
                            stats['passed'] += 1
                        elif val.status == ValidationStatus.MARGINAL:
                            stats['marginal'] += 1
                        else:
                            stats['failed'] += 1

                except Exception as e:
                    # Best-effort: record the failure on the entry instead of
                    # aborting the whole refinement run.
                    result['error'] = str(e)

            # Duration validation for Madd
            if tag['tajweed_type'] in [TajweedType.MADD_ASLI, TajweedType.MADD_WAJIB, TajweedType.MADD_LAZIM]:
                duration = end - start
                madd_map = {
                    TajweedType.MADD_ASLI: MaddType.ASLI,
                    TajweedType.MADD_WAJIB: MaddType.WAJIB,
                    TajweedType.MADD_LAZIM: MaddType.LAZIM
                }
                dur_result = duration_model.validate_duration(
                    duration,
                    madd_map.get(tag['tajweed_type'], MaddType.ASLI),
                    tag['madd_count'] or 2
                )
                result['harakat'] = round(dur_result.harakat_count, 1)

        refined.append(result)

    return refined, stats
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def main():
    """Run the Surah 91 physics-refinement pipeline end to end.

    Loads the existing letter timing, parses Tajweed tags from the verse
    text, calibrates the duration model, runs refine_with_physics() over
    the audio, prints statistics and writes the refined timing JSON back
    into the MahQuranApp data directory.
    """
    print("=" * 60)
    print("TajweedSST - Physics Refinement: Surah 91")
    print("=" * 60)

    # Load existing timing
    print("\n[1] Loading existing timing...")
    timing_data = load_existing_timing()
    print(f" Entries: {len(timing_data)}")

    # Load verses and parse Tajweed
    print("\n[2] Parsing Tajweed rules...")
    verses = load_verses()
    tags = get_tajweed_tags(verses)
    print(f" Tajweed tags: {len(tags)}")

    # Load audio
    print("\n[3] Loading audio...")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    print(f" Duration: {len(audio)/sr:.1f}s")

    # Initialize validators
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Calibrate the harakat base from short-vowel-sized entries.
    # NOTE(review): refine_with_physics treats 'start'/'end' as milliseconds,
    # so durations here would be in ms too — a 0.05..0.15 window would then
    # match nothing. Confirm the unit of the stored timing before relying on
    # this calibration.
    vowels = [e['end'] - e['start'] for e in timing_data if 0.05 <= (e['end'] - e['start']) <= 0.15]
    if vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", vowels)
        print(f" Harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Refine
    print("\n[4] Applying physics refinement...")
    refined, stats = refine_with_physics(timing_data, tags, audio, sr, physics, duration_model)

    print(f"\n[5] Statistics:")
    print(f" Total: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")

    if stats['validated'] > 0:
        # Marginal results are counted as acceptable in the pass rate.
        rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f" Pass Rate: {rate:.1f}%")

    # Save
    print(f"\n[6] Saving to MahQuranApp...")
    with open(OUTPUT_TIMING, 'w', encoding='utf-8') as f:
        json.dump(refined, f, ensure_ascii=False, indent=2)
    print(f" Saved: {OUTPUT_TIMING}")

    # Show sample
    print("\n[7] Sample refined entries:")
    for entry in refined[:5]:
        tj = entry.get('tajweed', 'None')
        ph = entry.get('physics', '-')
        sc = entry.get('score', '-')
        print(f" {entry['char']}: {tj} | physics={ph} score={sc}")

    print("\n" + "=" * 60)
    print("✓ Done! Test in MahQuranApp with:")
    print(f" letter_timing_91_physics.json")
    print("=" * 60)
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
# Script entry point: run the Surah 91 physics refinement.
if __name__ == "__main__":
    main()
|
surah_91_test.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Surah 91 (Ash-Shams) Physics Test
|
| 4 |
+
|
| 5 |
+
Tests the complete Tajweed physics system on Abdul Basit's recitation.
|
| 6 |
+
This validates all 10 physics validators on real Quranic audio.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
cd /Documents/26apps/tajweedsst
|
| 10 |
+
source venv/bin/activate
|
| 11 |
+
python3 surah_91_test.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import sys
|
| 16 |
+
import os
|
| 17 |
+
import numpy as np
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
from dataclasses import asdict
|
| 20 |
+
|
| 21 |
+
# Add src to path
|
| 22 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 23 |
+
|
| 24 |
+
from src.tajweed_parser import TajweedParser, TajweedType, PhysicsCheck
|
| 25 |
+
from src.physics_validator import PhysicsValidator, ValidationStatus
|
| 26 |
+
from src.duration_model import DurationModel, MaddType
|
| 27 |
+
|
| 28 |
+
# Check for librosa
|
| 29 |
+
try:
|
| 30 |
+
import librosa
|
| 31 |
+
HAS_LIBROSA = True
|
| 32 |
+
except ImportError:
|
| 33 |
+
HAS_LIBROSA = False
|
| 34 |
+
print("Warning: librosa not installed. Some tests will be skipped.")
|
| 35 |
+
|
| 36 |
+
# Paths
|
| 37 |
+
MAHQURAN_PATH = Path("/home/absolut7/Documents/26apps/MahQuranApp")
|
| 38 |
+
VERSES_PATH = MAHQURAN_PATH / "public/data/verses_v4.json"
|
| 39 |
+
AUDIO_PATH = MAHQURAN_PATH / "public/audio/abdul_basit/surah_091.mp3"
|
| 40 |
+
TIMING_PATH = MAHQURAN_PATH / "public/data/abdul_basit/letter_timing_91.json"
|
| 41 |
+
OUTPUT_PATH = Path(__file__).parent / "output/surah_91_physics.json"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def load_surah_91_text():
    """Load Surah 91 text from verses_v4.json"""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)

    return [
        {
            'ayah': record['ayah'],
            'text': record['text'].strip(),
            'translation': record.get('translation', ''),
        }
        for record in data.get('91', [])
    ]
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def load_timing_data():
    """Parse and return the existing per-letter timing JSON."""
    with open(TIMING_PATH, 'r', encoding='utf-8') as f:
        timing = json.load(f)
    return timing
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def load_audio():
    """Load the Surah 91 recitation.

    Returns:
        (audio, sr): the waveform resampled to 22050 Hz, or
        (None, 22050) when librosa is unavailable so that downstream
        physics checks are skipped instead of crashing.
    """
    if not HAS_LIBROSA:
        return None, 22050

    print(f" Loading audio from: {AUDIO_PATH}")
    audio, sr = librosa.load(str(AUDIO_PATH), sr=22050)
    print(f" Duration: {len(audio)/sr:.1f}s")
    return audio, sr
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def analyze_with_physics(verses, timing_data, audio, sr):
    """Run all applicable physics validators over the timed letters.

    Parses the verses for Tajweed tags, calibrates the duration model
    from short-vowel-sized timing entries, then matches timing entries
    to tags positionally (entry i <-> tag i) and dispatches the
    appropriate validator per physics_check string.

    NOTE(review): 'start'/'end' are used here directly as validator
    offsets, whereas surah_91_full_pipeline.py divides the same fields
    by 1000 (ms -> s). Confirm which unit the timing file actually uses.

    Returns:
        (results, physics_stats, duration_model): per-letter result
        dicts, the pass/fail counters, and the calibrated model.
    """
    parser = TajweedParser()
    physics = PhysicsValidator(sample_rate=sr)
    duration_model = DurationModel()

    # Parse all verses for Tajweed rules
    all_tags = []
    for verse in verses:
        word_tags = parser.parse_text(verse['text'])
        for word_tag in word_tags:
            for letter in word_tag.letters:
                all_tags.append({
                    'char': letter.char_visual,
                    'phonetic': letter.char_phonetic,
                    'tajweed_type': letter.tajweed_type.value,
                    'physics_check': letter.physics_check.value,
                    'madd_count': letter.madd_count
                })

    # Calibrate duration model from timing data
    short_vowels = []
    for entry in timing_data:
        duration = entry['end'] - entry['start']
        if 0.05 <= duration <= 0.15:  # Short vowel range
            short_vowels.append(duration)

    if short_vowels:
        duration_model.calibrate_from_samples("Abdul_Basit", short_vowels)
        print(f" Calibrated harakat: {duration_model.calibration.harakat_base_ms:.1f}ms")

    # Run physics validation on each letter
    results = []
    physics_stats = {
        'total': 0,
        'validated': 0,
        'passed': 0,
        'marginal': 0,
        'failed': 0,
        'skipped': 0
    }

    # Match timing entries with Tajweed tags (positional; stop when tags run out)
    for i, entry in enumerate(timing_data):
        if i >= len(all_tags):
            break

        tag = all_tags[i]
        start = entry['start']
        end = entry['end']
        duration = end - start

        result = {
            'idx': i,
            'char': entry['char'],
            'start': start,
            'end': end,
            'duration_ms': duration * 1000,
            'tajweed_type': tag['tajweed_type'],
            'physics_check': tag['physics_check']
        }

        physics_stats['total'] += 1

        # Skip if no physics check needed or no audio
        if tag['physics_check'] == 'None' or audio is None:
            result['validation'] = 'not_required'
            results.append(result)
            continue

        physics_stats['validated'] += 1

        # Run appropriate validator
        check_type = tag['physics_check']

        try:
            if check_type == 'Check_RMS_Bounce':
                # Qalqalah
                val_result = physics.validate_qalqalah(audio, start, end)
                result['metric'] = 'RMS Bounce'
                # rms_profile is optional on the validator result object.
                result['profile'] = val_result.rms_profile if hasattr(val_result, 'rms_profile') else ''

            elif check_type == 'Check_Duration':
                # Madd — default to 2 harakat when no count was tagged.
                madd_count = tag['madd_count'] if tag['madd_count'] > 0 else 2
                val_result = physics.validate_madd(audio, start, end, madd_count)
                result['metric'] = 'Duration'
                result['ratio'] = val_result.ratio if hasattr(val_result, 'ratio') else 0

            elif check_type == 'Check_Ghunnah':
                # Ghunnah/Ikhfa/Iqlab share the nasal check but use
                # rule-specific validators.
                if tag['tajweed_type'] == 'Ikhfa':
                    val_result = physics.validate_ikhfa(audio, start, end)
                elif tag['tajweed_type'] == 'Iqlab':
                    val_result = physics.validate_iqlab(audio, start, end)
                else:
                    val_result = physics.validate_ghunnah(audio, start, end)
                result['metric'] = 'Nasal'

            elif check_type == 'Check_Formant_F2':
                # Tafkheem
                val_result = physics.validate_tafkheem(audio, start, end)
                result['metric'] = 'F2 Formant'

            else:
                val_result = None

            if val_result:
                result['status'] = val_result.status.value
                result['score'] = val_result.score

                if val_result.status == ValidationStatus.PASS:
                    physics_stats['passed'] += 1
                elif val_result.status == ValidationStatus.MARGINAL:
                    physics_stats['marginal'] += 1
                elif val_result.status == ValidationStatus.FAIL:
                    physics_stats['failed'] += 1
                else:
                    physics_stats['skipped'] += 1
            else:
                result['status'] = 'unknown'
                result['score'] = 0

        except Exception as e:
            # Record the failure on the entry rather than aborting the run.
            result['status'] = 'error'
            result['error'] = str(e)
            physics_stats['skipped'] += 1

        results.append(result)

    return results, physics_stats, duration_model
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def main():
    """Run the Surah 91 (Ash-Shams) physics test end to end.

    Loads verses, timing and audio, runs analyze_with_physics(), prints
    statistics plus one sample result per Tajweed type, and writes the
    full analysis to OUTPUT_PATH.

    Returns:
        The output dict that was written to disk.
    """
    print("=" * 60)
    print("TajweedSST - Surah 91 (Ash-Shams) Physics Test")
    print("=" * 60)

    # Step 1: Load data
    print("\n[1] Loading Surah 91 data...")
    verses = load_surah_91_text()
    print(f" Verses: {len(verses)}")
    print(f" First verse: {verses[0]['text'][:40]}...")

    timing_data = load_timing_data()
    print(f" Timing entries: {len(timing_data)}")

    # Step 2: Load audio (may be (None, 22050) without librosa)
    print("\n[2] Loading audio...")
    audio, sr = load_audio()

    # Step 3: Run physics analysis
    print("\n[3] Running physics validation...")
    results, stats, duration_model = analyze_with_physics(verses, timing_data, audio, sr)

    # Step 4: Print statistics
    print("\n[4] Physics Validation Statistics:")
    print(f" Total letters: {stats['total']}")
    print(f" Validated: {stats['validated']}")
    print(f" ✓ Passed: {stats['passed']}")
    print(f" ~ Marginal: {stats['marginal']}")
    print(f" ✗ Failed: {stats['failed']}")
    print(f" ⊘ Skipped: {stats['skipped']}")

    if stats['validated'] > 0:
        # Marginal results count toward the pass rate.
        pass_rate = (stats['passed'] + stats['marginal']) / stats['validated'] * 100
        print(f"\n Pass Rate: {pass_rate:.1f}%")

    # Step 5: Show samples of each Tajweed type (first occurrence wins)
    print("\n[5] Sample Results by Tajweed Type:")

    tajweed_samples = {}
    for r in results:
        tj_type = r['tajweed_type']
        if tj_type != 'None' and tj_type not in tajweed_samples:
            tajweed_samples[tj_type] = r

    for tj_type, sample in tajweed_samples.items():
        status = sample.get('status', 'N/A')
        score = sample.get('score', 0)
        char = sample['char']
        print(f" {tj_type}:")
        print(f" Letter: {char}, Status: {status}, Score: {score:.2f}")

    # Step 6: Duration analysis
    print("\n[6] Duration Model Calibration:")
    if duration_model.calibration:
        print(f" Reciter: {duration_model.calibration.reciter_name}")
        print(f" Harakat base: {duration_model.calibration.harakat_base_ms:.1f}ms")
        print(f" Sample size: {duration_model.calibration.sample_size}")

    # Step 7: Save results
    print("\n[7] Saving results...")
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    output = {
        'surah': 91,
        'name': 'Ash-Shams',
        'name_arabic': 'الشمس',
        'statistics': stats,
        'calibration': {
            # Fall back to 100 (ms) when no calibration happened.
            'harakat_ms': duration_model.calibration.harakat_base_ms if duration_model.calibration else 100
        },
        'results': results
    }

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False, indent=2)
    print(f" Saved: {OUTPUT_PATH}")

    print("\n" + "=" * 60)
    print("✓ Physics Test Complete!")
    print("=" * 60)

    return output
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
# Script entry point: run the Surah 91 physics test.
if __name__ == "__main__":
    main()
|
tests/test_alignment_engine.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Alignment Engine Unit Tests
|
| 4 |
+
|
| 5 |
+
Tests word and phoneme timing accuracy:
|
| 6 |
+
- WhisperX word alignment
|
| 7 |
+
- MFA phoneme alignment
|
| 8 |
+
- Phoneme normalization within word boundaries
|
| 9 |
+
- Mock alignment for testing without models
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import pytest
|
| 13 |
+
import os
|
| 14 |
+
import sys
|
| 15 |
+
|
| 16 |
+
# Add src to path
|
| 17 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 18 |
+
|
| 19 |
+
from alignment_engine import (
|
| 20 |
+
AlignmentEngine,
|
| 21 |
+
MockAlignmentEngine,
|
| 22 |
+
PhonemeAlignment,
|
| 23 |
+
WordAlignment,
|
| 24 |
+
AlignmentResult
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class TestDataclasses:
    """Unit tests for the alignment data structures."""

    def test_phoneme_alignment(self):
        """PhonemeAlignment stores timing correctly"""
        sample = PhonemeAlignment(phoneme="ب", start=0.0, end=0.1, duration=0.1)
        assert sample.duration == 0.1
        assert sample.phoneme == "ب"

    def test_phoneme_normalized_duration(self):
        """Normalized duration calculation"""
        sample = PhonemeAlignment(phoneme="ا", start=0.0, end=0.2, duration=0.2)
        # normalized_duration is exposed as a property.
        assert sample.normalized_duration == 0.2

    def test_word_alignment(self):
        """WordAlignment stores word and phonemes"""
        segments = [
            PhonemeAlignment("ب", 0.0, 0.15, 0.15),
            PhonemeAlignment("س", 0.15, 0.35, 0.20),
            PhonemeAlignment("م", 0.35, 0.5, 0.15),
        ]
        aligned_word = WordAlignment(
            word_text="بسم",
            whisper_start=0.0,
            whisper_end=0.5,
            phonemes=segments,
        )
        assert aligned_word.word_text == "بسم"
        assert len(aligned_word.phonemes) == 3
        assert aligned_word.whisper_duration == 0.5

    def test_alignment_result(self):
        """AlignmentResult stores full alignment"""
        outcome = AlignmentResult(
            audio_path="/path/to/audio.wav",
            surah=91,
            ayah=1,
            words=[],
        )
        assert outcome.surah == 91
        assert outcome.ayah == 1
+
|
| 70 |
+
|
| 71 |
+
class TestMockAlignmentEngine:
    """Test mock alignment for development without models"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_mock_align_returns_result(self, engine):
        """Mock alignment returns AlignmentResult"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["b i s m", "a l l a h"],
            surah=1,
            ayah=1,
        )
        assert isinstance(outcome, AlignmentResult)

    def test_mock_align_word_count(self, engine):
        """Mock alignment produces correct word count"""
        tokens = ["b i s m", "a l l a h", "a r r a h m a n"]
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=tokens,
            surah=1,
            ayah=1,
        )
        assert len(outcome.words) == len(tokens)

    def test_mock_align_phoneme_generation(self, engine):
        """Mock alignment generates phonemes for each word"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["b i s m"],
            surah=1,
            ayah=1,
        )
        # "b i s m" should produce ~4 phonemes
        assert len(outcome.words[0].phonemes) >= 3

    def test_mock_align_timing_monotonic(self, engine):
        """Mock timing should be monotonically increasing"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["word1", "word2", "word3"],
            surah=1,
            ayah=1,
        )

        last_end = 0.0
        for aligned_word in outcome.words:
            assert aligned_word.whisper_start >= last_end, "Word start before previous end"
            last_end = aligned_word.whisper_end
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
class TestTimingMonotonicity:
    """Test that timing never goes backwards"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_word_timing_monotonic(self, engine):
        """Word-level timing is strictly increasing"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["w1", "w2", "w3", "w4", "w5"],
            surah=1,
            ayah=1,
        )

        # Compare each consecutive pair of aligned words.
        pairs = zip(outcome.words, outcome.words[1:])
        for i, (prev, curr) in enumerate(pairs, start=1):
            assert curr.whisper_start >= prev.whisper_end, \
                f"Word {i} starts ({curr.whisper_start}) before word {i-1} ends ({prev.whisper_end})"

    def test_phoneme_timing_monotonic(self, engine):
        """Phoneme-level timing is strictly increasing within words"""
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["a l r a h m a n"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for prev, curr in zip(aligned_word.phonemes, aligned_word.phonemes[1:]):
                assert curr.start >= prev.end, \
                    f"Phoneme {curr.phoneme} starts before {prev.phoneme} ends"
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
class TestPhonemeNormalization:
    """Test phoneme duration normalization"""

    def test_phonemes_fit_word_boundary(self):
        """Normalized phonemes should fit exactly in word boundaries"""
        slices = [
            PhonemeAlignment("t", 1.0, 1.25, 0.25),
            PhonemeAlignment("e", 1.25, 1.5, 0.25),
            PhonemeAlignment("s", 1.5, 1.75, 0.25),
            PhonemeAlignment("t", 1.75, 2.0, 0.25),
        ]
        word = WordAlignment(
            word_text="test",
            whisper_start=1.0,
            whisper_end=2.0,
            phonemes=slices,
        )

        # First phoneme opens at the word start...
        assert word.phonemes[0].start == word.whisper_start
        # ...and the final phoneme closes at the word end.
        assert word.phonemes[-1].end == word.whisper_end

    def test_phonemes_cover_word_duration(self):
        """Phoneme durations should sum to word duration"""
        word = WordAlignment(
            word_text="test",
            whisper_start=0.0,
            whisper_end=1.0,
            phonemes=[
                PhonemeAlignment("a", 0.0, 0.333, 0.333),
                PhonemeAlignment("b", 0.333, 0.666, 0.333),
                PhonemeAlignment("c", 0.666, 1.0, 0.334),
            ],
        )

        covered = sum(p.duration for p in word.phonemes)
        # Allow small floating point error
        assert abs(covered - word.whisper_duration) < 0.01
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
class TestArabicPhonemes:
    """Test Arabic-specific phoneme handling"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_arabic_phonetic_transcription(self, engine):
        """Engine handles Arabic phonetic transcription"""
        transliterated = ["b i s m i", "a l l aa h i"]  # Arabic transliteration
        outcome = engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=transliterated,
            surah=1,
            ayah=1,
        )
        assert len(outcome.words) == 2
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
if __name__ == "__main__":
    # pytest.main() RETURNS the exit status instead of exiting; propagate it
    # so running this file directly reports failures via the process exit code
    # (previously the script always exited 0, even when tests failed).
    raise SystemExit(pytest.main([__file__, "-v"]))
|
tests/test_physics_validator.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Physics Validator Unit Tests
|
| 4 |
+
|
| 5 |
+
Tests all Tajweed acoustic validation rules:
|
| 6 |
+
- Qalqalah (bounce)
|
| 7 |
+
- Madd (elongation)
|
| 8 |
+
- Ghunnah (nasalization)
|
| 9 |
+
- Tafkheem (heavy letters)
|
| 10 |
+
- Idgham (assimilation)
|
| 11 |
+
- Ikhfa (concealment)
|
| 12 |
+
- Iqlab (conversion)
|
| 13 |
+
- Izhar (clarity)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import pytest
|
| 17 |
+
import numpy as np
|
| 18 |
+
import os
|
| 19 |
+
import sys
|
| 20 |
+
|
| 21 |
+
# Add src to path
|
| 22 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 23 |
+
|
| 24 |
+
from physics_validator import (
|
| 25 |
+
PhysicsValidator,
|
| 26 |
+
ValidationStatus,
|
| 27 |
+
PhysicsResult,
|
| 28 |
+
QalqalahResult,
|
| 29 |
+
MaddResult
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TestPhysicsValidatorInit:
    """Test initialization and configuration"""

    def test_default_init(self):
        """Validator initializes with default sample rate"""
        validator = PhysicsValidator()
        assert validator.sample_rate == 22050
        assert validator._average_vowel_duration > 0

    def test_custom_sample_rate(self):
        """Validator accepts custom sample rate"""
        validator = PhysicsValidator(sample_rate=16000)
        assert validator.sample_rate == 16000

    def test_thresholds_exist(self):
        """All Tajweed thresholds are defined"""
        validator = PhysicsValidator()
        # Every rule-specific constant must be present on the instance.
        for attr in (
            'QALQALAH_DIP_THRESHOLD',
            'MADD_RATIO_ASLI',
            'MADD_RATIO_WAJIB',
            'MADD_RATIO_LAZIM',
        ):
            assert hasattr(validator, attr)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class TestQalqalahValidation:
    """Test Qalqalah (echo/bounce) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def sample_audio(self):
        """Generate test audio: silence -> speech -> silence (qalqalah pattern)"""
        sr = 22050
        duration = 0.5  # 500ms
        t = np.linspace(0, duration, int(sr * duration))
        n = len(t)

        # Shape an amplitude envelope with the dip-then-spike contour of qalqalah.
        envelope = np.ones_like(t)
        envelope[int(0.3 * n):int(0.4 * n)] = 0.1  # closure dip at 30-40%
        envelope[int(0.4 * n):int(0.5 * n)] = 1.5  # release spike at 40-50%

        return (envelope * np.sin(2 * np.pi * 200 * t)).astype(np.float32)

    def test_qalqalah_returns_physics_result(self, validator, sample_audio):
        """Qalqalah validation returns PhysicsResult"""
        outcome = validator.validate_qalqalah(sample_audio, 0.0, 0.5)
        # QalqalahResult subclasses PhysicsResult, so the base fields exist.
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'metric_name')

    def test_qalqalah_detects_dip_spike(self, validator, sample_audio):
        """Qalqalah validator detects dip-spike pattern"""
        outcome = validator.validate_qalqalah(sample_audio, 0.0, 0.5)
        # Should at least have a score
        assert outcome.score >= 0

    def test_qalqalah_short_segment_handles_gracefully(self, validator):
        """Very short segments should be handled gracefully"""
        tiny = np.zeros(100, dtype=np.float32)  # ~4.5ms at 22050 Hz
        outcome = validator.validate_qalqalah(tiny, 0.0, 0.005)
        # Must not crash; either FAIL or SKIPPED is acceptable here.
        assert outcome.status in [ValidationStatus.SKIPPED, ValidationStatus.FAIL]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class TestMaddValidation:
    """Test Madd (elongation) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def vowel_audio(self):
        """Generate sustained vowel-like audio"""
        sr = 22050
        duration = 0.4  # 400ms (should be ~2 counts)
        t = np.linspace(0, duration, int(sr * duration))
        return np.sin(2 * np.pi * 200 * t).astype(np.float32)

    def test_madd_returns_physics_result(self, validator, vowel_audio):
        """Madd validation returns PhysicsResult"""
        outcome = validator.validate_madd(vowel_audio, 0.0, 0.4, expected_count=2)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')

    def test_madd_asli_duration(self, validator, vowel_audio):
        """Madd Asli (2 counts) should pass for ~400ms vowel"""
        outcome = validator.validate_madd(vowel_audio, 0.0, 0.4, expected_count=2)
        # Natural madd is 2 counts
        assert outcome.score >= 0
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
class TestGhunnahValidation:
    """Test Ghunnah (nasalization) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def nasal_audio(self):
        """Generate nasal-like audio with limited bandwidth"""
        sr = 22050
        duration = 0.3
        t = np.linspace(0, duration, int(sr * duration))
        # Two low-frequency partials approximate a nasal resonance.
        tone = np.sin(2 * np.pi * 300 * t) + 0.5 * np.sin(2 * np.pi * 500 * t)
        return tone.astype(np.float32)

    def test_ghunnah_returns_physics_result(self, validator, nasal_audio):
        """Ghunnah validation returns PhysicsResult"""
        outcome = validator.validate_ghunnah(nasal_audio, 0.0, 0.3)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class TestTafkheemValidation:
    """Test Tafkheem (heavy letter) detection via F2 formant"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def heavy_audio(self):
        """Generate audio with low F2 characteristic"""
        sr = 22050
        duration = 0.2
        t = np.linspace(0, duration, int(sr * duration))
        # Energy concentrated low, with a weaker 1 kHz partial, for a "heavy" timbre.
        tone = np.sin(2 * np.pi * 150 * t) + 0.3 * np.sin(2 * np.pi * 1000 * t)
        return tone.astype(np.float32)

    def test_tafkheem_returns_physics_result(self, validator, heavy_audio):
        """Tafkheem validation returns PhysicsResult"""
        outcome = validator.validate_tafkheem(heavy_audio, 0.0, 0.2)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
class TestIdghamValidation:
    """Test Idgham (assimilation) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def merged_audio(self):
        """Generate smoothly merged audio (no boundary)"""
        sr = 22050
        duration = 0.4
        t = np.linspace(0, duration, int(sr * duration))
        # A single continuous tone — no acoustic boundary anywhere.
        return np.sin(2 * np.pi * 200 * t).astype(np.float32)

    def test_idgham_returns_physics_result(self, validator, merged_audio):
        """Idgham validation returns PhysicsResult"""
        outcome = validator.validate_idgham(merged_audio, 0.0, 0.2, 0.4, has_ghunnah=True)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
class TestIkhfaValidation:
    """Test Ikhfa (concealment) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def concealed_audio(self):
        """Generate gradually fading nasal audio"""
        sr = 22050
        duration = 0.3
        t = np.linspace(0, duration, int(sr * duration))
        fade = np.exp(-3 * t / duration)  # exponential decay envelope
        return (fade * np.sin(2 * np.pi * 300 * t)).astype(np.float32)

    def test_ikhfa_returns_physics_result(self, validator, concealed_audio):
        """Ikhfa validation returns PhysicsResult"""
        outcome = validator.validate_ikhfa(concealed_audio, 0.0, 0.3)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
class TestIzharValidation:
    """Test Izhar (clear pronunciation) detection"""

    @pytest.fixture
    def validator(self):
        return PhysicsValidator()

    @pytest.fixture
    def clear_audio(self):
        """Generate audio with clear boundary between sounds"""
        sr = 22050
        duration = 0.4
        t = np.linspace(0, duration, int(sr * duration))
        n = len(t)
        signal = np.zeros_like(t)
        # First letter occupies the opening half...
        signal[:n // 2] = np.sin(2 * np.pi * 200 * t[:n // 2])
        # ...then a short silent gap (50-55%)...
        # ...and a second letter at a different pitch fills the remainder.
        signal[int(0.55 * n):] = np.sin(2 * np.pi * 300 * t[int(0.55 * n):])
        return signal.astype(np.float32)

    def test_izhar_returns_physics_result(self, validator, clear_audio):
        """Izhar validation returns PhysicsResult"""
        outcome = validator.validate_izhar(clear_audio, 0.0, 0.2, 0.22)
        assert hasattr(outcome, 'status')
        assert hasattr(outcome, 'score')
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
class TestValidationResults:
    """Test result dataclasses.

    Fix: the original methods re-imported QalqalahResult, MaddResult and
    ValidationStatus inside the test bodies even though all three are already
    imported at module scope — the redundant function-local imports are removed.
    """

    def test_physics_result_fields(self):
        """PhysicsResult has all required fields"""
        result = PhysicsResult(
            status=ValidationStatus.PASS,
            metric_name="test",
            expected_pattern="dip-spike",
            observed_pattern="dip-spike",
            score=0.95
        )
        assert result.status == ValidationStatus.PASS
        assert result.score == 0.95

    def test_qalqalah_result_fields(self):
        """QalqalahResult has specific fields"""
        # QalqalahResult inherits from PhysicsResult and adds qalqalah metrics.
        result = QalqalahResult(
            status=ValidationStatus.PASS,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern="dip_then_spike",
            score=0.8,
            rms_profile="dip-spike",
            dip_depth=0.3,
            spike_height=1.5,
            closure_duration_ms=50
        )
        assert result.dip_depth == 0.3
        assert result.spike_height == 1.5

    def test_madd_result_fields(self):
        """MaddResult has duration fields"""
        result = MaddResult(
            status=ValidationStatus.PASS,
            metric_name="Duration Ratio",
            expected_pattern="extended",
            observed_pattern="extended",
            score=1.0,
            actual_duration_ms=400,
            expected_duration_ms=400,
            ratio=1.0
        )
        assert result.ratio == 1.0
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
if __name__ == "__main__":
    # pytest.main() RETURNS the exit status instead of exiting; propagate it
    # so running this file directly reports failures via the process exit code
    # (previously the script always exited 0, even when tests failed).
    raise SystemExit(pytest.main([__file__, "-v"]))
|
tests/test_pipeline.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
TajweedSST - Pipeline Integration Tests
|
| 4 |
+
|
| 5 |
+
Tests the full alignment pipeline end-to-end:
|
| 6 |
+
- Text parsing → Alignment → Physics Validation
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import pytest
|
| 10 |
+
import os
|
| 11 |
+
import sys
|
| 12 |
+
|
| 13 |
+
# Add src to path
|
| 14 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 15 |
+
|
| 16 |
+
from alignment_engine import MockAlignmentEngine, AlignmentResult
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class TestFullPipeline:
    """Integration tests for complete pipeline"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_surah_91_ayah_1(self, engine):
        """Test alignment for Surah 91, Ayah 1: والشمس وضحاها"""
        transliterated = [
            "w a l sh sh a m s i",
            "w a D u H aa h aa",
        ]

        outcome = engine.align(
            audio_path="/path/to/surah_91_ayah_1.wav",
            phonetic_words=transliterated,
            surah=91,
            ayah=1,
        )

        assert outcome.surah == 91
        assert outcome.ayah == 1
        assert len(outcome.words) == 2

        # Verify monotonicity
        for prev, curr in zip(outcome.words, outcome.words[1:]):
            assert curr.whisper_start >= prev.whisper_end

    def test_grapheme_count_matches(self, engine):
        """Total graphemes should match input"""
        transliterated = ["a b c", "d e f g"]  # 7 phonemes total

        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=transliterated,
            surah=1,
            ayah=1,
        )

        produced = sum(len(w.phonemes) for w in outcome.words)
        # Each space-separated token should become a phoneme
        expected = sum(len(w.split()) for w in transliterated)
        assert produced >= expected - 2  # Allow some variance
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class TestTimingRegression:
    """Tests to catch timing regressions"""

    @pytest.fixture
    def engine(self):
        return MockAlignmentEngine()

    def test_no_negative_durations(self, engine):
        """No phoneme should have negative duration"""
        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=["a b c d e f g h i j"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for phoneme in aligned_word.phonemes:
                assert phoneme.duration >= 0, \
                    f"Negative duration: {phoneme.phoneme} = {phoneme.duration}"

    def test_no_zero_duration_phonemes(self, engine):
        """Phonemes should have positive duration"""
        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=["test word"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for phoneme in aligned_word.phonemes:
                assert phoneme.duration > 0, \
                    f"Zero duration phoneme: {phoneme.phoneme}"

    def test_no_overlapping_phonemes(self, engine):
        """Phonemes within a word should not overlap"""
        outcome = engine.align(
            audio_path="/fake.wav",
            phonetic_words=["a l r a h m a n"],
            surah=1,
            ayah=1,
        )

        for aligned_word in outcome.words:
            for prev, curr in zip(aligned_word.phonemes, aligned_word.phonemes[1:]):
                assert curr.start >= prev.end, \
                    f"Overlap: {prev.phoneme} ({prev.end}) > {curr.phoneme} ({curr.start})"
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
if __name__ == "__main__":
    # pytest.main() RETURNS the exit status instead of exiting; propagate it
    # so running this file directly reports failures via the process exit code
    # (previously the script always exited 0, even when tests failed).
    raise SystemExit(pytest.main([__file__, "-v"]))
|
whisperx_align_90.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
WhisperX Forced Alignment for Surah 90 (Al-Balad)
|
| 4 |
+
Uses wav2vec2 to FORCE align the known Quran text to the audio.
|
| 5 |
+
This gives perfect letter timing since we provide the exact text upfront.
|
| 6 |
+
|
| 7 |
+
Based on MahQuranApp/scripts/whisperx_forced_align.py
|
| 8 |
+
"""
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
+
import torch
|
| 12 |
+
import whisperx
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
# Monkeypatch torch.load for PyTorch 2.6+ compatibility.
# PyTorch 2.6 made torch.load default to weights_only=True, which rejects
# checkpoints containing OmegaConf container objects (used by some align models).
try:
    from omegaconf import OmegaConf
    from omegaconf.listconfig import ListConfig
    from omegaconf.dictconfig import DictConfig
    from omegaconf.base import ContainerMetadata
    # Preferred path: whitelist the OmegaConf types so safe loading still works.
    torch.serialization.add_safe_globals([ListConfig, DictConfig, ContainerMetadata])
    print("Added OmegaConf to torch safe globals.")
except ImportError:
    print("OmegaConf not found, using aggressive torch.load patch.")

    # Fallback: force weights_only=False on every torch.load call.
    # NOTE(review): this also overrides callers that explicitly pass
    # weights_only and disables safe-loading globally — acceptable for a
    # local one-off script, but confirm before reusing elsewhere.
    original_load = torch.load
    def safe_load(*args, **kwargs):
        kwargs['weights_only'] = False
        return original_load(*args, **kwargs)
    torch.load = safe_load

# Configuration — hard-coded paths into the MahQuranApp checkout.
SURAH_NUM = 90
PROJECT_ROOT = Path("/home/absolut7/Documents/26apps/MahQuranApp")
AUDIO_PATH = PROJECT_ROOT / "public/audio/abdul_basit/surah_090.mp3"
OUTPUT_DIR = PROJECT_ROOT / "public/data"
VERSES_PATH = PROJECT_ROOT / "public/data/verses_v4.json"
DEVICE = "cpu"  # Use CPU for compatibility
|
| 39 |
+
|
| 40 |
+
def get_surah_text():
    """Get Surah 90 text from verses_v4.json"""
    with open(VERSES_PATH, 'r', encoding='utf-8') as f:
        verses = json.load(f)
    # Join every ayah of the configured surah into one space-separated string.
    return ' '.join(verse['text'] for verse in verses[str(SURAH_NUM)])
|
| 47 |
+
|
| 48 |
+
def main():
    """Force-align the known Quran text of Surah 90 to its recording.

    Pipeline: load the surah text, load the wav2vec2 alignment model and the
    audio, feed the full text as a single segment to whisperx.align with
    character alignments enabled, then dump per-character timings (seconds)
    to letter_timing_<surah>.json for MahQuranApp.
    """
    print("=" * 60)
    print(f"WhisperX FORCED ALIGNMENT for Surah {SURAH_NUM} (Al-Balad)")
    print("Using known Quran text for direct wav2vec2 alignment")
    print("=" * 60)

    # 1. Check audio exists
    if not AUDIO_PATH.exists():
        print(f"ERROR: Audio not found at {AUDIO_PATH}")
        return

    # 2. Get Quran text
    quran_text = get_surah_text()
    print(f"\nQuran text ({len(quran_text)} chars):")
    print(quran_text[:100] + "...")

    # 3. Load Alignment Model (wav2vec2)
    print("\nLoading wav2vec2 alignment model (Arabic)...")
    model_a, metadata = whisperx.load_align_model(language_code="ar", device=DEVICE)
    print("Alignment model loaded.")

    # 4. Load Audio
    print("Loading audio...")
    audio = whisperx.load_audio(str(AUDIO_PATH))
    audio_duration = len(audio) / 16000  # whisperx.load_audio resamples to 16 kHz
    print(f"Audio duration: {audio_duration:.2f}s")

    # 5. Create "fake" segments from the known Quran text.
    # WhisperX's align() function expects segments with 'text', 'start', 'end';
    # we provide the full Quran text as a single segment spanning the entire audio.
    print("\nCreating forced alignment segment from Quran text...")
    segments = [{
        "text": quran_text,
        "start": 0.0,
        "end": audio_duration
    }]

    # 6. Force Align
    print("Performing FORCED ALIGNMENT with wav2vec2...")
    result = whisperx.align(
        segments,
        model_a,
        metadata,
        audio,
        DEVICE,
        return_char_alignments=True
    )

    # 7. Extract character-level timing (SECONDS format for MahQuranApp).
    # idx numbers only the non-space characters, matching the app's indexing.
    print("\nExtracting character timings...")
    output_timing = []
    idx = 0

    for seg in result.get("segments", []):
        if "chars" in seg:
            for ch in seg["chars"]:
                char = ch.get("char", "")
                start = ch.get("start", 0)  # defaults to 0 when wav2vec2 gave no timing
                end = ch.get("end", 0)

                # Skip spaces — they carry no letter timing.
                if char.isspace():
                    continue

                output_timing.append({
                    "char": char,
                    "start": round(start, 3),  # seconds
                    "end": round(end, 3),
                    "idx": idx
                })
                idx += 1

    print(f"Got {len(output_timing)} characters with timing")

    # 8. Save output
    output_path = OUTPUT_DIR / f"letter_timing_{SURAH_NUM}.json"
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_timing, f, ensure_ascii=False, indent=2)

    print(f"\nSaved to {output_path}")

    # Print first 20 for verification
    print("\n=== First 20 characters ===")
    for e in output_timing[:20]:
        dur_ms = (e['end'] - e['start']) * 1000
        print(f"  {e['idx']:3d}: '{e['char']}' @ {e['start']:.3f}s - {e['end']:.3f}s ({dur_ms:.0f}ms)")

    print("\n" + "=" * 60)
    print("✓ Forced alignment complete!")
    print("=" * 60)

if __name__ == "__main__":
    main()
|
whisperx_surah90.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Generate new precision timing for Surah 90 using faster-whisper
|
| 4 |
+
|
| 5 |
+
Uses faster-whisper directly (which WhisperX wraps) to avoid pyannote VAD issues.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from faster_whisper import WhisperModel
|
| 11 |
+
|
| 12 |
+
# Audio path
|
| 13 |
+
AUDIO_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/audio/abdul_basit/surah_090.mp3"
|
| 14 |
+
VERSES_PATH = "/home/absolut7/Documents/26apps/MahQuranApp/public/data/verses_v4.json"
|
| 15 |
+
OUTPUT_PATH = Path(__file__).parent / "output/surah_90_new.json"
|
| 16 |
+
|
| 17 |
+
def run_alignment():
    """Transcribe Surah 90 with faster-whisper and emit per-letter timing.

    Loads the Whisper ``large-v3`` model on CPU (int8 quantization keeps the
    memory footprint small), transcribes the audio at ``AUDIO_PATH`` with
    word-level timestamps, then distributes each word's time span uniformly
    across its characters (see ``_extract_letter_timing``).

    Writes two JSON files:
      - ``OUTPUT_PATH``: full metadata (language, duration, letter list).
      - ``letter_timing_90_new.json`` next to it: compact MahQuranApp format
        (``char``/``start``/``end``/``idx`` only).

    Returns:
        list[dict]: one entry per character with keys ``char``, ``start``,
        ``end``, ``idx``, ``word``, ``source``.
    """
    print("=" * 60)
    print("Faster-Whisper Alignment - Surah 90")
    print("=" * 60)

    # 1. Load model.
    print("\n[1] Loading Whisper model (large-v3)...")
    model = WhisperModel("large-v3", device="cpu", compute_type="int8")

    # 2. Transcribe with word timestamps; the VAD filter trims long silences
    # (>= 500 ms) so word boundaries are not stretched across pauses.
    print(f"\n[2] Transcribing: {AUDIO_PATH}")
    segments, info = model.transcribe(
        AUDIO_PATH,
        language="ar",
        word_timestamps=True,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500)
    )

    print(f" Language: {info.language} (prob: {info.language_probability:.2f})")
    print(f" Duration: {info.duration:.1f}s")

    # 3. Extract per-letter timing. ``segments`` is a lazy generator;
    # materialize it once so it can be counted and iterated.
    print("\n[3] Extracting letter timing...")
    all_segments = list(segments)
    print(f" Segments: {len(all_segments)}")
    letter_timing = _extract_letter_timing(all_segments)
    print(f" Total letters: {len(letter_timing)}")

    # 4. Save full-metadata output.
    print(f"\n[4] Saving to: {OUTPUT_PATH}")
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    output_data = {
        "surah": 90,
        "name": "Al-Balad",
        "source": "faster-whisper large-v3",
        "language": info.language,
        "language_probability": round(info.language_probability, 3),
        "duration": round(info.duration, 1),
        "total_letters": len(letter_timing),
        "letters": letter_timing
    }

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, ensure_ascii=False, indent=2)

    # Also save the compact MahQuranApp format (subset of keys per letter).
    mahquran_format = [
        {"char": lt["char"], "start": lt["start"], "end": lt["end"], "idx": lt["idx"]}
        for lt in letter_timing
    ]

    mahquran_path = OUTPUT_PATH.parent / "letter_timing_90_new.json"
    with open(mahquran_path, 'w', encoding='utf-8') as f:
        json.dump(mahquran_format, f, ensure_ascii=False, indent=2)
    print(f" Also saved: {mahquran_path}")

    print("\n" + "=" * 60)
    print("✓ Alignment complete!")
    print("=" * 60)

    # Show a small sample for quick manual verification.
    print("\nSample (first 10 letters):")
    for lt in letter_timing[:10]:
        print(f" [{lt['char']}] {lt['start']:.3f}s - {lt['end']:.3f}s ({lt['word']})")

    return letter_timing


def _extract_letter_timing(all_segments):
    """Distribute each word's [start, end] span uniformly over its characters.

    NOTE(review): every Unicode codepoint of the word — presumably including
    Arabic diacritics — gets an equal share of the word's duration; confirm
    that is acceptable for downstream consumers.

    Args:
        all_segments: transcription segments whose ``.words`` attribute holds
            word objects with ``word``, ``start`` and ``end`` fields.

    Returns:
        list[dict]: per-character entries (``char``, ``start``, ``end``,
        ``idx``, ``word``, ``source``) with a globally increasing index.
    """
    letter_timing = []
    global_idx = 0
    for segment in all_segments:
        if not segment.words:
            continue
        for word in segment.words:
            word_text = word.word.strip()
            chars = list(word_text)
            if not chars:
                # Whitespace-only token: nothing to time (and avoids a
                # division by zero below).
                continue
            char_duration = (word.end - word.start) / len(chars)
            for i, char in enumerate(chars):
                char_start = word.start + (i * char_duration)
                letter_timing.append({
                    "char": char,
                    "start": round(char_start, 3),
                    "end": round(char_start + char_duration, 3),
                    "idx": global_idx,
                    "word": word_text,
                    "source": "faster_whisper"
                })
                global_idx += 1
    return letter_timing
|
| 116 |
+
|
| 117 |
+
# Script entry point: run the alignment only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run_alignment()
|