| | |
| | """ |
| | TajweedSST - Duration Model |
| | |
| | Calibrates and validates letter durations based on Tajweed rules. |
| | Works with harakat (beat) counts and reciter-specific speech rates. |
| | |
| | Key Features: |
| | - Per-reciter harakat calibration |
| | - Madd type detection from Quranic context |
| | - Duration validation against Tajweed expectations |
| | - Speech rate normalization |
| | """ |
| |
|
| | import json |
| | import numpy as np |
| | from dataclasses import dataclass, field |
| | from typing import List, Dict, Optional, Tuple |
| | from pathlib import Path |
| | from enum import Enum |
| |
|
| |
|
class MaddType(Enum):
    """Madd rule categories recognised by the duration model.

    Each member's value is its lowercase string identifier, which is what
    ``DurationResult.rule_applied`` carries for Madd validations.
    """

    # No Madd rule applies.
    NONE = "none"

    # Categories that have an entry in DurationModel.TAJWEED_DURATIONS.
    ASLI = "asli"
    WAJIB = "wajib"
    JAIZ = "jaiz"
    LAZIM = "lazim"
    LEEN = "leen"
    ARID = "arid"

    # Declared categories without an entry in DurationModel.TAJWEED_DURATIONS.
    BADAL = "badal"
    SILAH = "silah"
| |
|
| |
|
@dataclass
class HarakatCalibration:
    """Timing calibration for a single reciter.

    Attributes:
        reciter_name: Identifier of the reciter this calibration belongs to.
        harakat_base_ms: Length of one harakat (beat) in milliseconds.
        speech_rate_wpm: Estimated speech rate in words per minute.
        pitch_range_hz: Pitch range in Hz as a 2-tuple; presumably
            (low, high) - TODO confirm against the producer of this value.
        sample_size: Number of vowel samples behind the calibration
            (0 means the defaults were used).
    """

    reciter_name: str
    harakat_base_ms: float = 100.0
    speech_rate_wpm: float = 60.0
    pitch_range_hz: Tuple[float, float] = (80.0, 300.0)
    sample_size: int = 0
| |
|
| |
|
@dataclass
class DurationExpectation:
    """Expected length of one Tajweed rule, expressed in harakat.

    Attributes:
        rule_name: Human-readable rule name (e.g. "Madd Asli").
        min_harakat: Smallest acceptable harakat count for the rule.
        max_harakat: Largest acceptable harakat count for the rule.
        expected_ms_range: Reference range in milliseconds as a 2-tuple;
            presumably (low, high) - TODO confirm.
        tolerance: Fractional slack applied around the expected length
            (0.25 means +/-25%).
    """

    rule_name: str
    min_harakat: int
    max_harakat: int
    expected_ms_range: Tuple[float, float]
    tolerance: float = 0.25
| |
|
| |
|
@dataclass
class DurationResult:
    """Outcome of checking one measured duration against a Tajweed rule.

    Attributes:
        is_valid: Whether the measured duration passed the check.
        actual_ms: Measured duration in milliseconds.
        expected_ms: Reference duration in milliseconds that the
            measurement was compared with.
        harakat_count: Measured duration expressed in harakat.
        deviation_percent: Distance between actual and expected duration,
            as a percentage.
        rule_applied: Identifier of the rule that was checked.
    """

    is_valid: bool
    actual_ms: float
    expected_ms: float
    harakat_count: float
    deviation_percent: float
    rule_applied: str
| |
|
| |
|
class DurationModel:
    """
    Duration model for Tajweed-based timing validation.

    Durations are reasoned about in harakat (beats). One harakat lasts
    ``harakat_base_ms`` of the active calibration, or ``DEFAULT_HARAKAT_MS``
    while no calibration has been set.
    """

    # Length of one harakat in milliseconds used until a reciter is calibrated.
    DEFAULT_HARAKAT_MS = 100.0

    # Harakat bounds and tolerance per Madd type. Only min_harakat,
    # max_harakat and tolerance are read by this class; Madd types without
    # an entry fall back to a single-harakat window.
    TAJWEED_DURATIONS = {
        MaddType.ASLI: DurationExpectation("Madd Asli", 2, 2, (150, 280), 0.30),
        MaddType.WAJIB: DurationExpectation("Madd Wajib", 4, 5, (350, 550), 0.25),
        MaddType.LAZIM: DurationExpectation("Madd Lazim", 6, 6, (500, 800), 0.20),
        MaddType.JAIZ: DurationExpectation("Madd Jaiz", 2, 6, (150, 700), 0.30),
        MaddType.ARID: DurationExpectation("Madd Arid", 2, 6, (150, 700), 0.30),
        MaddType.LEEN: DurationExpectation("Madd Leen", 2, 6, (150, 700), 0.30),
    }

    # Expectation used by validate_ghunnah_duration().
    GHUNNAH_DURATION = DurationExpectation("Ghunnah", 2, 2, (80, 250), 0.30)

    def __init__(self, lisan_path: Optional[str] = None):
        """Initialize with optional path to lisan_phonemes.json.

        Args:
            lisan_path: Path to a JSON file. It is loaded into
                ``self.lisan_data`` when the path exists; otherwise
                ``lisan_data`` stays an empty dict.
        """
        self.calibration: Optional[HarakatCalibration] = None
        self.lisan_data: Dict = {}

        if lisan_path and Path(lisan_path).exists():
            with open(lisan_path, 'r', encoding='utf-8') as f:
                self.lisan_data = json.load(f)

    def _base_ms(self) -> float:
        """Return the harakat length in ms: calibrated value, else the default."""
        if self.calibration:
            return self.calibration.harakat_base_ms
        return self.DEFAULT_HARAKAT_MS

    @staticmethod
    def _duration_window(expectation: DurationExpectation,
                         harakat_count: Optional[int],
                         base_ms: float) -> Tuple[float, float]:
        """Return the (min_ms, max_ms) window an expectation allows."""
        tolerance = expectation.tolerance
        if harakat_count:
            # A specific count was requested: tolerance band around it.
            center = harakat_count * base_ms
            return (center * (1 - tolerance), center * (1 + tolerance))
        # No count (None or 0): span the rule's whole harakat range.
        min_ms = expectation.min_harakat * base_ms * (1 - tolerance)
        max_ms = expectation.max_harakat * base_ms * (1 + tolerance)
        return (min_ms, max_ms)

    def _evaluate(self,
                  actual_duration_s: float,
                  window: Tuple[float, float],
                  rule_applied: str) -> DurationResult:
        """Compare a measured duration (seconds) with a (min_ms, max_ms) window."""
        actual_ms = actual_duration_s * 1000
        min_ms, max_ms = window
        # The midpoint of the window serves as the reference duration.
        expected_ms = (min_ms + max_ms) / 2

        is_valid = min_ms <= actual_ms <= max_ms
        if expected_ms > 0:
            deviation = abs(actual_ms - expected_ms) / expected_ms * 100
        else:
            deviation = 0

        base_ms = self._base_ms()
        harakat_count = actual_ms / base_ms if base_ms > 0 else 0

        return DurationResult(
            is_valid=is_valid,
            actual_ms=actual_ms,
            expected_ms=expected_ms,
            harakat_count=harakat_count,
            deviation_percent=deviation,
            rule_applied=rule_applied,
        )

    def calibrate_from_samples(self,
                               reciter_name: str,
                               vowel_durations: List[float],
                               words_per_minute: float = 60.0) -> HarakatCalibration:
        """
        Calibrate harakat duration from sample vowel measurements.

        The harakat length is the median of the usable samples. Samples that
        are NaN, infinite or not strictly positive are discarded, because
        they would corrupt the time base. When no usable sample remains,
        ``DEFAULT_HARAKAT_MS`` is used.

        Args:
            reciter_name: Name of reciter for identification
            vowel_durations: List of short vowel durations in seconds
            words_per_minute: Estimated speech rate

        Returns:
            HarakatCalibration object (also stored on ``self.calibration``);
            its ``sample_size`` is the number of samples actually used.
        """
        samples = [] if vowel_durations is None else vowel_durations
        durations_ms = [
            d * 1000 for d in samples if d > 0 and np.isfinite(d)
        ]

        if durations_ms:
            # float() keeps the stored value a plain Python float.
            harakat_base = float(np.median(durations_ms))
        else:
            harakat_base = self.DEFAULT_HARAKAT_MS

        self.calibration = HarakatCalibration(
            reciter_name=reciter_name,
            harakat_base_ms=harakat_base,
            speech_rate_wpm=words_per_minute,
            sample_size=len(durations_ms),
        )
        return self.calibration

    def get_expected_duration(self,
                              madd_type: MaddType,
                              harakat_count: Optional[int] = None) -> Tuple[float, float]:
        """
        Get expected duration range for a Madd type.

        Args:
            madd_type: Madd rule to look up.
            harakat_count: Specific harakat count to centre the window on.
                ``None`` (or 0) means "use the rule's full harakat range".

        Returns:
            Tuple of (min_ms, max_ms)
        """
        base_ms = self._base_ms()

        expectation = self.TAJWEED_DURATIONS.get(madd_type)
        if expectation is None:
            # Madd type without a table entry: one harakat, +/-30%.
            return (base_ms * 0.7, base_ms * 1.3)

        return self._duration_window(expectation, harakat_count, base_ms)

    def validate_duration(self,
                          actual_duration_s: float,
                          madd_type: MaddType,
                          expected_harakat: int = 2) -> DurationResult:
        """
        Validate if actual duration matches Tajweed expectation.

        Args:
            actual_duration_s: Actual duration in seconds
            madd_type: Type of Madd rule
            expected_harakat: Expected harakat count. Note the default of 2
                is used for every Madd type, so pass the intended count
                explicitly for rules longer than 2 harakat.

        Returns:
            DurationResult with validation details
        """
        window = self.get_expected_duration(madd_type, expected_harakat)
        return self._evaluate(actual_duration_s, window, madd_type.value)

    def validate_ghunnah_duration(self, actual_duration_s: float) -> DurationResult:
        """Validate Ghunnah duration (2 harakat).

        Uses ``GHUNNAH_DURATION`` and reports ``rule_applied="ghunnah"`` so a
        Ghunnah check is not mislabelled as a Madd Asli check.
        """
        expectation = self.GHUNNAH_DURATION
        window = self._duration_window(
            expectation, expectation.min_harakat, self._base_ms()
        )
        return self._evaluate(actual_duration_s, window, "ghunnah")

    def suggest_correction(self,
                           actual_duration_s: float,
                           madd_type: MaddType,
                           expected_harakat: int = 2) -> Tuple[float, float]:
        """
        Suggest a corrected duration based on Tajweed expectations.

        The measured duration is pulled to the nearest edge of the expected
        window; a duration already inside the window is left unchanged.

        Returns:
            Tuple of (suggested_duration_s, adjustment_s), where
            adjustment_s is suggested minus actual (0 when already valid).
        """
        min_ms, max_ms = self.get_expected_duration(madd_type, expected_harakat)
        actual_ms = actual_duration_s * 1000

        if actual_ms < min_ms:
            suggested_ms = min_ms
        elif actual_ms > max_ms:
            suggested_ms = max_ms
        else:
            suggested_ms = actual_ms

        adjustment_ms = suggested_ms - actual_ms
        return (suggested_ms / 1000, adjustment_ms / 1000)

    def detect_madd_type_from_context(self,
                                      current_letter: str,
                                      next_letter: Optional[str],
                                      next_harakat: Optional[str],
                                      is_word_end: bool,
                                      is_waqf: bool = False) -> MaddType:
        """
        Auto-detect Madd type from Quranic text context.

        Checks are applied in this order; the first match wins:
            1. Pausing at a word boundary            -> ARID
            2. Next letter carries shadda or sukun   -> LAZIM
            3. Next letter is a hamza form:
               at a word boundary (hamza opens the next word) -> JAIZ
               otherwise (hamza inside the same word)         -> WAJIB
            4. Anything else                         -> ASLI

        Args:
            current_letter: The Madd letter (ا و ي); not inspected here
            next_letter: Following letter (if any)
            next_harakat: Harakat on next letter
            is_word_end: Whether this is at word boundary
            is_waqf: Whether reciter is pausing here

        Returns:
            Detected MaddType
        """
        SUKUN = '\u0652'
        SHADDA = '\u0651'
        # Hamza forms: ء أ إ ؤ ئ
        HAMZA_FORMS = '\u0621\u0623\u0625\u0624\u0626'

        if is_waqf and is_word_end:
            return MaddType.ARID

        if next_harakat:
            if SHADDA in next_harakat or SUKUN in next_harakat:
                return MaddType.LAZIM

        if next_letter and next_letter in HAMZA_FORMS:
            # Separated madd (hamza in the following word) is Jaiz;
            # connected madd (hamza in the same word) is Wajib.
            return MaddType.JAIZ if is_word_end else MaddType.WAJIB

        return MaddType.ASLI
| |
|
| |
|
def main():
    """Run a console demonstration: calibrate, validate two durations, detect two Madd types."""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Duration Model Test")
    print(banner)

    model = DurationModel()

    # Calibrate from a handful of short-vowel durations (seconds).
    sample_vowels = [0.095, 0.105, 0.098, 0.102, 0.100, 0.103, 0.097]
    calibration = model.calibrate_from_samples("Abdul_Basit", sample_vowels)

    print(f"\nCalibration for {calibration.reciter_name}:")
    print(f" Harakat base: {calibration.harakat_base_ms:.1f} ms")
    print(f" Sample size: {calibration.sample_size}")

    print("\nDuration Validation Tests:")

    # (label, measured seconds, Madd type, expected harakat)
    validation_cases = (
        ("Madd Asli", 0.195, MaddType.ASLI, 2),
        ("Madd Lazim", 0.580, MaddType.LAZIM, 6),
    )
    for label, seconds, madd_type, harakat in validation_cases:
        result = model.validate_duration(seconds, madd_type, harakat)
        print(f"\n {label} ({seconds:.3f}s):")
        print(f" Valid: {result.is_valid}")
        print(f" Harakat: {result.harakat_count:.1f}")
        print(f" Deviation: {result.deviation_percent:.1f}%")

    print("\nMadd Type Detection:")

    # (label, next letter, harakat on the next letter)
    detection_cases = (
        ("ا before ء", 'ء', None),
        ("ا before بّ", 'ب', '\u0651'),
    )
    for label, next_letter, next_harakat in detection_cases:
        detected = model.detect_madd_type_from_context(
            'ا', next_letter, next_harakat, False, False
        )
        print(f" {label}: {detected.value}")
| |
|
| |
|
| | if __name__ == "__main__": |
| | main() |
| |
|