| | |
"""
TajweedSST - Step 3: Physics & Signal Processing Validator

Validates Tajweed rules using acoustic signal analysis:
- Qalqalah: RMS energy dip→spike pattern
- Madd: Duration vs Rate of Speech ratio
- Ghunnah: Formant analysis + nasalization detection
- Tafkheem: F2 formant depression
"""
| |
|
import os
import tempfile
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Tuple

import numpy as np

# Optional dependency: librosa provides audio loading and RMS / spectral features.
try:
    import librosa
    HAS_LIBROSA = True
except ImportError:
    HAS_LIBROSA = False
    print("Warning: librosa not installed. RMS/ZCR analysis unavailable.")

# Optional dependency: parselmouth (Praat bindings) provides pitch / formant analysis.
try:
    import parselmouth
    from parselmouth.praat import call
    HAS_PARSELMOUTH = True
except ImportError:
    HAS_PARSELMOUTH = False
    print("Warning: parselmouth not installed. Formant analysis unavailable.")
| |
|
| |
|
class ValidationStatus(Enum):
    """Verdict attached to every validation result."""

    PASS = "PASS"          # rule detected within the configured thresholds
    FAIL = "FAIL"          # rule not detected
    MARGINAL = "MARGINAL"  # partially matching evidence
    SKIPPED = "SKIPPED"    # analysis could not be performed
| |
|
@dataclass
class PhysicsResult:
    """Result of a physics/signal analysis check."""

    # Verdict of the check.
    status: ValidationStatus
    # Human-readable name of the measured quantity (e.g. "RMS Energy").
    metric_name: str
    # Label of the pattern the rule expects to observe.
    expected_pattern: str
    # Label of the pattern that was actually observed (or the failure reason).
    observed_pattern: str
    # Numeric score assigned by the validator that produced this result.
    score: float
    # Free-form extra data; each instance gets its own dict.
    details: Dict = field(default_factory=dict)
| |
|
@dataclass
class QalqalahResult(PhysicsResult):
    """Specific result for the Qalqalah check."""

    # Classification label of the RMS envelope ("" until set by the validator).
    rms_profile: str = ""
    # Relative depth of the RMS dip.
    dip_depth: float = 0.0
    # Relative height of the RMS spike measured from the dip.
    spike_height: float = 0.0
    # Closure time in milliseconds as computed by the validator.
    closure_duration_ms: float = 0.0
| |
|
@dataclass
class MaddResult(PhysicsResult):
    """Specific result for the Madd elongation check."""

    # Measured duration of the segment, in milliseconds.
    actual_duration_ms: float = 0.0
    # Duration the validator expected, in milliseconds.
    expected_duration_ms: float = 0.0
    # Measured duration divided by the calibrated average vowel duration.
    ratio: float = 0.0
| |
|
@dataclass
class GhunnahResult(PhysicsResult):
    """Specific result for the Ghunnah nasalization check."""

    # Whether the validator flagged a nasal formant.
    nasal_formant_detected: bool = False
    # Pitch stability figure computed by the validator.
    pitch_stability: float = 0.0
    # Segment duration relative to the minimum Ghunnah duration.
    duration_elongation: float = 0.0
| |
|
@dataclass
class TafkheemResult(PhysicsResult):
    """Specific result for the Tafkheem check."""

    # Mean F2 measured over the segment, in Hz.
    f2_value_hz: float = 0.0
    # Reference F2 the measurement is compared against, in Hz.
    f2_baseline_hz: float = 1500.0
    # Relative drop of the measured F2 below the baseline.
    depression_ratio: float = 0.0
| |
|
| |
|
class PhysicsValidator:
    """
    Validates Tajweed rules using signal processing.

    Each ``validate_*`` method receives the full waveform plus time stamps in
    seconds and returns a ``PhysicsResult`` (or one of its subclasses).
    Checks that need librosa or parselmouth return a ``SKIPPED`` result when
    the corresponding library could not be imported.
    """

    # Rule thresholds.
    QALQALAH_DIP_THRESHOLD = 0.08    # min relative RMS drop below the first-half mean
    QALQALAH_SPIKE_THRESHOLD = 0.15  # min relative RMS rise from dip to spike
    MADD_RATIO_ASLI = 1.0            # target duration / average-vowel ratios
    MADD_RATIO_WAJIB = 2.0
    MADD_RATIO_LAZIM = 3.5
    MADD_TOLERANCE = 0.3             # accepted relative deviation around a Madd target
    GHUNNAH_MIN_DURATION_MS = 30.0
    TAFKHEEM_F2_MAX_HZ = 1500.0
    VALIDATION_TOLERANCE = 0.4       # not referenced inside this class

    # Minimum analysable segment. 661 samples is ~30 ms at the default 22050 Hz.
    MIN_SEGMENT_MS = 30.0
    MIN_SEGMENT_SAMPLES = 661

    def __init__(self, sample_rate: int = 22050):
        """
        Args:
            sample_rate: Sampling rate (Hz) used to load audio and to convert
                time stamps into sample indices.
        """
        self.sample_rate = sample_rate
        self._audio_cache = {}
        # Default average vowel duration in seconds; see calibrate_average_vowel().
        self._average_vowel_duration = 0.1

    # ------------------------------------------------------------------
    # Audio helpers
    # ------------------------------------------------------------------

    def load_audio(self, audio_path: str) -> np.ndarray:
        """
        Load an audio file at ``self.sample_rate``, caching the waveform per path.

        NOTE: when librosa is unavailable this caches and returns 10 seconds of
        random noise as a placeholder instead of the real file contents.
        """
        if audio_path not in self._audio_cache:
            if HAS_LIBROSA:
                y, _ = librosa.load(audio_path, sr=self.sample_rate)
                self._audio_cache[audio_path] = y
            else:
                self._audio_cache[audio_path] = np.random.randn(self.sample_rate * 10) * 0.1

        return self._audio_cache[audio_path]

    def safe_extract_segment(self, audio: np.ndarray, start: float,
                             end: float) -> Tuple[Optional[np.ndarray], bool, Optional[str]]:
        """
        PRECISION: Safely extract audio segment with bounds and validity checking.

        Args:
            audio: Full waveform.
            start: Segment start in seconds.
            end: Segment end in seconds.

        Returns:
            tuple: (segment, is_valid, error_reason)
              - (None, False, "invalid_range") when the clamped range is empty,
              - (segment, False, "too_short_<n>_samples") when shorter than
                MIN_SEGMENT_SAMPLES,
              - (segment, True, None) otherwise, with NaN/inf replaced by 0.
        """
        start_sample = max(0, int(start * self.sample_rate))
        end_sample = min(len(audio), int(end * self.sample_rate))

        if start_sample >= end_sample:
            return None, False, "invalid_range"

        segment = audio[start_sample:end_sample]

        if len(segment) < self.MIN_SEGMENT_SAMPLES:
            return segment, False, f"too_short_{len(segment)}_samples"

        if np.any(np.isnan(segment)) or np.any(np.isinf(segment)):
            segment = np.nan_to_num(segment, nan=0.0, posinf=0.0, neginf=0.0)

        return segment, True, None

    def safe_rms(self, segment: np.ndarray, frame_length: int = 256, hop_length: int = 64) -> np.ndarray:
        """
        PRECISION: Calculate RMS with NaN protection.

        Returns the frame-wise RMS envelope scaled so that its maximum is 1
        (left unscaled when it is all zeros). Returns ``[0.0]`` when librosa
        is unavailable.
        """
        if not HAS_LIBROSA:
            return np.array([0.0])

        rms = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop_length)[0]
        rms = np.nan_to_num(rms, nan=0.0, posinf=1.0, neginf=0.0)

        # Guard the empty case: np.max() raises on a zero-length array.
        if rms.size and np.max(rms) > 0:
            rms = rms / np.max(rms)

        return rms

    def _clamped_slice(self, audio: np.ndarray, start: float, end: float) -> np.ndarray:
        """Return audio[start:end] (times in seconds) with indices clamped to the array.

        Uses the same clamping as safe_extract_segment() so that a negative
        time stamp cannot wrap around to the end of the waveform. An empty
        array is returned when the clamped range is empty.
        """
        first = max(0, int(start * self.sample_rate))
        last = min(len(audio), int(end * self.sample_rate))
        if first >= last:
            return audio[:0]
        return audio[first:last]

    def _sound_from_segment(self, segment: np.ndarray):
        """Build a parselmouth Sound from a segment via a temporary WAV file.

        The temporary file is always removed again. Raises whatever
        soundfile/parselmouth raise (including ImportError when soundfile is
        missing); callers turn that into a SKIPPED result.
        """
        import soundfile as sf

        handle = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        wav_path = handle.name
        # Close our handle before writing: an open handle blocks re-opening
        # the same path on some platforms.
        handle.close()
        try:
            sf.write(wav_path, segment, self.sample_rate)
            return parselmouth.Sound(wav_path)
        finally:
            try:
                os.unlink(wav_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not fail the analysis.
                pass

    # ------------------------------------------------------------------
    # Rule validators
    # ------------------------------------------------------------------

    def validate_qalqalah(self,
                          audio: np.ndarray,
                          start: float,
                          end: float) -> "QalqalahResult":
        """
        Validate Qalqalah rule: Must show closure (RMS dip) then release (RMS spike)

        Physics: The "bounce" is caused by complete oral closure followed by
        abrupt release. RMS energy shows: stable→dip→spike pattern.

        Returns SKIPPED when librosa is missing, the segment is invalid, or
        fewer than 3 RMS frames are available.
        """
        def skipped(observed: str, details: Optional[Dict] = None) -> "QalqalahResult":
            return QalqalahResult(
                status=ValidationStatus.SKIPPED,
                metric_name="RMS Energy",
                expected_pattern="dip_then_spike",
                observed_pattern=observed,
                score=0.0,
                rms_profile="unknown",
                details=details if details is not None else {},
            )

        if not HAS_LIBROSA:
            return skipped("unknown")

        segment, is_valid, error = self.safe_extract_segment(audio, start, end)
        if not is_valid:
            return skipped(error or "invalid_segment", {"reason": error})

        # One hop length shared by the RMS computation and the frame→ms conversion.
        hop_length = 64
        rms = self.safe_rms(segment, hop_length=hop_length)

        if len(rms) < 3:
            return skipped("insufficient_frames",
                           {"reason": f"Only {len(rms)} RMS frames < 3 minimum"})

        # len(rms) >= 3 guarantees both halves are non-empty.
        midpoint = len(rms) // 2
        first_half_mean = float(np.mean(rms[:midpoint]))

        # NOTE(review): the dip is the global minimum and is not required to
        # precede the spike; kept as-is to preserve existing classifications.
        dip_idx = int(np.argmin(rms))
        dip_value = float(rms[dip_idx])

        # The spike is the maximum of the second half of the envelope.
        spike_idx = midpoint + int(np.argmax(rms[midpoint:]))
        spike_value = float(rms[spike_idx])

        dip_depth = (first_half_mean - dip_value) / first_half_mean if first_half_mean > 0 else 0.0

        # A perfectly silent closure (dip == 0) is the strongest possible dip;
        # divide by a small floor instead of reporting "no spike".
        dip_floor = dip_value if dip_value > 0 else 1e-6
        spike_height = (spike_value - dip_value) / dip_floor

        if dip_depth >= self.QALQALAH_DIP_THRESHOLD and spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
            rms_profile = "dip_then_spike"
            status = ValidationStatus.PASS
            score = min(1.0, (dip_depth + spike_height) / 2)
        elif spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
            rms_profile = "spike_only"
            status = ValidationStatus.MARGINAL
            # Capped at 1.0 like the PASS branch; spike_height itself is unbounded.
            score = min(1.0, spike_height / 2)
        else:
            rms_profile = "flat"
            status = ValidationStatus.FAIL
            score = 0.0

        # Time from segment start to the dip frame, in milliseconds.
        closure_duration_ms = (dip_idx * hop_length / self.sample_rate) * 1000

        return QalqalahResult(
            status=status,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern=rms_profile,
            score=score,
            rms_profile=rms_profile,
            dip_depth=dip_depth,
            spike_height=spike_height,
            closure_duration_ms=closure_duration_ms
        )

    def validate_madd(self,
                      audio: np.ndarray,
                      start: float,
                      end: float,
                      expected_count: int = 2) -> "MaddResult":
        """
        Validate Madd rule: Duration must match expected elongation count

        Physics: Madd is pure duration comparison.
        - Asli (natural): 2 counts
        - Wajib (obligatory): 4-5 counts
        - Lazim (required): 6 counts

        The measured duration is divided by the calibrated average vowel
        duration and compared with the target ratio for the requested count.
        PASS when within ±MADD_TOLERANCE of the target, MARGINAL when longer,
        FAIL when shorter. ``audio`` is not inspected.
        """
        actual_duration = end - start
        average = self._average_vowel_duration

        ratio = actual_duration / average if average > 0 else 0

        if expected_count == 2:
            threshold = self.MADD_RATIO_ASLI
        elif expected_count in (4, 5):
            # Wajib covers both 4 and 5 counts.
            threshold = self.MADD_RATIO_WAJIB
        else:
            threshold = self.MADD_RATIO_LAZIM

        lower = threshold * (1 - self.MADD_TOLERANCE)
        upper = threshold * (1 + self.MADD_TOLERANCE)

        if ratio < lower:
            status = ValidationStatus.FAIL
            # Clamp at 0 so an inverted (end < start) segment cannot score negative.
            score = max(0.0, ratio / threshold) if threshold > 0 else 0
        elif ratio <= upper:
            status = ValidationStatus.PASS
            score = 1.0
        else:
            status = ValidationStatus.MARGINAL
            score = 0.7

        return MaddResult(
            status=status,
            metric_name="Duration Ratio",
            expected_pattern=f"{expected_count}x average vowel",
            observed_pattern=f"{ratio:.1f}x average vowel",
            score=score,
            actual_duration_ms=actual_duration * 1000,
            expected_duration_ms=average * expected_count * 1000,
            ratio=ratio
        )

    def validate_ghunnah(self,
                         audio: np.ndarray,
                         start: float,
                         end: float) -> "GhunnahResult":
        """
        Validate Ghunnah (nasalization) rule

        Physics:
        - Drop in high-frequency energy (nasal anti-formant ~500Hz)
        - Stable pitch during nasalization
        - Duration elongation (2 counts minimum)

        The score currently combines duration and pitch stability only; the
        nasal-formant flag is a placeholder that is always True on success.
        Returns SKIPPED when parselmouth is missing, the segment is invalid,
        or the acoustic analysis raises.
        """
        def skipped(observed: str, details: Optional[Dict] = None) -> "GhunnahResult":
            return GhunnahResult(
                status=ValidationStatus.SKIPPED,
                metric_name="Formant Analysis",
                expected_pattern="nasal_resonance",
                observed_pattern=observed,
                score=0.0,
                details=details if details is not None else {},
            )

        if not HAS_PARSELMOUTH:
            return skipped("unknown")

        duration_ms = (end - start) * 1000
        elongation = duration_ms / self.GHUNNAH_MIN_DURATION_MS

        # Very short nasals are reported as MARGINAL without acoustic analysis.
        if duration_ms < self.GHUNNAH_MIN_DURATION_MS:
            return GhunnahResult(
                status=ValidationStatus.MARGINAL,
                metric_name="Formant Analysis",
                expected_pattern="nasal_resonance",
                observed_pattern="short_but_valid",
                score=max(0.0, elongation),
                duration_elongation=elongation,
                details={"reason": f"Duration {duration_ms:.1f}ms < {self.GHUNNAH_MIN_DURATION_MS}ms minimum"}
            )

        segment, is_valid, error = self.safe_extract_segment(audio, start, end)
        if not is_valid:
            return skipped(error or "invalid_segment", {"reason": error})

        try:
            sound = self._sound_from_segment(segment)

            pitch = call(sound, "To Pitch", 0.0, 75, 600)
            pitch_values = pitch.selected_array['frequency']
            pitch_values = pitch_values[pitch_values > 0]  # keep voiced frames only

            if len(pitch_values) > 1:
                # 1 - coefficient of variation; may be negative for unstable pitch.
                pitch_stability = float(1.0 - (np.std(pitch_values) / np.mean(pitch_values)))
            else:
                pitch_stability = 0.0

            # The formant track is computed but not inspected yet: a failure
            # here still results in SKIPPED, and the flag below is a placeholder.
            call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)
            nasal_formant_detected = True

        except Exception as e:
            print(f"Parselmouth error: {e}")
            return skipped("analysis_error")

        duration_score = min(1.0, duration_ms / (self.GHUNNAH_MIN_DURATION_MS * 2))
        pitch_score = max(0.0, pitch_stability)
        total_score = (duration_score + pitch_score) / 2

        if total_score >= 0.7:
            status = ValidationStatus.PASS
        elif total_score >= 0.4:
            status = ValidationStatus.MARGINAL
        else:
            status = ValidationStatus.FAIL

        return GhunnahResult(
            status=status,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="analyzed",
            score=total_score,
            nasal_formant_detected=nasal_formant_detected,
            pitch_stability=pitch_stability,
            duration_elongation=elongation
        )

    def validate_tafkheem(self,
                          audio: np.ndarray,
                          start: float,
                          end: float) -> "TafkheemResult":
        """
        Validate Tafkheem (heavy letter) rule

        Physics: Heavy letters show depressed F2 formant
        - Normal letters: F2 ~1500 Hz
        - Heavy letters: F2 ~1000-1200 Hz

        Returns SKIPPED when parselmouth is missing, the segment is invalid,
        the analysis raises, or no F2 value could be measured at all.
        """
        expected = f"F2 < {self.TAFKHEEM_F2_MAX_HZ} Hz"

        def skipped(observed: str, details: Optional[Dict] = None) -> "TafkheemResult":
            return TafkheemResult(
                status=ValidationStatus.SKIPPED,
                metric_name="F2 Formant",
                expected_pattern=expected,
                observed_pattern=observed,
                score=0.0,
                details=details if details is not None else {},
            )

        if not HAS_PARSELMOUTH:
            return skipped("unknown")

        segment, is_valid, error = self.safe_extract_segment(audio, start, end)
        if not is_valid:
            return skipped(error or "invalid_segment", {"reason": error})

        try:
            sound = self._sound_from_segment(segment)
            formant = call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

            # Collect F2 at every analysis frame, dropping undefined values.
            f2_values = []
            num_frames = call(formant, "Get number of frames")
            for frame in range(1, num_frames + 1):
                frame_time = call(formant, "Get time from frame number", frame)
                f2 = call(formant, "Get value at time", 2, frame_time, "Hertz", "Linear")
                if not np.isnan(f2) and f2 > 0:
                    f2_values.append(f2)

        except Exception as e:
            print(f"Parselmouth error: {e}")
            return skipped("analysis_error")

        if not f2_values:
            # Without a measurement there is nothing to judge; previously the
            # 0 Hz placeholder satisfied "F2 <= max" and produced a PASS.
            return skipped("no_formant_data", {"reason": "no valid F2 frames"})

        f2_mean = float(np.mean(f2_values))

        baseline_f2 = 1500.0
        depression_ratio = (baseline_f2 - f2_mean) / baseline_f2 if f2_mean < baseline_f2 else 0

        if f2_mean <= self.TAFKHEEM_F2_MAX_HZ:
            status = ValidationStatus.PASS
            score = 1.0
        elif f2_mean <= 1350:
            # Only reachable when TAFKHEEM_F2_MAX_HZ is configured below 1350 Hz.
            status = ValidationStatus.MARGINAL
            score = 0.6
        else:
            status = ValidationStatus.FAIL
            score = max(0.0, depression_ratio)

        return TafkheemResult(
            status=status,
            metric_name="F2 Formant",
            expected_pattern=expected,
            observed_pattern=f"F2 = {f2_mean:.0f} Hz",
            score=score,
            f2_value_hz=f2_mean,
            f2_baseline_hz=baseline_f2,
            depression_ratio=depression_ratio
        )

    def validate_idgham(self,
                        audio: np.ndarray,
                        nun_start: float,
                        nun_end: float,
                        next_letter_end: float,
                        has_ghunnah: bool = True) -> "PhysicsResult":
        """
        Validate Idgham (assimilation) rule

        Physics:
        - Full Idgham (ر/ل): Complete merger, smooth energy, no nun boundary
        - Partial Idgham (ي/ن/م/و): Ghunnah preserved during transition

        The RMS envelope from ``nun_start`` to ``next_letter_end`` is scored
        for overall smoothness and for the absence of a sharp jump.
        ``nun_end`` is accepted for interface compatibility but not used, and
        ``has_ghunnah`` only changes the reported expected-pattern label.
        """
        if not HAS_LIBROSA:
            return PhysicsResult(
                status=ValidationStatus.SKIPPED,
                metric_name="Energy Continuity",
                expected_pattern="smooth_transition",
                observed_pattern="unknown",
                score=0.0
            )

        segment = self._clamped_slice(audio, nun_start, next_letter_end)

        if len(segment) < 100:
            return PhysicsResult(
                status=ValidationStatus.FAIL,
                metric_name="Energy Continuity",
                expected_pattern="smooth_transition",
                observed_pattern="segment_too_short",
                score=0.0
            )

        rms = librosa.feature.rms(y=segment, frame_length=256, hop_length=64)[0]
        mean_rms = np.mean(rms)

        # Coefficient of variation of the envelope: low means smooth energy.
        rms_variance = np.std(rms) / mean_rms if mean_rms > 0 else 1.0
        smoothness_score = 1.0 - min(1.0, rms_variance)

        # Largest frame-to-frame jump relative to the mean: low means no boundary.
        max_jump = np.max(np.abs(np.diff(rms))) / mean_rms if mean_rms > 0 else 0
        boundary_score = 1.0 - min(1.0, max_jump)

        total_score = (smoothness_score + boundary_score) / 2

        if total_score >= 0.6:
            status = ValidationStatus.PASS
        elif total_score >= 0.4:
            status = ValidationStatus.MARGINAL
        else:
            status = ValidationStatus.FAIL

        return PhysicsResult(
            status=status,
            metric_name="Energy Continuity",
            expected_pattern="smooth_transition" if not has_ghunnah else "smooth_with_ghunnah",
            observed_pattern=f"smoothness={smoothness_score:.2f}",
            score=total_score,
            details={"smoothness": smoothness_score, "boundary_score": boundary_score}
        )

    def validate_ikhfa(self,
                       audio: np.ndarray,
                       start: float,
                       end: float) -> "PhysicsResult":
        """
        Validate Ikhfa (concealment) rule

        Physics:
        - Gradual nasalization transition (not abrupt like pure Ghunnah)
        - Partial nasal resonance that fades

        Scored from the regularity of spectral-centroid changes across the
        segment and from the segment duration (saturating at 100 ms).
        """
        if not HAS_LIBROSA:
            return PhysicsResult(
                status=ValidationStatus.SKIPPED,
                metric_name="Nasalization Gradient",
                expected_pattern="gradual_nasal",
                observed_pattern="unknown",
                score=0.0
            )

        segment = self._clamped_slice(audio, start, end)

        if len(segment) < 100:
            return PhysicsResult(
                status=ValidationStatus.FAIL,
                metric_name="Nasalization Gradient",
                expected_pattern="gradual_nasal",
                observed_pattern="segment_too_short",
                score=0.0
            )

        sc = librosa.feature.spectral_centroid(y=segment, sr=self.sample_rate)[0]

        if len(sc) < 3:
            return PhysicsResult(
                status=ValidationStatus.FAIL,
                metric_name="Nasalization Gradient",
                expected_pattern="gradual_nasal",
                observed_pattern="insufficient_frames",
                score=0.0
            )

        # Regular frame-to-frame centroid steps indicate a gradual transition.
        sc_diff = np.abs(np.diff(sc))
        mean_step = np.mean(sc_diff)
        if mean_step > 0:
            gradient_smoothness = 1.0 - min(1.0, np.std(sc_diff) / mean_step)
        else:
            gradient_smoothness = 0.5

        duration_ms = (end - start) * 1000
        duration_score = min(1.0, duration_ms / 100) if duration_ms > 0 else 0

        total_score = (gradient_smoothness + duration_score) / 2

        if total_score >= 0.6:
            status = ValidationStatus.PASS
        elif total_score >= 0.4:
            status = ValidationStatus.MARGINAL
        else:
            status = ValidationStatus.FAIL

        return PhysicsResult(
            status=status,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern=f"gradient={gradient_smoothness:.2f}",
            score=total_score,
            details={"gradient_smoothness": gradient_smoothness, "duration_ms": duration_ms}
        )

    def validate_iqlab(self,
                       audio: np.ndarray,
                       start: float,
                       end: float) -> "PhysicsResult":
        """
        Validate Iqlab (ن→م before ب)

        Physics:
        - Same as Ghunnah but with bilabial closure
        - Nasal formant + lip closure pattern (F1/F2 characteristic of /m/)

        Currently delegates entirely to validate_ghunnah() and relabels the
        result; no bilabial-specific measurement is performed.
        """
        ghunnah_result = self.validate_ghunnah(audio, start, end)

        return PhysicsResult(
            status=ghunnah_result.status,
            metric_name="Bilabial Nasal",
            expected_pattern="mim_like_nasal",
            observed_pattern=ghunnah_result.observed_pattern,
            score=ghunnah_result.score,
            details={"ghunnah_check": ghunnah_result.status.value}
        )

    def validate_izhar(self,
                       audio: np.ndarray,
                       letter_start: float,
                       letter_end: float,
                       next_letter_start: float) -> "PhysicsResult":
        """
        Validate Izhar (clear pronunciation)

        Physics:
        - Clean, sharp boundary between letters
        - No nasalization
        - Clear articulation energy pattern

        Only the energy envelope in a window from 20 ms before ``letter_end``
        to 20 ms after ``next_letter_start`` is analysed. ``letter_start`` is
        accepted for interface compatibility but not used.
        """
        if not HAS_LIBROSA:
            return PhysicsResult(
                status=ValidationStatus.SKIPPED,
                metric_name="Boundary Sharpness",
                expected_pattern="clean_boundary",
                observed_pattern="unknown",
                score=0.0
            )

        boundary_start = max(0, letter_end - 0.02)
        boundary_end = min(len(audio) / self.sample_rate, next_letter_start + 0.02)
        segment = self._clamped_slice(audio, boundary_start, boundary_end)

        if len(segment) < 50:
            return PhysicsResult(
                status=ValidationStatus.FAIL,
                metric_name="Boundary Sharpness",
                expected_pattern="clean_boundary",
                observed_pattern="segment_too_short",
                score=0.0
            )

        # Short frames: the window around the boundary is only a few dozen ms.
        rms = librosa.feature.rms(y=segment, frame_length=128, hop_length=32)[0]
        mean_rms = np.mean(rms)

        # Largest frame-to-frame energy change relative to the mean level.
        max_change = np.max(np.abs(np.diff(rms))) / mean_rms if mean_rms > 0 else 0
        sharpness_score = min(1.0, max_change)

        if sharpness_score >= 0.3:
            status = ValidationStatus.PASS
            score = min(1.0, sharpness_score * 2)
        elif sharpness_score >= 0.15:
            status = ValidationStatus.MARGINAL
            score = sharpness_score * 2
        else:
            status = ValidationStatus.FAIL
            score = sharpness_score

        return PhysicsResult(
            status=status,
            metric_name="Boundary Sharpness",
            expected_pattern="clean_boundary",
            observed_pattern=f"sharpness={sharpness_score:.2f}",
            score=score,
            details={"boundary_sharpness": sharpness_score}
        )

    def validate_tarqeeq(self,
                         audio: np.ndarray,
                         start: float,
                         end: float) -> "PhysicsResult":
        """
        Validate Tarqeeq (light letters) - opposite of Tafkheem

        Physics: Light letters show elevated F2 formant (F2 > 1400 Hz)

        Reuses the F2 measurement from validate_tafkheem(). Returns SKIPPED
        when that measurement was skipped or yielded no usable F2 value.
        """
        TARQEEQ_F2_MIN_HZ = 1400.0

        tafkheem_result = self.validate_tafkheem(audio, start, end)

        if tafkheem_result.status == ValidationStatus.SKIPPED:
            return PhysicsResult(
                status=ValidationStatus.SKIPPED,
                metric_name="F2 Formant",
                expected_pattern="F2 > 1400 Hz",
                observed_pattern="unknown",
                score=0.0
            )

        f2_value = tafkheem_result.details.get(
            'f2_value_hz', getattr(tafkheem_result, 'f2_value_hz', 0))

        if f2_value <= 0:
            # No F2 was measured: there is nothing to judge, so do not report FAIL.
            return PhysicsResult(
                status=ValidationStatus.SKIPPED,
                metric_name="F2 Formant",
                expected_pattern="F2 > 1400 Hz",
                observed_pattern="no_formant_data",
                score=0.0,
                details={"reason": "no valid F2 measurement"}
            )

        if f2_value >= TARQEEQ_F2_MIN_HZ:
            status = ValidationStatus.PASS
            score = 1.0
        elif f2_value >= 1300:
            status = ValidationStatus.MARGINAL
            score = 0.6
        else:
            status = ValidationStatus.FAIL
            score = f2_value / TARQEEQ_F2_MIN_HZ

        return PhysicsResult(
            status=status,
            metric_name="F2 Formant",
            expected_pattern=f"F2 > {TARQEEQ_F2_MIN_HZ} Hz",
            observed_pattern=f"F2 = {f2_value:.0f} Hz",
            score=score,
            details={"f2_value_hz": f2_value}
        )

    def validate_sakt(self,
                      audio: np.ndarray,
                      start: float,
                      end: float) -> "PhysicsResult":
        """
        Validate Sakt (brief pause without breath)

        Physics:
        - Brief silence (50-200ms)
        - RMS below threshold
        - No breathing artifacts

        Only overall RMS level and duration are checked; breathing artifacts
        are not analysed.
        """
        if not HAS_LIBROSA:
            return PhysicsResult(
                status=ValidationStatus.SKIPPED,
                metric_name="Silence Detection",
                expected_pattern="brief_silence",
                observed_pattern="unknown",
                score=0.0
            )

        SAKT_RMS_THRESHOLD = 0.05
        SAKT_MIN_MS = 50
        SAKT_MAX_MS = 200

        segment = self._clamped_slice(audio, start, end)
        duration_ms = (end - start) * 1000

        if len(segment) < 10:
            return PhysicsResult(
                status=ValidationStatus.FAIL,
                metric_name="Silence Detection",
                expected_pattern="brief_silence",
                observed_pattern="segment_too_short",
                score=0.0
            )

        # Plain Python values keep the details dict free of numpy scalar types.
        rms = float(np.sqrt(np.mean(segment**2)))
        is_silent = bool(rms < SAKT_RMS_THRESHOLD)
        duration_ok = SAKT_MIN_MS <= duration_ms <= SAKT_MAX_MS

        if is_silent and duration_ok:
            status = ValidationStatus.PASS
            score = 1.0
        elif is_silent and (duration_ms > 30):
            # Silent, but slightly too short or longer than the Sakt window.
            status = ValidationStatus.MARGINAL
            score = 0.6
        else:
            status = ValidationStatus.FAIL
            score = 0.3 if is_silent else 0.0

        return PhysicsResult(
            status=status,
            metric_name="Silence Detection",
            expected_pattern=f"silence_{SAKT_MIN_MS}-{SAKT_MAX_MS}ms",
            observed_pattern=f"rms={rms:.3f}, dur={duration_ms:.0f}ms",
            score=score,
            details={"rms": rms, "duration_ms": duration_ms, "is_silent": is_silent}
        )

    def calibrate_average_vowel(self, audio: np.ndarray, vowel_segments: List[Tuple[float, float]]) -> float:
        """
        Calibrate average vowel duration for this reciter

        This is crucial for Madd validation as reciter pace varies

        Args:
            audio: Accepted for interface compatibility; not inspected.
            vowel_segments: (start, end) pairs in seconds.

        Returns:
            The new average vowel duration in seconds. When there is no usable
            segment, the default 0.1 is returned and the stored calibration is
            left untouched.
        """
        # Ignore empty/inverted segments: they would drag the mean towards
        # zero (or below) and break the Madd ratio.
        durations = [end - start for start, end in vowel_segments if end > start]
        if not durations:
            return 0.1

        # Store a plain float rather than a numpy scalar.
        self._average_vowel_duration = float(np.mean(durations))

        return self._average_vowel_duration
| |
|
| |
|
def main():
    """Test physics validator"""
    banner = "=" * 50
    print(banner)
    print("TajweedSST Physics Validator Test")
    print(banner)

    # Synthetic input: a 2-second, 440 Hz tone at half amplitude.
    sample_rate = 22050
    duration = 2.0
    t = np.linspace(0, duration, int(sample_rate * duration))
    audio = np.sin(2 * np.pi * 440 * t) * 0.5

    # Carve a dip (40%-50% of the signal) followed by a spike (50%-60%).
    total = len(audio)
    dip_start, dip_end = int(total * 0.4), int(total * 0.5)
    audio[dip_start:dip_end] *= 0.1
    spike_start, spike_end = int(total * 0.5), int(total * 0.6)
    audio[spike_start:spike_end] *= 2.0

    validator = PhysicsValidator(sample_rate=sample_rate)

    print("\nQalqalah Test:")
    qalqalah = validator.validate_qalqalah(audio, 0.3, 0.8)
    print(f" Status: {qalqalah.status.value}")
    print(f" Profile: {qalqalah.rms_profile}")
    print(f" Score: {qalqalah.score:.2f}")
    print(f" Dip Depth: {qalqalah.dip_depth:.2f}")
    print(f" Spike Height: {qalqalah.spike_height:.2f}")

    print("\nMadd Test:")
    # Pin the calibration so the ratio printed below is reproducible.
    validator._average_vowel_duration = 0.1
    madd = validator.validate_madd(audio, 0.0, 0.4, expected_count=4)
    print(f" Status: {madd.status.value}")
    print(f" Ratio: {madd.ratio:.1f}x")
    print(f" Score: {madd.score:.2f}")


if __name__ == "__main__":
    main()
| |
|