tajweedsst / src /physics_validator.py
enver's picture
Upload folder using huggingface_hub
21f2aa3 verified
#!/usr/bin/env python3
"""
TajweedSST - Step 3: Physics & Signal Processing Validator
Validates Tajweed rules using acoustic signal analysis:
- Qalqalah: RMS energy dip→spike pattern
- Madd: Duration vs Rate of Speech ratio
- Ghunnah: Formant analysis + nasalization detection
- Tafkheem: F2 formant depression
"""
import numpy as np
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Tuple
from enum import Enum
# Import signal processing libraries
try:
import librosa
HAS_LIBROSA = True
except ImportError:
HAS_LIBROSA = False
print("Warning: librosa not installed. RMS/ZCR analysis unavailable.")
try:
import parselmouth
from parselmouth.praat import call
HAS_PARSELMOUTH = True
except ImportError:
HAS_PARSELMOUTH = False
print("Warning: parselmouth not installed. Formant analysis unavailable.")
class ValidationStatus(Enum):
    """Outcome label attached to every validation result in this module."""
    PASS = "PASS"  # measured pattern satisfied the validator's thresholds
    FAIL = "FAIL"  # measured pattern did not satisfy the thresholds
    MARGINAL = "MARGINAL"  # borderline outcome between PASS and FAIL
    SKIPPED = "SKIPPED"  # check not performed (missing library, unusable segment, analysis error)
@dataclass
class PhysicsResult:
    """Result of a physics/signal analysis check.

    Base record returned by the validators; the rule-specific result
    classes in this module extend it with extra measurement fields.
    """
    status: ValidationStatus  # overall outcome of the check
    metric_name: str  # name of the measured quantity, e.g. "RMS Energy"
    expected_pattern: str  # what the validator was looking for
    observed_pattern: str  # what was measured, or the reason the check was skipped/failed
    score: float  # 0.0 to 1.0
    details: Dict = field(default_factory=dict)  # optional extra diagnostics (fresh dict per instance)
@dataclass
class QalqalahResult(PhysicsResult):
    """Specific result for Qalqalah check (RMS-energy dip followed by a spike)."""
    rms_profile: str = ""  # "dip_then_spike", "flat", "spike_only" (or "unknown" when skipped)
    dip_depth: float = 0.0  # relative drop of the RMS minimum below the first-half mean
    spike_height: float = 0.0  # relative rise of the second-half RMS maximum above the minimum
    closure_duration_ms: float = 0.0  # time from segment start to the RMS minimum, in milliseconds
@dataclass
class MaddResult(PhysicsResult):
    """Specific result for Madd elongation check (pure duration comparison)."""
    actual_duration_ms: float = 0.0  # measured segment duration in milliseconds
    expected_duration_ms: float = 0.0  # average vowel duration times the expected count, in milliseconds
    ratio: float = 0.0  # Actual / Average vowel
@dataclass
class GhunnahResult(PhysicsResult):
    """Specific result for Ghunnah nasalization check"""
    nasal_formant_detected: bool = False  # flag reported by the ghunnah validator
    pitch_stability: float = 0.0  # 1 - (std / mean) of the voiced pitch values
    duration_elongation: float = 0.0  # segment duration divided by the minimum ghunnah duration
@dataclass
class TafkheemResult(PhysicsResult):
    """Specific result for Tafkheem check (depressed second formant)."""
    f2_value_hz: float = 0.0  # mean F2 measured over the segment, in Hz
    f2_baseline_hz: float = 1500.0  # Average F2 for light sounds
    depression_ratio: float = 0.0  # (baseline - measured F2) / baseline; 0 when F2 is not below the baseline
class PhysicsValidator:
    """
    Validates Tajweed rules using signal processing

    The class-level constants below are the thresholds consulted by the
    individual ``validate_*`` methods.
    """
    # Thresholds for validation - tuned for real Abdul Basit recitation
    QALQALAH_DIP_THRESHOLD = 0.08  # RMS must drop by 8%
    QALQALAH_SPIKE_THRESHOLD = 0.15  # RMS must rise by 15%
    MADD_RATIO_ASLI = 1.0  # 1.0x average vowel (baseline)
    MADD_RATIO_WAJIB = 2.0  # 2.0x average vowel
    MADD_RATIO_LAZIM = 3.5  # 3.5x average vowel
    GHUNNAH_MIN_DURATION_MS = 30.0  # Very relaxed
    TAFKHEEM_F2_MAX_HZ = 1500.0  # Maximum tolerance for F2
    # NOTE(review): no validator in this file reads VALIDATION_TOLERANCE;
    # validate_madd uses its own local 30% tolerance.
    VALIDATION_TOLERANCE = 0.4  # 40% tolerance for all validations
    # Precision thresholds - tuned for Arabic letters which can be very short
    MIN_SEGMENT_MS = 30.0  # Minimum segment duration for valid analysis
    MIN_SEGMENT_SAMPLES = 661  # ~30ms at 22050 Hz (value does not track other sample rates)
def __init__(self, sample_rate: int = 22050) -> None:
    """Create a validator.

    Args:
        sample_rate: Sampling rate in Hz used when loading audio and when
            converting times in seconds to sample indices.
    """
    self.sample_rate = sample_rate
    self._audio_cache = {}  # audio_path -> loaded samples, filled by load_audio
    self._average_vowel_duration = 0.1  # Will be calibrated per reciter
def load_audio(self, audio_path: str) -> np.ndarray:
    """Return the samples for ``audio_path``, loading them on first use.

    Results are memoised in ``self._audio_cache`` keyed by the path string.
    With librosa available the file is decoded at ``self.sample_rate``.
    Without librosa, ten seconds of low-amplitude Gaussian noise is
    generated instead (a testing fallback, not real audio).

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The cached sample array for this path.
    """
    cache = self._audio_cache
    try:
        return cache[audio_path]
    except KeyError:
        pass  # not loaded yet; fall through and populate the cache

    if HAS_LIBROSA:
        samples, _ = librosa.load(audio_path, sr=self.sample_rate)
    else:
        # Fallback: generate noise for testing
        samples = np.random.randn(self.sample_rate * 10) * 0.1

    cache[audio_path] = samples
    return samples
def safe_extract_segment(self, audio: np.ndarray, start: float, end: float) -> Tuple[Optional[np.ndarray], bool, Optional[str]]:
    """
    PRECISION: Safely extract audio segment with bounds and validity checking.

    Args:
        audio: Sample array indexed at ``self.sample_rate`` samples per second.
        start: Segment start time in seconds (clamped to the array start).
        end: Segment end time in seconds (clamped to the array end).

    Returns:
        tuple: (segment, is_valid, error_reason)
            - ``(None, False, "invalid_range")`` when the times are not
              finite or the clamped range is empty.
            - ``(segment, False, "too_short_<n>_samples")`` when the segment
              is shorter than ``MIN_SEGMENT_MS`` at the current sample rate.
            - ``(segment, True, None)`` otherwise, with NaN/Inf samples
              replaced by 0.0.
    """
    # int(nan) / int(inf) would raise; treat such times as an unusable range
    if not (np.isfinite(start) and np.isfinite(end)):
        return None, False, "invalid_range"

    rate = self.sample_rate

    # Bounds checking
    start_sample = max(0, int(start * rate))
    end_sample = min(len(audio), int(end * rate))

    # Sanity check
    if start_sample >= end_sample:
        return None, False, "invalid_range"

    segment = audio[start_sample:end_sample]

    # Length check: derive the minimum from the duration threshold so it
    # follows the configured sample rate (661 samples at 22050 Hz).
    min_samples = int(self.MIN_SEGMENT_MS * rate / 1000.0)
    if len(segment) < min_samples:
        return segment, False, f"too_short_{len(segment)}_samples"

    # NaN/Inf check (nan_to_num returns a cleaned copy)
    if not np.all(np.isfinite(segment)):
        segment = np.nan_to_num(segment, nan=0.0, posinf=0.0, neginf=0.0)

    return segment, True, None
def safe_rms(self, segment: np.ndarray, frame_length: int = 256, hop_length: int = 64) -> np.ndarray:
    """
    PRECISION: Calculate RMS with NaN protection.

    Args:
        segment: Audio samples to analyse.
        frame_length: Analysis window length passed to librosa.
        hop_length: Hop between successive frames passed to librosa.

    Returns:
        Frame-wise RMS values with NaN/Inf replaced and, when the peak is
        positive, scaled so the peak equals 1. Without librosa a
        single-element array ``[0.0]`` is returned.
    """
    if not HAS_LIBROSA:
        return np.array([0.0])

    frames = librosa.feature.rms(y=segment, frame_length=frame_length, hop_length=hop_length)
    # Protect against NaN/Inf
    envelope = np.nan_to_num(frames[0], nan=0.0, posinf=1.0, neginf=0.0)

    # Normalize to prevent division issues (skipped for an all-zero envelope)
    peak = np.max(envelope)
    if peak > 0:
        return envelope / peak
    return envelope
def validate_qalqalah(self,
                      audio: np.ndarray,
                      start: float,
                      end: float) -> QalqalahResult:
    """
    Validate Qalqalah rule: Must show closure (RMS dip) then release (RMS spike)

    Physics: The "bounce" is caused by complete oral closure followed by
    abrupt release. RMS energy shows: stable→dip→spike pattern.

    Args:
        audio: Sample array indexed at ``self.sample_rate``.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        QalqalahResult whose status is PASS (dip and spike both reach their
        thresholds), MARGINAL (spike only), FAIL (neither), or SKIPPED
        (librosa missing, unusable segment, or fewer than 3 RMS frames).
        ``score`` is always within [0, 1].
    """
    def skipped(observed: str, details: Optional[Dict] = None) -> QalqalahResult:
        # All early exits share the same shape; only the reason differs.
        return QalqalahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="RMS Energy",
            expected_pattern="dip_then_spike",
            observed_pattern=observed,
            score=0.0,
            details={} if details is None else details,
            rms_profile="unknown"
        )

    if not HAS_LIBROSA:
        return skipped("unknown")

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)
    if not is_valid:
        return skipped(error or "invalid_segment", {"reason": error})

    # PRECISION: Use safe RMS with NaN protection (peak-normalised envelope)
    hop_length = 64
    rms = self.safe_rms(segment, hop_length=hop_length)
    if len(rms) < 3:
        return skipped("insufficient_frames",
                       {"reason": f"Only {len(rms)} RMS frames < 3 minimum"})

    # Qalqalah should show: high → dip → spike
    midpoint = len(rms) // 2  # >= 1 because len(rms) >= 3

    # Reference level: mean energy of the first half.
    first_half_mean = float(np.mean(rms[:midpoint]))
    # Closure: lowest-energy frame of the whole segment.
    dip_idx = int(np.argmin(rms))
    dip_value = float(rms[dip_idx])
    # Release: highest-energy frame of the second half.
    spike_idx = midpoint + int(np.argmax(rms[midpoint:]))
    spike_value = float(rms[spike_idx])

    # Calculate metrics
    dip_depth = (first_half_mean - dip_value) / first_half_mean if first_half_mean > 0 else 0.0
    # Floor the denominator: a complete closure (dip RMS == 0) is the ideal
    # case and must not collapse the spike height to 0.
    spike_height = (spike_value - dip_value) / max(dip_value, 1e-6)

    # Determine pattern
    if dip_depth >= self.QALQALAH_DIP_THRESHOLD and spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
        rms_profile = "dip_then_spike"
        status = ValidationStatus.PASS
        score = min(1.0, (dip_depth + spike_height) / 2)
    elif spike_height >= self.QALQALAH_SPIKE_THRESHOLD:
        rms_profile = "spike_only"
        status = ValidationStatus.MARGINAL
        # Clamp: spike_height is unbounded, the score must stay within [0, 1]
        score = min(1.0, spike_height / 2)
    else:
        rms_profile = "flat"
        status = ValidationStatus.FAIL
        score = 0.0

    # Estimate closure duration: time from segment start to the dip frame
    closure_duration_ms = (dip_idx * hop_length / self.sample_rate) * 1000

    return QalqalahResult(
        status=status,
        metric_name="RMS Energy",
        expected_pattern="dip_then_spike",
        observed_pattern=rms_profile,
        score=score,
        rms_profile=rms_profile,
        dip_depth=dip_depth,
        spike_height=spike_height,
        closure_duration_ms=closure_duration_ms
    )
def validate_madd(self,
                  audio: np.ndarray,
                  start: float,
                  end: float,
                  expected_count: int = 2) -> MaddResult:
    """
    Validate Madd rule: Duration must match expected elongation count

    Physics: Madd is pure duration comparison.
    - Asli (natural): 2 counts
    - Wajib (obligatory): 4-5 counts
    - Lazim (required): 6 counts

    Args:
        audio: Unused; kept for a signature parallel to the other validators.
        start: Segment start time in seconds.
        end: Segment end time in seconds.
        expected_count: Expected elongation in counts. 2 selects the Asli
            threshold, 4 or 5 the Wajib threshold, anything else Lazim.

    Returns:
        MaddResult with PASS when the duration ratio is within ±30% of the
        selected threshold, MARGINAL when it is longer than that band, and
        FAIL when it is shorter. ``score`` is never negative.
    """
    actual_duration = end - start
    average_vowel = self._average_vowel_duration

    # Ratio of this segment to the calibrated average vowel duration
    ratio = actual_duration / average_vowel if average_vowel > 0 else 0

    # Determine pass/fail based on expected count
    tolerance = 0.3  # 30% tolerance
    if expected_count == 2:
        threshold = self.MADD_RATIO_ASLI
    elif expected_count in (4, 5):  # Wajib spans 4-5 counts
        threshold = self.MADD_RATIO_WAJIB
    else:
        threshold = self.MADD_RATIO_LAZIM

    if ratio >= threshold * (1 - tolerance):
        if ratio <= threshold * (1 + tolerance):
            status = ValidationStatus.PASS
            score = 1.0
        else:
            status = ValidationStatus.MARGINAL  # Too long, but acceptable
            score = 0.7
    else:
        status = ValidationStatus.FAIL
        # Partial credit proportional to the ratio; clamp so a reversed
        # (end < start) segment cannot yield a negative score.
        score = max(0.0, ratio / threshold) if threshold > 0 else 0

    return MaddResult(
        status=status,
        metric_name="Duration Ratio",
        expected_pattern=f"{expected_count}x average vowel",
        observed_pattern=f"{ratio:.1f}x average vowel",
        score=score,
        actual_duration_ms=actual_duration * 1000,
        expected_duration_ms=average_vowel * expected_count * 1000,
        ratio=ratio
    )
def validate_ghunnah(self,
                     audio: np.ndarray,
                     start: float,
                     end: float) -> GhunnahResult:
    """
    Validate Ghunnah (nasalization) rule

    Physics:
    - Drop in high-frequency energy (nasal anti-formant ~500Hz)
    - Stable pitch during nasalization
    - Duration elongation (2 counts minimum)

    The score is the mean of a duration score and a pitch-stability score.
    NOTE: nasal-formant detection is a placeholder; ``nasal_formant_detected``
    is always reported as True when analysis completes.

    Args:
        audio: Sample array indexed at ``self.sample_rate``.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        GhunnahResult. SKIPPED when parselmouth is missing, the segment is
        unusable, or analysis raises. MARGINAL when the segment is shorter
        than ``GHUNNAH_MIN_DURATION_MS``. Otherwise PASS / MARGINAL / FAIL
        from the combined score (>= 0.7 / >= 0.4 / below).
    """
    if not HAS_PARSELMOUTH:
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="unknown",
            score=0.0
        )

    duration_ms = (end - start) * 1000
    elongation = duration_ms / self.GHUNNAH_MIN_DURATION_MS

    # Check minimum duration
    if duration_ms < self.GHUNNAH_MIN_DURATION_MS:
        return GhunnahResult(
            status=ValidationStatus.MARGINAL,  # PRECISION: Changed from FAIL to MARGINAL
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="short_but_valid",
            # Clamp so a reversed (end < start) range cannot score below 0
            score=max(0.0, elongation),
            duration_elongation=elongation,
            details={"reason": f"Duration {duration_ms:.1f}ms < {self.GHUNNAH_MIN_DURATION_MS}ms minimum"}
        )

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)
    if not is_valid:
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern=error or "invalid_segment",
            score=0.0,
            details={"reason": error}
        )

    try:
        import os
        import tempfile
        import soundfile as sf

        # Convert to Praat Sound object via a temporary WAV file. The handle
        # is closed before writing and the file is always removed afterwards,
        # so repeated validations do not accumulate temp files.
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        tmp_path = tmp.name
        tmp.close()
        try:
            sf.write(tmp_path, segment, self.sample_rate)
            sound = parselmouth.Sound(tmp_path)
        finally:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # best-effort cleanup; analysis result is unaffected

        # Get pitch for stability analysis
        pitch = call(sound, "To Pitch", 0.0, 75, 600)
        pitch_values = pitch.selected_array['frequency']
        pitch_values = pitch_values[pitch_values > 0]  # Remove unvoiced
        if len(pitch_values) > 1:
            # Coefficient of variation turned into a stability measure
            pitch_stability = 1.0 - (np.std(pitch_values) / np.mean(pitch_values))
        else:
            pitch_stability = 0.0

        # Simplified detection: no formant measurement is made yet, so the
        # previously computed (and unused) formant object has been dropped.
        nasal_formant_detected = True
    except Exception as e:
        print(f"Parselmouth error: {e}")
        return GhunnahResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Formant Analysis",
            expected_pattern="nasal_resonance",
            observed_pattern="analysis_error",
            score=0.0
        )

    # Scoring
    duration_score = min(1.0, duration_ms / (self.GHUNNAH_MIN_DURATION_MS * 2))
    pitch_score = max(0.0, pitch_stability)
    total_score = (duration_score + pitch_score) / 2

    if total_score >= 0.7:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return GhunnahResult(
        status=status,
        metric_name="Formant Analysis",
        expected_pattern="nasal_resonance",
        observed_pattern="analyzed",
        score=total_score,
        nasal_formant_detected=nasal_formant_detected,
        pitch_stability=pitch_stability,
        duration_elongation=elongation
    )
def validate_tafkheem(self,
                      audio: np.ndarray,
                      start: float,
                      end: float) -> TafkheemResult:
    """
    Validate Tafkheem (heavy letter) rule

    Physics: Heavy letters show depressed F2 formant
    - Normal letters: F2 ~1500 Hz
    - Heavy letters: F2 ~1000-1200 Hz

    Args:
        audio: Sample array indexed at ``self.sample_rate``.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        TafkheemResult. SKIPPED when parselmouth is missing, the segment is
        unusable, analysis raises, or no valid F2 frame could be measured.
        Otherwise PASS when mean F2 <= ``TAFKHEEM_F2_MAX_HZ``, MARGINAL when
        <= 1350 Hz, else FAIL.
    """
    # One expected-pattern string for every return path, tied to the
    # threshold that is actually applied.
    expected = f"F2 < {self.TAFKHEEM_F2_MAX_HZ} Hz"

    def skipped(observed: str, details: Optional[Dict] = None) -> TafkheemResult:
        return TafkheemResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern=expected,
            observed_pattern=observed,
            score=0.0,
            details={} if details is None else details
        )

    if not HAS_PARSELMOUTH:
        return skipped("unknown")

    # PRECISION: Use safe extraction
    segment, is_valid, error = self.safe_extract_segment(audio, start, end)
    if not is_valid:
        return skipped(error or "invalid_segment", {"reason": error})

    try:
        import os
        import tempfile
        import soundfile as sf

        # Hand the segment to Praat through a temporary WAV file; the handle
        # is closed before writing and the file is always removed afterwards.
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        tmp_path = tmp.name
        tmp.close()
        try:
            sf.write(tmp_path, segment, self.sample_rate)
            sound = parselmouth.Sound(tmp_path)
        finally:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # best-effort cleanup

        # Get F2 formant
        formant = call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 50)

        # Collect F2 at every analysis frame, dropping undefined values
        f2_values = []
        num_frames = call(formant, "Get number of frames")
        for i in range(1, num_frames + 1):
            frame_time = call(formant, "Get time from frame number", i)
            f2 = call(formant, "Get value at time", 2, frame_time, "Hertz", "Linear")
            if not np.isnan(f2) and f2 > 0:
                f2_values.append(f2)
    except Exception as e:
        print(f"Parselmouth error: {e}")
        return skipped("analysis_error")

    # Without any measurable F2 there is nothing to judge; previously a mean
    # of 0 satisfied "F2 <= threshold" and was reported as PASS.
    if not f2_values:
        return skipped("no_f2_detected", {"reason": "no valid F2 frames"})

    f2_mean = float(np.mean(f2_values))

    # Calculate depression ratio relative to the light-sound baseline
    baseline_f2 = 1500.0
    depression_ratio = (baseline_f2 - f2_mean) / baseline_f2 if f2_mean < baseline_f2 else 0

    # Scoring
    if f2_mean <= self.TAFKHEEM_F2_MAX_HZ:
        status = ValidationStatus.PASS
        score = 1.0
    elif f2_mean <= 1350:
        # Only reachable when TAFKHEEM_F2_MAX_HZ is set below 1350
        status = ValidationStatus.MARGINAL
        score = 0.6
    else:
        status = ValidationStatus.FAIL
        score = max(0.0, depression_ratio)

    return TafkheemResult(
        status=status,
        metric_name="F2 Formant",
        expected_pattern=expected,
        observed_pattern=f"F2 = {f2_mean:.0f} Hz",
        score=score,
        f2_value_hz=f2_mean,
        f2_baseline_hz=baseline_f2,
        depression_ratio=depression_ratio
    )
# =========================================================================
# NEW VALIDATORS: Complete Tajweed Physics Coverage
# =========================================================================
def validate_idgham(self,
                    audio: np.ndarray,
                    nun_start: float,
                    nun_end: float,
                    next_letter_end: float,
                    has_ghunnah: bool = True) -> PhysicsResult:
    """
    Validate Idgham (assimilation) rule

    Physics:
    - Full Idgham (ر/ل): Complete merger, smooth energy, no nun boundary
    - Partial Idgham (ي/ن/م/و): Ghunnah preserved during transition

    The score averages an energy-smoothness term (low RMS variation) and a
    boundary term (small largest frame-to-frame RMS jump).

    Args:
        audio: Sample array indexed at ``self.sample_rate``.
        nun_start: Start of the nun in seconds; start of the analysed window.
        nun_end: End of the nun in seconds (currently not used).
        next_letter_end: End of the following letter in seconds; end of the
            analysed window.
        has_ghunnah: Only selects the ``expected_pattern`` label.

    Returns:
        PhysicsResult with SKIPPED (librosa missing), FAIL for a window under
        100 samples, otherwise PASS / MARGINAL / FAIL at score >= 0.6 / >= 0.4.
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Energy Continuity",
            expected_pattern="smooth_transition",
            observed_pattern="unknown",
            score=0.0
        )

    # Extract the window from nun start to the end of the next letter.
    # Clamp to the array so a negative time cannot wrap around through
    # Python's negative indexing.
    n_total = len(audio)
    start_sample = min(n_total, max(0, int(nun_start * self.sample_rate)))
    end_sample = min(n_total, max(0, int(next_letter_end * self.sample_rate)))
    segment = audio[start_sample:end_sample]

    if len(segment) < 100:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Energy Continuity",
            expected_pattern="smooth_transition",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Calculate RMS to check for smooth energy transition
    rms = librosa.feature.rms(y=segment, frame_length=256, hop_length=64)[0]
    mean_rms = np.mean(rms)

    # Relative energy variation - low variation = smooth transition
    rms_variance = np.std(rms) / mean_rms if mean_rms > 0 else 1.0
    smoothness_score = 1.0 - min(1.0, rms_variance)

    # Check for boundary sharpness (should be LOW for Idgham)
    max_jump = np.max(np.abs(np.diff(rms))) / mean_rms if mean_rms > 0 else 0
    boundary_score = 1.0 - min(1.0, max_jump)

    total_score = (smoothness_score + boundary_score) / 2

    if total_score >= 0.6:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return PhysicsResult(
        status=status,
        metric_name="Energy Continuity",
        expected_pattern="smooth_transition" if not has_ghunnah else "smooth_with_ghunnah",
        observed_pattern=f"smoothness={smoothness_score:.2f}",
        score=total_score,
        details={"smoothness": smoothness_score, "boundary_score": boundary_score}
    )
def validate_ikhfa(self,
                   audio: np.ndarray,
                   start: float,
                   end: float) -> PhysicsResult:
    """
    Validate Ikhfa (concealment) rule

    Physics:
    - Gradual nasalization transition (not abrupt like pure Ghunnah)
    - Partial nasal resonance that fades

    The score averages a gradient-smoothness term (how evenly the spectral
    centroid changes frame to frame) and a duration term that saturates at
    100 ms.

    Args:
        audio: Sample array indexed at ``self.sample_rate``.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        PhysicsResult with SKIPPED (librosa missing), FAIL for a segment under
        100 samples or under 3 centroid frames, otherwise PASS / MARGINAL /
        FAIL at score >= 0.6 / >= 0.4.
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="unknown",
            score=0.0
        )

    # Clamp to the array so a negative time cannot wrap around through
    # Python's negative indexing.
    n_total = len(audio)
    start_sample = min(n_total, max(0, int(start * self.sample_rate)))
    end_sample = min(n_total, max(0, int(end * self.sample_rate)))
    segment = audio[start_sample:end_sample]

    if len(segment) < 100:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Calculate spectral centroid (nasal sounds have lower centroid)
    sc = librosa.feature.spectral_centroid(y=segment, sr=self.sample_rate)[0]

    if len(sc) < 3:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Nasalization Gradient",
            expected_pattern="gradual_nasal",
            observed_pattern="insufficient_frames",
            score=0.0
        )

    # Check for gradient pattern: centroid should change gradually, i.e. the
    # frame-to-frame steps should be similar in size.
    sc_diff = np.abs(np.diff(sc))
    mean_step = np.mean(sc_diff)
    if mean_step > 0:
        gradient_smoothness = 1.0 - min(1.0, np.std(sc_diff) / mean_step)
    else:
        gradient_smoothness = 0.5  # constant centroid: neutral score

    # Duration check (Ikhfa should have reasonable duration)
    duration_ms = (end - start) * 1000
    duration_score = min(1.0, duration_ms / 100) if duration_ms > 0 else 0

    total_score = (gradient_smoothness + duration_score) / 2

    if total_score >= 0.6:
        status = ValidationStatus.PASS
    elif total_score >= 0.4:
        status = ValidationStatus.MARGINAL
    else:
        status = ValidationStatus.FAIL

    return PhysicsResult(
        status=status,
        metric_name="Nasalization Gradient",
        expected_pattern="gradual_nasal",
        observed_pattern=f"gradient={gradient_smoothness:.2f}",
        score=total_score,
        details={"gradient_smoothness": gradient_smoothness, "duration_ms": duration_ms}
    )
def validate_iqlab(self,
                   audio: np.ndarray,
                   start: float,
                   end: float) -> PhysicsResult:
    """
    Validate Iqlab (ن→م before ب)

    Physics:
    - Same as Ghunnah but with bilabial closure
    - Nasal formant + lip closure pattern (F1/F2 characteristic of /m/)

    Delegates the measurement to ``validate_ghunnah`` and relabels its
    outcome; status, observed pattern and score are copied unchanged.

    Args:
        audio: Sample array forwarded to ``validate_ghunnah``.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        PhysicsResult labelled "Bilabial Nasal" whose ``details`` records
        the ghunnah status value.
    """
    nasal_check = self.validate_ghunnah(audio, start, end)
    outcome = nasal_check.status

    # Re-wrap as a generic result carrying the Iqlab labels
    return PhysicsResult(
        status=outcome,
        metric_name="Bilabial Nasal",
        expected_pattern="mim_like_nasal",
        observed_pattern=nasal_check.observed_pattern,
        score=nasal_check.score,
        details={"ghunnah_check": outcome.value}
    )
def validate_izhar(self,
                   audio: np.ndarray,
                   letter_start: float,
                   letter_end: float,
                   next_letter_start: float) -> PhysicsResult:
    """
    Validate Izhar (clear pronunciation)

    Physics:
    - Clean, sharp boundary between letters
    - No nasalization
    - Clear articulation energy pattern

    Measures the largest frame-to-frame RMS change, relative to mean RMS,
    in a window from 20 ms before ``letter_end`` to 20 ms after
    ``next_letter_start``.

    Args:
        audio: Sample array indexed at ``self.sample_rate``.
        letter_start: Start of the letter in seconds (not used).
        letter_end: End of the letter in seconds.
        next_letter_start: Start of the following letter in seconds.

    Returns:
        PhysicsResult with SKIPPED (librosa missing), FAIL for a window under
        50 samples, otherwise PASS / MARGINAL / FAIL at sharpness
        >= 0.3 / >= 0.15.
    """
    metric = "Boundary Sharpness"
    expected = "clean_boundary"

    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name=metric,
            expected_pattern=expected,
            observed_pattern="unknown",
            score=0.0
        )

    rate = self.sample_rate
    # Boundary window: 20ms either side of the letter transition
    window_begin = max(0, letter_end - 0.02)
    window_finish = min(len(audio) / rate, next_letter_start + 0.02)
    segment = audio[int(window_begin * rate):int(window_finish * rate)]

    if len(segment) < 50:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name=metric,
            expected_pattern=expected,
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Short frames so that sharp transitions stay visible
    rms = librosa.feature.rms(y=segment, frame_length=128, hop_length=32)[0]
    frame_jumps = np.abs(np.diff(rms))
    mean_energy = np.mean(rms)
    max_change = np.max(frame_jumps) / mean_energy if mean_energy > 0 else 0

    # High change = sharp boundary = good for Izhar
    sharpness_score = min(1.0, max_change)

    if sharpness_score >= 0.3:  # Clear boundary detected
        status, score = ValidationStatus.PASS, min(1.0, sharpness_score * 2)
    elif sharpness_score >= 0.15:
        status, score = ValidationStatus.MARGINAL, sharpness_score * 2
    else:
        status, score = ValidationStatus.FAIL, sharpness_score

    return PhysicsResult(
        status=status,
        metric_name=metric,
        expected_pattern=expected,
        observed_pattern=f"sharpness={sharpness_score:.2f}",
        score=score,
        details={"boundary_sharpness": sharpness_score}
    )
def validate_tarqeeq(self,
                     audio: np.ndarray,
                     start: float,
                     end: float) -> PhysicsResult:
    """
    Validate Tarqeeq (light letters) - opposite of Tafkheem

    Physics: Light letters show elevated F2 formant (F2 > 1400 Hz)

    Runs ``validate_tafkheem`` to obtain the F2 measurement and then applies
    the inverted threshold.

    Args:
        audio: Sample array forwarded to ``validate_tafkheem``.
        start: Segment start time in seconds.
        end: Segment end time in seconds.

    Returns:
        PhysicsResult with SKIPPED when the Tafkheem analysis was skipped,
        otherwise PASS (F2 >= 1400 Hz), MARGINAL (F2 >= 1300 Hz) or FAIL.
    """
    heavy_check = self.validate_tafkheem(audio, start, end)

    if heavy_check.status == ValidationStatus.SKIPPED:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="F2 Formant",
            expected_pattern="F2 > 1400 Hz",
            observed_pattern="unknown",
            score=0.0
        )

    # Prefer an F2 value stored in details; otherwise read the result field
    extras = heavy_check.details
    if 'f2_value_hz' in extras:
        f2_value = extras['f2_value_hz']
    else:
        f2_value = getattr(heavy_check, 'f2_value_hz', 0)

    # For Tarqeeq, we want HIGH F2 (opposite of Tafkheem)
    TARQEEQ_F2_MIN_HZ = 1400.0
    if f2_value >= TARQEEQ_F2_MIN_HZ:
        status, score = ValidationStatus.PASS, 1.0
    elif f2_value >= 1300:
        status, score = ValidationStatus.MARGINAL, 0.6
    else:
        status = ValidationStatus.FAIL
        # Partial credit proportional to how close F2 is to the minimum
        score = f2_value / TARQEEQ_F2_MIN_HZ if f2_value > 0 else 0

    return PhysicsResult(
        status=status,
        metric_name="F2 Formant",
        expected_pattern=f"F2 > {TARQEEQ_F2_MIN_HZ} Hz",
        observed_pattern=f"F2 = {f2_value:.0f} Hz",
        score=score,
        details={"f2_value_hz": f2_value}
    )
def validate_sakt(self,
                  audio: np.ndarray,
                  start: float,
                  end: float) -> PhysicsResult:
    """
    Validate Sakt (brief pause without breath)

    Physics:
    - Brief silence (50-200ms)
    - RMS below threshold
    - No breathing artifacts

    Only the RMS level and the duration are checked here.

    Args:
        audio: Sample array indexed at ``self.sample_rate``.
        start: Pause start time in seconds.
        end: Pause end time in seconds.

    Returns:
        PhysicsResult with SKIPPED (librosa missing), FAIL for a segment under
        10 samples, PASS when silent and 50-200 ms long, MARGINAL when silent
        and longer than 30 ms, otherwise FAIL.
    """
    if not HAS_LIBROSA:
        return PhysicsResult(
            status=ValidationStatus.SKIPPED,
            metric_name="Silence Detection",
            expected_pattern="brief_silence",
            observed_pattern="unknown",
            score=0.0
        )

    # Clamp to the array so a negative time cannot wrap around through
    # Python's negative indexing.
    n_total = len(audio)
    start_sample = min(n_total, max(0, int(start * self.sample_rate)))
    end_sample = min(n_total, max(0, int(end * self.sample_rate)))
    segment = audio[start_sample:end_sample]
    duration_ms = (end - start) * 1000

    if len(segment) < 10:
        return PhysicsResult(
            status=ValidationStatus.FAIL,
            metric_name="Silence Detection",
            expected_pattern="brief_silence",
            observed_pattern="segment_too_short",
            score=0.0
        )

    # Whole-segment RMS level
    rms = np.sqrt(np.mean(segment**2))

    # Thresholds
    SAKT_RMS_THRESHOLD = 0.05
    SAKT_MIN_MS = 50
    SAKT_MAX_MS = 200

    # Check RMS (should be very low)
    is_silent = rms < SAKT_RMS_THRESHOLD
    # Check duration
    duration_ok = SAKT_MIN_MS <= duration_ms <= SAKT_MAX_MS

    if is_silent and duration_ok:
        status = ValidationStatus.PASS
        score = 1.0
    elif is_silent and (duration_ms > 30):
        status = ValidationStatus.MARGINAL
        score = 0.6
    else:
        status = ValidationStatus.FAIL
        # Silent but too short still earns partial credit
        score = 0.0 if rms >= SAKT_RMS_THRESHOLD else 0.3

    return PhysicsResult(
        status=status,
        metric_name="Silence Detection",
        expected_pattern=f"silence_{SAKT_MIN_MS}-{SAKT_MAX_MS}ms",
        observed_pattern=f"rms={rms:.3f}, dur={duration_ms:.0f}ms",
        score=score,
        details={"rms": rms, "duration_ms": duration_ms, "is_silent": is_silent}
    )
def calibrate_average_vowel(self, audio: np.ndarray, vowel_segments: List[Tuple[float, float]]) -> float:
    """
    Calibrate average vowel duration for this reciter

    This is crucial for Madd validation as reciter pace varies

    Args:
        audio: Not used by the calculation.
        vowel_segments: ``(start, end)`` pairs in seconds, one per vowel.

    Returns:
        The mean vowel duration in seconds, which is also stored on the
        validator. With no segments, 0.1 is returned and the stored value is
        left untouched.
    """
    if vowel_segments:
        spans = [stop - begin for begin, stop in vowel_segments]
        self._average_vowel_duration = np.mean(spans)
        return self._average_vowel_duration
    return 0.1  # Default 100ms
def main():
    """Test physics validator

    Builds a 2-second 440 Hz tone with an attenuated stretch followed by an
    amplified stretch (simulating Qalqalah), then prints the results of the
    Qalqalah and Madd validators.
    """
    banner = "=" * 50
    print(banner)
    print("TajweedSST Physics Validator Test")
    print(banner)

    # Create mock audio
    sample_rate = 22050
    duration = 2.0
    t = np.linspace(0, duration, int(sample_rate * duration))
    audio = np.sin(2 * np.pi * 440 * t) * 0.5

    # Dip over 40-50% of the signal, spike over 50-60%
    total = len(audio)
    dip_region = slice(int(total * 0.4), int(total * 0.5))
    spike_region = slice(int(total * 0.5), int(total * 0.6))
    audio[dip_region] *= 0.1
    audio[spike_region] *= 2.0

    validator = PhysicsValidator(sample_rate=sample_rate)

    # Test Qalqalah
    print("\nQalqalah Test:")
    qalqalah = validator.validate_qalqalah(audio, 0.3, 0.8)
    print(f" Status: {qalqalah.status.value}")
    print(f" Profile: {qalqalah.rms_profile}")
    print(f" Score: {qalqalah.score:.2f}")
    print(f" Dip Depth: {qalqalah.dip_depth:.2f}")
    print(f" Spike Height: {qalqalah.spike_height:.2f}")

    # Test Madd
    print("\nMadd Test:")
    validator._average_vowel_duration = 0.1  # 100ms average
    madd = validator.validate_madd(audio, 0.0, 0.4, expected_count=4)
    print(f" Status: {madd.status.value}")
    print(f" Ratio: {madd.ratio:.1f}x")
    print(f" Score: {madd.score:.2f}")


if __name__ == "__main__":
    main()