# tajweedsst / tests /test_alignment_engine.py
# enver's picture
# Upload folder using huggingface_hub
# 21f2aa3 verified
#!/usr/bin/env python3
"""
TajweedSST - Alignment Engine Unit Tests
Tests word and phoneme timing accuracy:
- WhisperX word alignment
- MFA phoneme alignment
- Phoneme normalization within word boundaries
- Mock alignment for testing without models
"""
import pytest
import os
import sys
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from alignment_engine import (
AlignmentEngine,
MockAlignmentEngine,
PhonemeAlignment,
WordAlignment,
AlignmentResult
)
class TestDataclasses:
    """Sanity checks for the alignment container types."""

    def test_phoneme_alignment(self):
        """A PhonemeAlignment exposes the phoneme and duration it was built with."""
        aligned = PhonemeAlignment(phoneme="ب", start=0.0, end=0.1, duration=0.1)

        assert aligned.phoneme == "ب"
        assert aligned.duration == 0.1

    def test_phoneme_normalized_duration(self):
        """normalized_duration is readable as an attribute and equals 0.2 here."""
        aligned = PhonemeAlignment(phoneme="ا", start=0.0, end=0.2, duration=0.2)

        # Accessed without a call: normalized_duration is exposed as a property.
        assert aligned.normalized_duration == 0.2

    def test_word_alignment(self):
        """A WordAlignment keeps its text, its phonemes and reports a duration."""
        # Positional tuples in the order the original test passed them.
        phoneme_specs = [
            ("ب", 0.0, 0.15, 0.15),
            ("س", 0.15, 0.35, 0.20),
            ("م", 0.35, 0.5, 0.15),
        ]
        aligned_word = WordAlignment(
            word_text="بسم",
            whisper_start=0.0,
            whisper_end=0.5,
            phonemes=[PhonemeAlignment(*spec) for spec in phoneme_specs],
        )

        assert aligned_word.word_text == "بسم"
        assert len(aligned_word.phonemes) == 3
        assert aligned_word.whisper_duration == 0.5

    def test_alignment_result(self):
        """An AlignmentResult keeps the surah and ayah numbers it was given."""
        outcome = AlignmentResult(
            audio_path="/path/to/audio.wav",
            surah=91,
            ayah=1,
            words=[],
        )

        assert outcome.surah == 91
        assert outcome.ayah == 1
class TestMockAlignmentEngine:
    """Exercise MockAlignmentEngine, the stand-in used when no models are loaded."""

    @pytest.fixture
    def mock_engine(self):
        """Provide a MockAlignmentEngine instance."""
        return MockAlignmentEngine()

    @staticmethod
    def _run_alignment(engine, phonetic_words):
        """Call ``engine.align`` with the fake audio path and surah 1, ayah 1."""
        return engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=phonetic_words,
            surah=1,
            ayah=1,
        )

    def test_mock_align_returns_result(self, mock_engine):
        """align() hands back an AlignmentResult instance."""
        outcome = self._run_alignment(mock_engine, ["b i s m", "a l l a h"])

        assert isinstance(outcome, AlignmentResult)

    def test_mock_align_word_count(self, mock_engine):
        """One aligned word comes back per phonetic input word."""
        phonetic_words = ["b i s m", "a l l a h", "a r r a h m a n"]

        outcome = self._run_alignment(mock_engine, phonetic_words)

        assert len(outcome.words) == len(phonetic_words)

    def test_mock_align_phoneme_generation(self, mock_engine):
        """The first aligned word carries generated phonemes."""
        outcome = self._run_alignment(mock_engine, ["b i s m"])

        # The input has four space-separated symbols; at least three phonemes
        # are required of the mock.
        first_word = outcome.words[0]
        assert len(first_word.phonemes) >= 3

    def test_mock_align_timing_monotonic(self, mock_engine):
        """No word starts before the word preceding it has ended."""
        outcome = self._run_alignment(mock_engine, ["word1", "word2", "word3"])

        # Track the end of the last word seen; the first word is compared to 0.0.
        latest_end = 0.0
        for aligned_word in outcome.words:
            assert aligned_word.whisper_start >= latest_end, "Word start before previous end"
            latest_end = aligned_word.whisper_end
class TestTimingMonotonicity:
    """Check that aligned timings never go backwards or overlap."""

    @pytest.fixture
    def mock_engine(self):
        """Provide a MockAlignmentEngine instance."""
        return MockAlignmentEngine()

    def test_word_timing_monotonic(self, mock_engine):
        """Each word starts at or after the end of the word before it."""
        phonetic_words = ["w1", "w2", "w3", "w4", "w5"]
        result = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=phonetic_words,
            surah=1,
            ayah=1,
        )
        words = result.words

        # Guard against a vacuous pass: with fewer than two words the pairwise
        # loop below would not execute a single assertion.
        assert len(words) == len(phonetic_words), \
            f"Expected {len(phonetic_words)} aligned words, got {len(words)}"

        # i is the index of the later word in each adjacent pair.
        for i, (prev, curr) in enumerate(zip(words, words[1:]), start=1):
            assert curr.whisper_start >= prev.whisper_end, \
                f"Word {i} starts ({curr.whisper_start}) before word {i-1} ends ({prev.whisper_end})"

    def test_phoneme_timing_monotonic(self, mock_engine):
        """Within a word, each phoneme starts at or after the previous one ends."""
        result = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["a l r a h m a n"],
            surah=1,
            ayah=1,
        )

        # Guard against a vacuous pass when no words come back at all.
        assert result.words, "Alignment produced no words"

        for word in result.words:
            phonemes = word.phonemes
            # At least two phonemes are needed for the ordering check to run.
            assert len(phonemes) >= 2, \
                f"Expected at least 2 phonemes to compare, got {len(phonemes)}"
            for prev, curr in zip(phonemes, phonemes[1:]):
                assert curr.start >= prev.end, \
                    f"Phoneme {curr.phoneme} starts before {prev.phoneme} ends"
class TestPhonemeNormalization:
    """Checks on how phoneme timings relate to the enclosing word's boundaries."""

    # NOTE(review): both tests build the WordAlignment by hand and then assert
    # on the values they supplied; no engine normalization routine is invoked
    # here. Confirm whether that is the intended coverage.

    def test_phonemes_fit_word_boundary(self):
        """First phoneme starts at the word start; last phoneme ends at the word end."""
        quarter_second_phonemes = [
            PhonemeAlignment("t", 1.0, 1.25, 0.25),
            PhonemeAlignment("e", 1.25, 1.5, 0.25),
            PhonemeAlignment("s", 1.5, 1.75, 0.25),
            PhonemeAlignment("t", 1.75, 2.0, 0.25),
        ]
        aligned_word = WordAlignment(
            word_text="test",
            whisper_start=1.0,
            whisper_end=2.0,
            phonemes=quarter_second_phonemes,
        )

        opening = aligned_word.phonemes[0]
        assert opening.start == aligned_word.whisper_start

        closing = aligned_word.phonemes[-1]
        assert closing.end == aligned_word.whisper_end

    def test_phonemes_cover_word_duration(self):
        """Summed phoneme durations match the word duration to within 0.01."""
        third_second_phonemes = [
            PhonemeAlignment("a", 0.0, 0.333, 0.333),
            PhonemeAlignment("b", 0.333, 0.666, 0.333),
            PhonemeAlignment("c", 0.666, 1.0, 0.334),
        ]
        aligned_word = WordAlignment(
            word_text="test",
            whisper_start=0.0,
            whisper_end=1.0,
            phonemes=third_second_phonemes,
        )

        durations = [ph.duration for ph in aligned_word.phonemes]
        phoneme_total = sum(durations)
        expected_total = aligned_word.whisper_duration

        # Tolerance absorbs floating point rounding in the sum.
        drift = phoneme_total - expected_total
        assert abs(drift) < 0.01
class TestArabicPhonemes:
    """Checks for input given as transliterated Arabic phoneme strings."""

    @pytest.fixture
    def mock_engine(self):
        """Provide a MockAlignmentEngine instance."""
        return MockAlignmentEngine()

    def test_arabic_phonetic_transcription(self, mock_engine):
        """Two transliterated input words yield two aligned words."""
        # Arabic transliteration, one space-separated string per word.
        transliterated = ["b i s m i", "a l l aa h i"]

        outcome = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=transliterated,
            surah=1,
            ayah=1,
        )

        assert len(outcome.words) == 2
if __name__ == "__main__":
    # Run this file's tests verbosely and propagate pytest's exit status, so
    # that executing the file directly exits non-zero when a test fails.
    sys.exit(pytest.main([__file__, "-v"]))