#!/usr/bin/env python3
"""
TajweedSST - Alignment Engine Unit Tests
Tests word and phoneme timing accuracy:
- WhisperX word alignment
- MFA phoneme alignment
- Phoneme normalization within word boundaries
- Mock alignment for testing without models
"""
import pytest
import os
import sys
# Put the ``src`` directory that sits one level above this file's directory
# at the front of sys.path, so the ``alignment_engine`` import that follows
# can be resolved (presumably that is where the module lives -- verify
# against the repository layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from alignment_engine import (
AlignmentEngine,
MockAlignmentEngine,
PhonemeAlignment,
WordAlignment,
AlignmentResult
)
class TestDataclasses:
    """Sanity checks for the alignment record types."""

    def test_phoneme_alignment(self):
        """A PhonemeAlignment exposes the phoneme and duration it was given."""
        record = PhonemeAlignment(phoneme="ب", start=0.0, end=0.1, duration=0.1)

        assert record.phoneme == "ب"
        assert record.duration == 0.1

    def test_phoneme_normalized_duration(self):
        """normalized_duration reads back 0.2 for a 0.2 s phoneme."""
        record = PhonemeAlignment(phoneme="ا", start=0.0, end=0.2, duration=0.2)

        # Read as an attribute (property), not called as a method.
        assert record.normalized_duration == 0.2

    def test_word_alignment(self):
        """A WordAlignment keeps its text and phoneme list and reports a duration."""
        # Three back-to-back phonemes spanning 0.0 .. 0.5.
        pieces = [
            PhonemeAlignment("ب", 0.0, 0.15, 0.15),
            PhonemeAlignment("س", 0.15, 0.35, 0.20),
            PhonemeAlignment("م", 0.35, 0.5, 0.15),
        ]
        aligned_word = WordAlignment(
            word_text="بسم",
            whisper_start=0.0,
            whisper_end=0.5,
            phonemes=pieces,
        )

        assert aligned_word.word_text == "بسم"
        assert len(aligned_word.phonemes) == 3
        assert aligned_word.whisper_duration == 0.5

    def test_alignment_result(self):
        """An AlignmentResult keeps the surah and ayah numbers it was built with."""
        outcome = AlignmentResult(
            audio_path="/path/to/audio.wav",
            surah=91,
            ayah=1,
            words=[],
        )

        assert outcome.surah == 91
        assert outcome.ayah == 1
class TestMockAlignmentEngine:
    """Checks for the mock aligner used for development without models."""

    @pytest.fixture
    def mock_engine(self):
        """Build the MockAlignmentEngine handed to each test."""
        engine = MockAlignmentEngine()
        return engine

    def test_mock_align_returns_result(self, mock_engine):
        """align() hands back an AlignmentResult instance."""
        request = dict(
            audio_path="/fake/path.wav",
            phonetic_words=["b i s m", "a l l a h"],
            surah=1,
            ayah=1,
        )
        outcome = mock_engine.align(**request)

        assert isinstance(outcome, AlignmentResult)

    def test_mock_align_word_count(self, mock_engine):
        """align() yields one aligned word per input phonetic word."""
        phonetic_words = ["b i s m", "a l l a h", "a r r a h m a n"]
        outcome = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=phonetic_words,
            surah=1,
            ayah=1,
        )

        expected_count = len(phonetic_words)
        assert len(outcome.words) == expected_count

    def test_mock_align_phoneme_generation(self, mock_engine):
        """align() attaches phonemes to the aligned word."""
        outcome = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["b i s m"],
            surah=1,
            ayah=1,
        )
        first_word = outcome.words[0]

        # "b i s m" has four symbols; the check only requires at least three.
        assert len(first_word.phonemes) >= 3

    def test_mock_align_timing_monotonic(self, mock_engine):
        """No word starts before the previous word has ended."""
        outcome = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["word1", "word2", "word3"],
            surah=1,
            ayah=1,
        )

        # Track the end of the last word seen; the first word is compared
        # against 0.0.
        cursor = 0.0
        for aligned in outcome.words:
            assert aligned.whisper_start >= cursor, "Word start before previous end"
            cursor = aligned.whisper_end
class TestTimingMonotonicity:
    """Check that mock alignment timing never goes backwards."""

    @pytest.fixture
    def mock_engine(self):
        """Provide a MockAlignmentEngine instance."""
        return MockAlignmentEngine()

    def test_word_timing_monotonic(self, mock_engine):
        """Each word starts no earlier than the previous word ends.

        The comparison is non-strict (``>=``): words that share a boundary
        are accepted, only overlapping words are rejected.
        """
        phonetic_words = ["w1", "w2", "w3", "w4", "w5"]
        result = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=phonetic_words,
            surah=1,
            ayah=1,
        )

        # Without this guard the loop below would pass vacuously if the
        # engine returned fewer than two words.
        assert len(result.words) == len(phonetic_words)

        # Walk adjacent (previous, current) pairs; i is the index of `curr`.
        adjacent = zip(result.words, result.words[1:])
        for i, (prev, curr) in enumerate(adjacent, start=1):
            assert curr.whisper_start >= prev.whisper_end, (
                f"Word {i} starts ({curr.whisper_start}) before word {i-1} ends ({prev.whisper_end})"
            )

    def test_phoneme_timing_monotonic(self, mock_engine):
        """Within a word, each phoneme starts no earlier than the previous ends.

        The comparison is non-strict (``>=``): adjacent phonemes may share a
        boundary, they just must not overlap.
        """
        result = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=["a l r a h m a n"],
            surah=1,
            ayah=1,
        )

        # Guard against a vacuous pass when nothing was aligned.
        assert result.words, "Mock alignment returned no words"

        for word in result.words:
            assert word.phonemes, f"Word {word.word_text!r} has no phonemes"
            for prev, curr in zip(word.phonemes, word.phonemes[1:]):
                assert curr.start >= prev.end, (
                    f"Phoneme {curr.phoneme} starts before {prev.phoneme} ends"
                )
class TestPhonemeNormalization:
    """Boundary and duration checks on hand-built word alignments."""

    def test_phonemes_fit_word_boundary(self):
        """First phoneme starts at the word start; last phoneme ends at the word end."""
        # Four equal 0.25 s phonemes tiling the 1.0 .. 2.0 word span.
        quarters = [
            PhonemeAlignment("t", 1.0, 1.25, 0.25),
            PhonemeAlignment("e", 1.25, 1.5, 0.25),
            PhonemeAlignment("s", 1.5, 1.75, 0.25),
            PhonemeAlignment("t", 1.75, 2.0, 0.25),
        ]
        aligned_word = WordAlignment(
            word_text="test",
            whisper_start=1.0,
            whisper_end=2.0,
            phonemes=quarters,
        )

        opening = aligned_word.phonemes[0]
        closing = aligned_word.phonemes[-1]

        # Leading edge lines up with the word start.
        assert opening.start == aligned_word.whisper_start
        # Trailing edge lines up with the word end.
        assert closing.end == aligned_word.whisper_end

    def test_phonemes_cover_word_duration(self):
        """Summed phoneme durations match the word duration within 0.01."""
        thirds = [
            PhonemeAlignment("a", 0.0, 0.333, 0.333),
            PhonemeAlignment("b", 0.333, 0.666, 0.333),
            PhonemeAlignment("c", 0.666, 1.0, 0.334),
        ]
        aligned_word = WordAlignment(
            word_text="test",
            whisper_start=0.0,
            whisper_end=1.0,
            phonemes=thirds,
        )

        durations = [piece.duration for piece in aligned_word.phonemes]
        phoneme_total = sum(durations)
        word_total = aligned_word.whisper_duration

        # Tolerance absorbs floating-point rounding in the sum.
        gap = abs(phoneme_total - word_total)
        assert gap < 0.01
class TestArabicPhonemes:
    """Checks that transliterated Arabic input is accepted by the mock aligner."""

    @pytest.fixture
    def mock_engine(self):
        """Build the MockAlignmentEngine handed to each test."""
        engine = MockAlignmentEngine()
        return engine

    def test_arabic_phonetic_transcription(self, mock_engine):
        """Two transliterated words in, two aligned words out."""
        # Arabic transliteration, one space-separated phoneme string per word.
        transliterated = ["b i s m i", "a l l aa h i"]
        outcome = mock_engine.align(
            audio_path="/fake/path.wav",
            phonetic_words=transliterated,
            surah=1,
            ayah=1,
        )

        assert len(outcome.words) == 2
if __name__ == "__main__":
    # pytest.main() returns the run's exit code instead of exiting itself;
    # hand it to sys.exit so that running this file directly reports test
    # failures through a non-zero process status.
    sys.exit(pytest.main([__file__, "-v"]))
|