# File size: 1,099 Bytes
# 4193bcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import unittest

from speaker_detection import detect_speakers
from text_processing import chunk_text, normalize_text


class TextProcessingTests(unittest.TestCase):
    """Checks for the ``normalize_text`` and ``chunk_text`` helpers."""

    def test_normalize_text_preserves_paragraphs(self):
        """Expect the space run to collapse and the triple newline to become a double one."""
        self.assertEqual(normalize_text("One   two\n\n\nThree"), "One two\n\nThree")

    def test_chunk_text_limits_size(self):
        """Expect a 220-word input to be split into several chunks within the limit."""
        max_chars = 80
        text = " ".join(["word"] * 220)
        chunks = chunk_text(text, max_chars=max_chars)
        self.assertGreater(len(chunks), 1)
        # Check chunks one at a time so a failure names the offending chunk
        # and its length instead of reporting only "False is not true".
        for index, chunk in enumerate(chunks):
            with self.subTest(index=index, chunk=chunk):
                self.assertLessEqual(len(chunk), max_chars)


class SpeakerDetectionTests(unittest.TestCase):
    """Checks the speaker indices that ``detect_speakers`` returns per mode."""

    def test_dialogue_labels_create_distinct_speakers(self):
        """Two lines with different name labels are expected to map to speakers 0 and 1."""
        transcript = "Alex: Hello.\nSarah: Hi there."
        result = detect_speakers(transcript, "dialogue")
        speaker_ids = [speaker_id for speaker_id, _ in result]
        self.assertEqual(speaker_ids, [0, 1])

    def test_paragraph_mode_alternates(self):
        """Three blank-line-separated paragraphs are expected to map to speakers 0, 1, 0."""
        passage = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
        result = detect_speakers(passage, "paragraph")
        speaker_ids = [speaker_id for speaker_id, _ in result]
        self.assertEqual(speaker_ids, [0, 1, 0])


# Run this module's tests when the file is executed directly as a script;
# nothing happens here when the module is merely imported.
if __name__ == "__main__":
    unittest.main()