File size: 626 Bytes
fda8fb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from __future__ import annotations

from app.analysis.sentence_split import normalize_trace_text, split_sentences


def test_sentence_split_preserves_spacing_and_newlines() -> None:
    """Verify that sentence spans keep their trailing whitespace verbatim.

    The input is wrapped in ``<think>`` tags and mixes a double space with a
    blank-line gap between sentences. The test expects:

    * normalization to yield the inner text with the tags removed and the
      whitespace untouched;
    * each span's ``text`` to carry the whitespace that follows its sentence;
    * the second span's ``start_char`` to equal the length of the first
      span's text.
    """
    expected_pieces = [
        "First sentence.  ",
        "Second sentence?\n\n",
        "Third sentence!",
    ]

    cleaned = normalize_trace_text(
        "<think>First sentence.  Second sentence?\n\nThird sentence!</think>"
    )
    sentence_spans = split_sentences(cleaned)

    # Tags are gone; the double space and blank line survive normalization.
    assert cleaned == "First sentence.  Second sentence?\n\nThird sentence!"

    # Whitespace between sentences stays attached to the preceding span.
    actual_pieces = [piece.text for piece in sentence_spans]
    assert actual_pieces == expected_pieces

    # The second span begins right where the first span's text ends.
    assert sentence_spans[1].start_char == len(expected_pieces[0])